diff options
Diffstat (limited to 'sys/kern/kern_tc.c')
-rw-r--r-- | sys/kern/kern_tc.c | 531 |
1 files changed, 54 insertions, 477 deletions
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c index 6166e1c..23a6606 100644 --- a/sys/kern/kern_tc.c +++ b/sys/kern/kern_tc.c @@ -1,74 +1,24 @@ -/*- - * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org> - * Copyright (c) 1982, 1986, 1991, 1993 - * The Regents of the University of California. All rights reserved. - * (c) UNIX System Laboratories, Inc. - * All or some portions of this file are derived from material licensed - * to the University of California by American Telephone and Telegraph - * Co. or Unix System Laboratories, Inc. and are reproduced herein with - * the permission of UNIX System Laboratories, Inc. - * - * Redistribution and use in source and binary forms, with or without - * modification, are permitted provided that the following conditions - * are met: - * 1. Redistributions of source code must retain the above copyright - * notice, this list of conditions and the following disclaimer. - * 2. Redistributions in binary form must reproduce the above copyright - * notice, this list of conditions and the following disclaimer in the - * documentation and/or other materials provided with the distribution. - * 3. All advertising materials mentioning features or use of this software - * must display the following acknowledgement: - * This product includes software developed by the University of - * California, Berkeley and its contributors. - * 4. Neither the name of the University nor the names of its contributors - * may be used to endorse or promote products derived from this software - * without specific prior written permission. - * - * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND - * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE - * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE - * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE - * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL - * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS - * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) - * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT - * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY - * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF - * SUCH DAMAGE. +/* + * ---------------------------------------------------------------------------- + * "THE BEER-WARE LICENSE" (Revision 42): + * <phk@FreeBSD.ORG> wrote this file. As long as you retain this notice you + * can do whatever you want with this stuff. If we meet some day, and you think + * this stuff is worth it, you can buy me a beer in return. Poul-Henning Kamp + * ---------------------------------------------------------------------------- * - * @(#)kern_clock.c 8.5 (Berkeley) 1/21/94 * $FreeBSD$ */ #include "opt_ntp.h" #include <sys/param.h> -#include <sys/systm.h> -#include <sys/dkstat.h> -#include <sys/callout.h> -#include <sys/kernel.h> -#include <sys/proc.h> +#include <sys/timetc.h> #include <sys/malloc.h> -#include <sys/resourcevar.h> -#include <sys/signalvar.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> +#include <sys/systm.h> #include <sys/timex.h> #include <sys/timepps.h> -#include <vm/vm.h> -#include <sys/lock.h> -#include <vm/pmap.h> -#include <vm/vm_map.h> -#include <sys/sysctl.h> - -#include <machine/cpu.h> -#include <machine/limits.h> - -#ifdef GPROF -#include <sys/gmon.h> -#endif - -#if defined(SMP) && defined(BETTER_CLOCK) -#include <machine/smp.h> -#endif /* * Number of timecounters used to implement stable storage @@ -80,37 +30,33 @@ static MALLOC_DEFINE(M_TIMECOUNTER, "timecounter", "Timecounter stable storage"); -static void initclocks __P((void *dummy)); -SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL) - -static void tco_forward __P((int force)); static void tco_setscales __P((struct timecounter *tc)); static __inline unsigned tco_delta __P((struct timecounter *tc)); -/* Some of these don't belong here, but it's easiest to concentrate them. */ -#if defined(SMP) && defined(BETTER_CLOCK) -long cp_time[CPUSTATES]; -#else -static long cp_time[CPUSTATES]; -#endif - -long tk_cancc; -long tk_nin; -long tk_nout; -long tk_rawcc; - time_t time_second; struct timeval boottime; SYSCTL_STRUCT(_kern, KERN_BOOTTIME, boottime, CTLFLAG_RD, &boottime, timeval, "System boottime"); -/* - * Which update policy to use. - * 0 - every tick, bad hardware may fail with "calcru negative..." - * 1 - more resistent to the above hardware, but less efficient. - */ -static int tco_method; +SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); + +static unsigned nmicrotime; +static unsigned nnanotime; +static unsigned ngetmicrotime; +static unsigned ngetnanotime; +static unsigned nmicrouptime; +static unsigned nnanouptime; +static unsigned ngetmicrouptime; +static unsigned ngetnanouptime; +SYSCTL_INT(_kern_timecounter, OID_AUTO, nmicrotime, CTLFLAG_RD, &nmicrotime, 0, ""); +SYSCTL_INT(_kern_timecounter, OID_AUTO, nnanotime, CTLFLAG_RD, &nnanotime, 0, ""); +SYSCTL_INT(_kern_timecounter, OID_AUTO, nmicrouptime, CTLFLAG_RD, &nmicrouptime, 0, ""); +SYSCTL_INT(_kern_timecounter, OID_AUTO, nnanouptime, CTLFLAG_RD, &nnanouptime, 0, ""); +SYSCTL_INT(_kern_timecounter, OID_AUTO, ngetmicrotime, CTLFLAG_RD, &ngetmicrotime, 0, ""); +SYSCTL_INT(_kern_timecounter, OID_AUTO, ngetnanotime, CTLFLAG_RD, &ngetnanotime, 0, ""); +SYSCTL_INT(_kern_timecounter, OID_AUTO, ngetmicrouptime, CTLFLAG_RD, &ngetmicrouptime, 0, ""); +SYSCTL_INT(_kern_timecounter, OID_AUTO, ngetnanouptime, CTLFLAG_RD, &ngetnanouptime, 0, ""); /* * Implement a dummy timecounter which we can use until we get a real one @@ -122,6 +68,7 @@ static unsigned dummy_get_timecount(struct timecounter *tc) { static unsigned now; + return (++now); } @@ -135,355 +82,6 @@ static struct timecounter dummy_timecounter = { struct timecounter *timecounter = &dummy_timecounter; -/* - * Clock handling routines. - * - * This code is written to operate with two timers that run independently of - * each other. - * - * The main timer, running hz times per second, is used to trigger interval - * timers, timeouts and rescheduling as needed. - * - * The second timer handles kernel and user profiling, - * and does resource use estimation. If the second timer is programmable, - * it is randomized to avoid aliasing between the two clocks. For example, - * the randomization prevents an adversary from always giving up the cpu - * just before its quantum expires. Otherwise, it would never accumulate - * cpu ticks. The mean frequency of the second timer is stathz. - * - * If no second timer exists, stathz will be zero; in this case we drive - * profiling and statistics off the main clock. This WILL NOT be accurate; - * do not do it unless absolutely necessary. - * - * The statistics clock may (or may not) be run at a higher rate while - * profiling. This profile clock runs at profhz. We require that profhz - * be an integral multiple of stathz. - * - * If the statistics clock is running fast, it must be divided by the ratio - * profhz/stathz for statistics. (For profiling, every tick counts.) - * - * Time-of-day is maintained using a "timecounter", which may or may - * not be related to the hardware generating the above mentioned - * interrupts. - */ - -int stathz; -int profhz; -static int profprocs; -int ticks; -static int psdiv, pscnt; /* prof => stat divider */ -int psratio; /* ratio: prof / stat */ - -/* - * Initialize clock frequencies and start both clocks running. - */ -/* ARGSUSED*/ -static void -initclocks(dummy) - void *dummy; -{ - register int i; - - /* - * Set divisors to 1 (normal case) and let the machine-specific - * code do its bit. - */ - psdiv = pscnt = 1; - cpu_initclocks(); - - /* - * Compute profhz/stathz, and fix profhz if needed. - */ - i = stathz ? stathz : hz; - if (profhz == 0) - profhz = i; - psratio = profhz / i; -} - -/* - * The real-time timer, interrupting hz times per second. - */ -void -hardclock(frame) - register struct clockframe *frame; -{ - register struct proc *p; - - p = curproc; - if (p) { - register struct pstats *pstats; - - /* - * Run current process's virtual and profile time, as needed. - */ - pstats = p->p_stats; - if (CLKF_USERMODE(frame) && - timevalisset(&pstats->p_timer[ITIMER_VIRTUAL].it_value) && - itimerdecr(&pstats->p_timer[ITIMER_VIRTUAL], tick) == 0) - psignal(p, SIGVTALRM); - if (timevalisset(&pstats->p_timer[ITIMER_PROF].it_value) && - itimerdecr(&pstats->p_timer[ITIMER_PROF], tick) == 0) - psignal(p, SIGPROF); - } - -#if defined(SMP) && defined(BETTER_CLOCK) - forward_hardclock(pscnt); -#endif - - /* - * If no separate statistics clock is available, run it from here. - */ - if (stathz == 0) - statclock(frame); - - tco_forward(0); - ticks++; - - /* - * Process callouts at a very low cpu priority, so we don't keep the - * relatively high clock interrupt priority any longer than necessary. - */ - if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL) { - if (CLKF_BASEPRI(frame)) { - /* - * Save the overhead of a software interrupt; - * it will happen as soon as we return, so do it now. - */ - (void)splsoftclock(); - softclock(); - } else - setsoftclock(); - } else if (softticks + 1 == ticks) - ++softticks; -} - -/* - * Compute number of ticks in the specified amount of time. - */ -int -tvtohz(tv) - struct timeval *tv; -{ - register unsigned long ticks; - register long sec, usec; - - /* - * If the number of usecs in the whole seconds part of the time - * difference fits in a long, then the total number of usecs will - * fit in an unsigned long. Compute the total and convert it to - * ticks, rounding up and adding 1 to allow for the current tick - * to expire. Rounding also depends on unsigned long arithmetic - * to avoid overflow. - * - * Otherwise, if the number of ticks in the whole seconds part of - * the time difference fits in a long, then convert the parts to - * ticks separately and add, using similar rounding methods and - * overflow avoidance. This method would work in the previous - * case but it is slightly slower and assumes that hz is integral. - * - * Otherwise, round the time difference down to the maximum - * representable value. - * - * If ints have 32 bits, then the maximum value for any timeout in - * 10ms ticks is 248 days. - */ - sec = tv->tv_sec; - usec = tv->tv_usec; - if (usec < 0) { - sec--; - usec += 1000000; - } - if (sec < 0) { -#ifdef DIAGNOSTIC - if (usec > 0) { - sec++; - usec -= 1000000; - } - printf("tvotohz: negative time difference %ld sec %ld usec\n", - sec, usec); -#endif - ticks = 1; - } else if (sec <= LONG_MAX / 1000000) - ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) - / tick + 1; - else if (sec <= LONG_MAX / hz) - ticks = sec * hz - + ((unsigned long)usec + (tick - 1)) / tick + 1; - else - ticks = LONG_MAX; - if (ticks > INT_MAX) - ticks = INT_MAX; - return ((int)ticks); -} - -/* - * Start profiling on a process. - * - * Kernel profiling passes proc0 which never exits and hence - * keeps the profile clock running constantly. - */ -void -startprofclock(p) - register struct proc *p; -{ - int s; - - if ((p->p_flag & P_PROFIL) == 0) { - p->p_flag |= P_PROFIL; - if (++profprocs == 1 && stathz != 0) { - s = splstatclock(); - psdiv = pscnt = psratio; - setstatclockrate(profhz); - splx(s); - } - } -} - -/* - * Stop profiling on a process. - */ -void -stopprofclock(p) - register struct proc *p; -{ - int s; - - if (p->p_flag & P_PROFIL) { - p->p_flag &= ~P_PROFIL; - if (--profprocs == 0 && stathz != 0) { - s = splstatclock(); - psdiv = pscnt = 1; - setstatclockrate(stathz); - splx(s); - } - } -} - -/* - * Statistics clock. Grab profile sample, and if divider reaches 0, - * do process and kernel statistics. Most of the statistics are only - * used by user-level statistics programs. The main exceptions are - * p->p_uticks, p->p_sticks, p->p_iticks, and p->p_estcpu. - */ -void -statclock(frame) - register struct clockframe *frame; -{ -#ifdef GPROF - register struct gmonparam *g; - int i; -#endif - register struct proc *p; - struct pstats *pstats; - long rss; - struct rusage *ru; - struct vmspace *vm; - - if (curproc != NULL && CLKF_USERMODE(frame)) { - /* - * Came from user mode; CPU was in user state. - * If this process is being profiled, record the tick. - */ - p = curproc; - if (p->p_flag & P_PROFIL) - addupc_intr(p, CLKF_PC(frame), 1); -#if defined(SMP) && defined(BETTER_CLOCK) - if (stathz != 0) - forward_statclock(pscnt); -#endif - if (--pscnt > 0) - return; - /* - * Charge the time as appropriate. - */ - p->p_uticks++; - if (p->p_nice > NZERO) - cp_time[CP_NICE]++; - else - cp_time[CP_USER]++; - } else { -#ifdef GPROF - /* - * Kernel statistics are just like addupc_intr, only easier. - */ - g = &_gmonparam; - if (g->state == GMON_PROF_ON) { - i = CLKF_PC(frame) - g->lowpc; - if (i < g->textsize) { - i /= HISTFRACTION * sizeof(*g->kcount); - g->kcount[i]++; - } - } -#endif -#if defined(SMP) && defined(BETTER_CLOCK) - if (stathz != 0) - forward_statclock(pscnt); -#endif - if (--pscnt > 0) - return; - /* - * Came from kernel mode, so we were: - * - handling an interrupt, - * - doing syscall or trap work on behalf of the current - * user process, or - * - spinning in the idle loop. - * Whichever it is, charge the time as appropriate. - * Note that we charge interrupts to the current process, - * regardless of whether they are ``for'' that process, - * so that we know how much of its real time was spent - * in ``non-process'' (i.e., interrupt) work. - */ - p = curproc; - if (CLKF_INTR(frame)) { - if (p != NULL) - p->p_iticks++; - cp_time[CP_INTR]++; - } else if (p != NULL) { - p->p_sticks++; - cp_time[CP_SYS]++; - } else - cp_time[CP_IDLE]++; - } - pscnt = psdiv; - - if (p != NULL) { - schedclock(p); - - /* Update resource usage integrals and maximums. */ - if ((pstats = p->p_stats) != NULL && - (ru = &pstats->p_ru) != NULL && - (vm = p->p_vmspace) != NULL) { - ru->ru_ixrss += pgtok(vm->vm_tsize); - ru->ru_idrss += pgtok(vm->vm_dsize); - ru->ru_isrss += pgtok(vm->vm_ssize); - rss = pgtok(vmspace_resident_count(vm)); - if (ru->ru_maxrss < rss) - ru->ru_maxrss = rss; - } - } -} - -/* - * Return information about system clocks. - */ -static int -sysctl_kern_clockrate SYSCTL_HANDLER_ARGS -{ - struct clockinfo clkinfo; - /* - * Construct clockinfo structure. - */ - clkinfo.hz = hz; - clkinfo.tick = tick; - clkinfo.tickadj = tickadj; - clkinfo.profhz = profhz; - clkinfo.stathz = stathz ? stathz : hz; - return (sysctl_handle_opaque(oidp, &clkinfo, sizeof clkinfo, req)); -} - -SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD, - 0, 0, sysctl_kern_clockrate, "S,clockinfo",""); - static __inline unsigned tco_delta(struct timecounter *tc) { @@ -508,12 +106,9 @@ getmicrotime(struct timeval *tvp) { struct timecounter *tc; - if (!tco_method) { - tc = timecounter; - *tvp = tc->tc_microtime; - } else { - microtime(tvp); - } + ngetmicrotime++; + tc = timecounter; + *tvp = tc->tc_microtime; } void @@ -521,12 +116,9 @@ getnanotime(struct timespec *tsp) { struct timecounter *tc; - if (!tco_method) { - tc = timecounter; - *tsp = tc->tc_nanotime; - } else { - nanotime(tsp); - } + ngetnanotime++; + tc = timecounter; + *tsp = tc->tc_nanotime; } void @@ -534,6 +126,7 @@ microtime(struct timeval *tv) { struct timecounter *tc; + nmicrotime++; tc = timecounter; tv->tv_sec = tc->tc_offset_sec; tv->tv_usec = tc->tc_offset_micro; @@ -553,6 +146,7 @@ nanotime(struct timespec *ts) u_int64_t delta; struct timecounter *tc; + nnanotime++; tc = timecounter; ts->tv_sec = tc->tc_offset_sec; count = tco_delta(tc); @@ -574,13 +168,10 @@ getmicrouptime(struct timeval *tvp) { struct timecounter *tc; - if (!tco_method) { - tc = timecounter; - tvp->tv_sec = tc->tc_offset_sec; - tvp->tv_usec = tc->tc_offset_micro; - } else { - microuptime(tvp); - } + ngetmicrouptime++; + tc = timecounter; + tvp->tv_sec = tc->tc_offset_sec; + tvp->tv_usec = tc->tc_offset_micro; } void @@ -588,13 +179,10 @@ getnanouptime(struct timespec *tsp) { struct timecounter *tc; - if (!tco_method) { - tc = timecounter; - tsp->tv_sec = tc->tc_offset_sec; - tsp->tv_nsec = tc->tc_offset_nano >> 32; - } else { - nanouptime(tsp); - } + ngetnanouptime++; + tc = timecounter; + tsp->tv_sec = tc->tc_offset_sec; + tsp->tv_nsec = tc->tc_offset_nano >> 32; } void @@ -602,6 +190,7 @@ microuptime(struct timeval *tv) { struct timecounter *tc; + nmicrouptime++; tc = timecounter; tv->tv_sec = tc->tc_offset_sec; tv->tv_usec = tc->tc_offset_micro; @@ -619,6 +208,7 @@ nanouptime(struct timespec *ts) u_int64_t delta; struct timecounter *tc; + nnanouptime++; tc = timecounter; ts->tv_sec = tc->tc_offset_sec; count = tco_delta(tc); @@ -647,13 +237,13 @@ tco_setscales(struct timecounter *tc) } void -update_timecounter(struct timecounter *tc) +tc_update(struct timecounter *tc) { tco_setscales(tc); } void -init_timecounter(struct timecounter *tc) +tc_init(struct timecounter *tc) { struct timespec ts1; struct timecounter *t1, *t2, *t3; @@ -696,7 +286,7 @@ init_timecounter(struct timecounter *tc) } void -set_timecounter(struct timespec *ts) +tc_setclock(struct timespec *ts) { struct timespec ts2; @@ -708,7 +298,7 @@ set_timecounter(struct timespec *ts) boottime.tv_sec--; } /* fiddle all the little crinkly bits around the fiords... */ - tco_forward(1); + tc_windup(); } static void @@ -754,8 +344,8 @@ sync_other_counter(void) return (tc); } -static void -tco_forward(int force) +void +tc_windup(void) { struct timecounter *tc, *tco; struct timeval tvt; @@ -792,12 +382,8 @@ tco_forward(int force) tc->tc_offset_sec++; ntp_update_second(tc); /* XXX only needed if xntpd runs */ tco_setscales(tc); - force++; } - if (tco_method && !force) - return; - tc->tc_offset_micro = (tc->tc_offset_nano / 1000) >> 32; /* Figure out the wall-clock time */ @@ -815,15 +401,6 @@ tco_forward(int force) timecounter = tc; } -SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, ""); - -SYSCTL_INT(_kern_timecounter, OID_AUTO, method, CTLFLAG_RW, &tco_method, 0, - "This variable determines the method used for updating timecounters. " - "If the default algorithm (0) fails with \"calcru negative...\" messages " - "try the alternate algorithm (1) which handles bad hardware better." - -); - static int sysctl_kern_timecounter_hardware SYSCTL_HANDLER_ARGS { |