From 044e1e629691b102791a17fc6db270846b71b01c Mon Sep 17 00:00:00 2001
From: phk
Date: Fri, 20 Feb 1998 16:36:17 +0000
Subject: Replace TOD clock code with more systematic approach.

Highlights:
 * Simple model for underlying hardware.
 * Hardware basis for timekeeping can be changed on the fly.
 * Only one hardware clock responsible for TOD keeping.
 * Provides a real nanotime() function.
 * Time granularity: .232E-18 seconds.
 * Frequency granularity: .238E-12 s/s
 * Frequency adjustment is continuous in time.
 * Less overhead for frequency adjustment.
 * Improves xntpd performance.

Reviewed by: bde, bde, bde
---
 sys/kern/kern_clock.c   | 335 ++++++++++++++++++++++++++++++++++--------------
 sys/kern/kern_ntptime.c |  51 ++++----
 sys/kern/kern_random.c  |  19 +--
 sys/kern/kern_tc.c      | 335 ++++++++++++++++++++++++++++++++++--------------
 sys/kern/kern_time.c    |  13 +-
 5 files changed, 519 insertions(+), 234 deletions(-)

diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 20b700b..30bb775 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 1997, 1998 Poul-Henning Kamp
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
@@ -36,7 +37,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
- * $Id: kern_clock.c,v 1.55 1998/02/06 12:13:22 eivind Exp $
+ * $Id: kern_clock.c,v 1.56 1998/02/15 13:55:06 phk Exp $
  */

 #include
@@ -55,7 +56,6 @@
 #include
 #include

-#define CLOCK_HAIR		/* XXX */
 #include
 #include

@@ -70,6 +70,9 @@
 static void initclocks __P((void *dummy));
 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

+static void tco_forward __P((void));
+static void tco_setscales __P((struct timecounter *tc));
+
 /* Some of these don't belong here, but it's easiest to concentrate them. */
 #if defined(SMP) && defined(BETTER_CLOCK)
 long cp_time[CPUSTATES];
@@ -91,55 +94,43 @@ long tk_nin;
 long tk_nout;
 long tk_rawcc;

+struct timecounter *timecounter;
+
 /*
  * Clock handling routines.
  *
- * This code is written to operate with two timers that run independently of
- * each other.  The main clock, running hz times per second, is used to keep
- * track of real time.  The second timer handles kernel and user profiling,
- * and does resource use estimation.  If the second timer is programmable,
- * it is randomized to avoid aliasing between the two clocks.  For example,
- * the randomization prevents an adversary from always giving up the cpu
- * just before its quantum expires.  Otherwise, it would never accumulate
- * cpu ticks.  The mean frequency of the second timer is stathz.
+ * This code is written to operate with two timers that run independently
+ * of each other.
  *
- * If no second timer exists, stathz will be zero; in this case we drive
- * profiling and statistics off the main clock.  This WILL NOT be accurate;
- * do not do it unless absolutely necessary.
+ * The main clock, running hz times per second, is used to trigger
+ * interval timers, timeouts and rescheduling as needed.
  *
+ * The second timer handles kernel and user profiling, and does resource
+ * use estimation.  If the second timer is programmable, it is randomized
+ * to avoid aliasing between the two clocks.  For example, the
+ * randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires.  Otherwise, it would never accumulate
+ * cpu ticks.  The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we
+ * drive profiling and statistics off the main clock.  This WILL NOT
+ * be accurate; do not do it unless absolutely necessary.
  *
 * The statistics clock may (or may not) be run at a higher rate while
- * profiling.  This profile clock runs at profhz.  We require that profhz
- * be an integral multiple of stathz.
+ * profiling.  This profile clock runs at profhz.  We require that
+ * profhz be an integral multiple of stathz.  If the statistics clock
+ * is running fast, it must be divided by the ratio profhz/stathz for
+ * statistics.  (For profiling, every tick counts.)
  *
- * If the statistics clock is running fast, it must be divided by the ratio
- * profhz/stathz for statistics.  (For profiling, every tick counts.)
- */
-
-/*
- * TODO:
- *	allocate more timeout table slots when table overflows.
- */
-
-/*
- * Bump a timeval by a small number of usec's.
+ * Time-of-day is maintained using a "timecounter", which may or may
+ * not be related to the hardware generating the above mentioned
+ * interrupts.
  */
-#define BUMPTIME(t, usec) { \
-	register volatile struct timeval *tp = (t); \
-	register long us; \
- \
-	tp->tv_usec = us = tp->tv_usec + (usec); \
-	if (us >= 1000000) { \
-		tp->tv_usec = us - 1000000; \
-		tp->tv_sec++; \
-	} \
-}

 int	stathz;
 int	profhz;
 static int profprocs;
 int	ticks;
 static int psdiv, pscnt;	/* prof => stat divider */
-int psratio;			/* ratio: prof / stat */
+int	psratio;		/* ratio: prof / stat */

 volatile struct timeval time;
 volatile struct timeval mono_time;
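[The struct timecounter manipulated by the hunks below is declared in <sys/time.h>, which is outside this diff. The following sketch is reconstructed from the field accesses visible in this patch; member types and ordering are informed guesses, not the committed declaration:

	struct timecounter {
		/* Hardware half, filled in by the driver: */
		u_int (*get_timedelta) __P((struct timecounter *tc));
		u_int (*get_timecount) __P((void));
		u_int32_t counter_mask;		/* counter wrap mask */
		u_int32_t frequency;		/* counter rate, Hz */
		char *name;
		/* Software half, managed by the code in this patch: */
		int32_t adjustment;		/* NTP slew from ntp_update_second() */
		u_int32_t scale_micro;		/* usec per count, 0.32 fixed point */
		u_int32_t scale_nano_i;		/* nsec per count, integer part */
		u_int32_t scale_nano_f;		/* nsec per count, 0.32 fraction */
		u_int32_t offset_count;		/* counter reading at offset_* */
		u_int32_t offset_sec;		/* TOD second of last update */
		u_int32_t offset_micro;		/* usec into offset_sec */
		u_int64_t offset_nano;		/* nsec into offset_sec, 32.32 */
		u_int cost;			/* measured cost of a read, nsec */
		struct timecounter *other;	/* the currently inactive twin */
		struct timecounter *tweak;	/* element 0, sysctl/NTP target */
	};

tco_forward() below ping-pongs between two copies of this structure through the other pointers, so readers always see a consistent snapshot while the next one is being built.]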
@@ -178,9 +169,6 @@ hardclock(frame)
 	register struct clockframe *frame;
 {
 	register struct proc *p;
-	int time_update;
-	struct timeval newtime = time;
-	long ltemp;

 	p = curproc;
 	if (p) {
@@ -208,55 +196,9 @@ hardclock(frame)
 	if (stathz == 0)
 		statclock(frame);

-	/*
-	 * Increment the time-of-day.
-	 */
-	ticks++;
+	tco_forward();

-	if (timedelta == 0) {
-		time_update = CPU_THISTICKLEN(tick);
-	} else {
-		time_update = CPU_THISTICKLEN(tick) + tickdelta;
-		timedelta -= tickdelta;
-	}
-	BUMPTIME(&mono_time, time_update);
-
-	/*
-	 * Compute the phase adjustment. If the low-order bits
-	 * (time_phase) of the update overflow, bump the high-order bits
-	 * (time_update).
-	 */
-	time_phase += time_adj;
-	if (time_phase <= -FINEUSEC) {
-		ltemp = -time_phase >> SHIFT_SCALE;
-		time_phase += ltemp << SHIFT_SCALE;
-		time_update -= ltemp;
-	}
-	else if (time_phase >= FINEUSEC) {
-		ltemp = time_phase >> SHIFT_SCALE;
-		time_phase -= ltemp << SHIFT_SCALE;
-		time_update += ltemp;
-	}
-
-	newtime.tv_usec += time_update;
-	/*
-	 * On rollover of the second the phase adjustment to be used for
-	 * the next second is calculated. Also, the maximum error is
-	 * increased by the tolerance. If the PPS frequency discipline
-	 * code is present, the phase is increased to compensate for the
-	 * CPU clock oscillator frequency error.
-	 *
-	 * On a 32-bit machine and given parameters in the timex.h
-	 * header file, the maximum phase adjustment is +-512 ms and
-	 * maximum frequency offset is a tad less than) +-512 ppm. On a
-	 * 64-bit machine, you shouldn't need to ask.
-	 */
-	if (newtime.tv_usec >= 1000000) {
-		newtime.tv_usec -= 1000000;
-		newtime.tv_sec++;
-		ntp_update_second(&newtime.tv_sec);
-	}
-	CPU_CLOCKUPDATE(&time, &newtime);
+	ticks++;

 	if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL)
 		setsoftclock();
@@ -315,6 +257,10 @@ hzto(tv)
 	}
 	if (sec < 0) {
 #ifdef DIAGNOSTIC
+		if (sec == -1 && usec > 0) {
+			sec++;
+			usec -= 1000000;
+		}
 		printf("hzto: negative time difference %ld sec %ld usec\n",
 		       sec, usec);
 #endif
@@ -529,11 +475,212 @@ SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
 	0, 0, sysctl_kern_clockrate, "S,clockinfo","");

 void
-nanotime(ts)
-	struct timespec *ts;
+microtime(struct timeval *tv)
+{
+	struct timecounter *tc;
+
+	tc = (struct timecounter *)timecounter;
+	tv->tv_sec = tc->offset_sec;
+	tv->tv_usec = tc->offset_micro;
+	tv->tv_usec +=
+	    ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32;
+	if (tv->tv_usec >= 1000000) {
+		tv->tv_usec -= 1000000;
+		tv->tv_sec++;
+	}
+}
+
+void
+nanotime(struct timespec *tv)
+{
+	u_int32_t count;
+	u_int64_t delta;
+	struct timecounter *tc;
+
+	tc = (struct timecounter *)timecounter;
+	tv->tv_sec = tc->offset_sec;
+	count = tc->get_timedelta(tc);
+	delta = tc->offset_nano;
+	delta += ((u_int64_t)count * tc->scale_nano_f);
+	delta += ((u_int64_t)count * tc->scale_nano_i) << 32;
+	delta >>= 32;
+	if (delta >= 1000000000) {
+		delta -= 1000000000;
+		tv->tv_sec++;
+	}
+	tv->tv_nsec = delta;
+}
+
+static void
+tco_setscales(struct timecounter *tc)
+{
+	u_int64_t scale;
+
+	scale = 1000000000LL << 32;
+	if (tc->adjustment > 0)
+		scale += (tc->adjustment * 1000LL) << 10;
+	else
+		scale -= (-tc->adjustment * 1000LL) << 10;
+	/* scale += tc->frequency >> 1; */ /* XXX do we want to round ? */
+	scale /= tc->frequency;
+	tc->scale_micro = scale / 1000;
+	tc->scale_nano_f = scale & 0xffffffff;
+	tc->scale_nano_i = scale >> 32;
+}
+
+static u_int
+delta_timecounter(struct timecounter *tc)
+{
+	return((tc->get_timecount() - tc->offset_count) & tc->counter_mask);
+}
+
+void
+init_timecounter(struct timecounter *tc)
+{
+	struct timespec ts0, ts1;
+	int i;
+
+	if (!tc->get_timedelta)
+		tc->get_timedelta = delta_timecounter;
+	tc->adjustment = 0;
+	tco_setscales(tc);
+	tc->offset_count = tc->get_timecount();
+	tc[0].tweak = &tc[0];
+	tc[2] = tc[1] = tc[0];
+	tc[1].other = &tc[2];
+	tc[2].other = &tc[1];
+	if (!timecounter)
+		timecounter = &tc[2];
+	tc = &tc[1];
+
+	/*
+	 * Figure out the cost of calling this timecounter.
+	 * XXX: The 1:15 ratio is a guess at reality.
+	 */
+	nanotime(&ts0);
+	for (i = 0; i < 16; i ++)
+		tc->get_timecount();
+	for (i = 0; i < 240; i ++)
+		tc->get_timedelta(tc);
+	nanotime(&ts1);
+	ts1.tv_sec -= ts0.tv_sec;
+	tc->cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec;
+	tc->cost >>= 8;
+	printf("Timecounter \"%s\" frequency %lu Hz cost %u ns\n",
+	    tc->name, tc->frequency, tc->cost);

+	/* XXX: For now always start using the counter. */
+	tc->offset_count = tc->get_timecount();
+	nanotime(&ts1);
+	tc->offset_nano = (u_int64_t)ts1.tv_nsec << 32;
+	tc->offset_micro = ts1.tv_nsec / 1000;
+	tc->offset_sec = ts1.tv_sec;
+	timecounter = tc;
+}
+
+void
+set_timecounter(struct timespec *ts)
 {
-	struct timeval tv;
-	microtime(&tv);
-	ts->tv_sec = tv.tv_sec;
-	ts->tv_nsec = tv.tv_usec * 1000;
+	struct timecounter *tc, *tco;
+	int s;
+
+	s = splclock();
+	tc=timecounter->other;
+	tco = tc->other;
+	*tc = *timecounter;
+	tc->other = tco;
+	tc->offset_sec = ts->tv_sec;
+	tc->offset_nano = (u_int64_t)ts->tv_nsec << 32;
+	tc->offset_micro = ts->tv_nsec / 1000;
+	tc->offset_count = tc->get_timecount();
+	time.tv_sec = tc->offset_sec;
+	time.tv_usec = tc->offset_micro;
+	timecounter = tc;
+	splx(s);
 }
+
+static struct timecounter *
+sync_other_counter(int flag)
+{
+	struct timecounter *tc, *tco;
+	u_int32_t delta;
+
+	tc = timecounter->other;
+	tco = tc->other;
+	*tc = *timecounter;
+	tc->other = tco;
+	delta = tc->get_timedelta(tc);
+	tc->offset_count += delta;
+	tc->offset_count &= tc->counter_mask;
+	tc->offset_nano += (u_int64_t)delta * tc->scale_nano_f;
+	tc->offset_nano += (u_int64_t)delta * tc->scale_nano_i << 32;
+	if (flag)
+		return (tc);
+	if (tc->offset_nano > 1000000000ULL << 32) {
+		tc->offset_sec++;
+		tc->offset_nano -= 1000000000ULL << 32;
+	}
+	tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+	return (tc);
+}
+
+static void
+tco_forward(void)
+{
+	struct timecounter *tc;
+	u_int32_t time_update;
+
+	tc = sync_other_counter(1);
+	time_update = 0;
+
+	if (timedelta) {
+		time_update += tickdelta;
+		timedelta -= tickdelta;
+	}
+	mono_time.tv_usec += time_update + tick;
+	if (mono_time.tv_usec >= 1000000) {
+		mono_time.tv_usec -= 1000000;
+		mono_time.tv_sec++;
+	}
+	time_update *= 1000;
+	tc->offset_nano += (u_int64_t)time_update << 32;
+	if (tc->offset_nano >= 1000000000ULL << 32) {
+		tc->offset_nano -= 1000000000ULL << 32;
+		tc->offset_sec++;
+		tc->frequency = tc->tweak->frequency;
+		tc->adjustment = tc->tweak->adjustment;	/* XXX remove this ? */
+		ntp_update_second(tc);	/* XXX only needed if xntpd runs */
+		tco_setscales(tc);
+	}
+	/*
+	 * Find the usec from the nsec. This is just as fast (one
+	 * multiplication) and prevents skew between the two due
+	 * to rounding errors. (2^32/1000 = 4294967.296)
+	 */
+	tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+	time.tv_usec = tc->offset_micro;
+	time.tv_sec = tc->offset_sec;
+	timecounter = tc;
+}
+
+static int
+sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
+{
+	return (sysctl_handle_opaque(oidp, &timecounter->tweak->frequency,
+	    sizeof(timecounter->tweak->frequency), req));
+}
+
+static int
+sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
+{
+	return (sysctl_handle_opaque(oidp, &timecounter->tweak->adjustment,
+	    sizeof(timecounter->tweak->adjustment), req));
+}
+
+SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT|CTLFLAG_RW,
+    0, sizeof(u_int) , sysctl_kern_timecounter_frequency, "I", "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT|CTLFLAG_RW,
+    0, sizeof(int) , sysctl_kern_timecounter_adjustment, "I", "");
diff --git a/sys/kern/kern_ntptime.c b/sys/kern/kern_ntptime.c
index 102e650..636a5ce 100644
--- a/sys/kern/kern_ntptime.c
+++ b/sys/kern/kern_ntptime.c
@@ -99,6 +99,7 @@
 static long time_tolerance = MAXFREQ;	/* frequency tolerance (scaled ppm) */
 static long time_precision = 1;	/* clock precision (us) */
 static long time_maxerror = MAXPHASE;	/* maximum error (us) */
 static long time_esterror = MAXPHASE;	/* estimated error (us) */
+static int time_daemon = 0;		/* No timedaemon active */

 /*
  * The following variables establish the state of the PLL/FLL and the
@@ -285,11 +286,28 @@ hardupdate(offset)
 		time_freq = -time_tolerance;
 }

+/*
+ * On rollover of the second the phase adjustment to be used for
+ * the next second is calculated. Also, the maximum error is
+ * increased by the tolerance. If the PPS frequency discipline
+ * code is present, the phase is increased to compensate for the
+ * CPU clock oscillator frequency error.
+ *
+ * On a 32-bit machine and given parameters in the timex.h
+ * header file, the maximum phase adjustment is +-512 ms and
+ * maximum frequency offset is a tad less than) +-512 ppm. On a
+ * 64-bit machine, you shouldn't need to ask.
+ */
 void
-ntp_update_second(long *newsec)
+ntp_update_second(struct timecounter *tc)
 {
+	u_int32_t *newsec;
 	long ltemp;

+	if (!time_daemon)
+		return;
+
+	newsec = &tc->offset_sec;
 	time_maxerror += time_tolerance >> SHIFT_USEC;

 	/*
@@ -308,7 +326,7 @@
 		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
 		time_offset += ltemp;
-		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+		time_adj = -ltemp << (SHIFT_SCALE - SHIFT_UPDATE);
 	} else {
 		ltemp = time_offset;
 		if (!(time_status & STA_FLL))
@@ -316,7 +334,7 @@
 		if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
 			ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
 		time_offset -= ltemp;
-		time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+		time_adj = ltemp << (SHIFT_SCALE - SHIFT_UPDATE);
 	}

 	/*
@@ -339,29 +357,12 @@
 	ltemp = time_freq;
 #endif /* PPS_SYNC */
 	if (ltemp < 0)
-		time_adj -= -ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+		time_adj -= -ltemp << (SHIFT_SCALE - SHIFT_USEC);
 	else
-		time_adj += ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
-
-#if SHIFT_HZ == 7
-	/*
-	 * When the CPU clock oscillator frequency is not a
-	 * power of two in Hz, the SHIFT_HZ is only an
-	 * approximate scale factor. In the SunOS kernel, this
-	 * results in a PLL gain factor of 1/1.28 = 0.78 what it
-	 * should be. In the following code the overall gain is
-	 * increased by a factor of 1.25, which results in a
-	 * residual error less than 3 percent.
-	 */
-	/* Same thing applies for FreeBSD --GAW */
-	if (hz == 100) {
-		if (time_adj < 0)
-			time_adj -= -time_adj >> 2;
-		else
-			time_adj += time_adj >> 2;
-	}
-#endif /* SHIFT_HZ */
+		time_adj += ltemp << (SHIFT_SCALE - SHIFT_USEC);
+	tc->adjustment = time_adj;
+
 	/* XXX - this is really bogus, but can't be fixed until
 	   xntpd's idea of the system clock is fixed to know how
 	   the user wants leap seconds handled; in the mean time,
@@ -490,6 +491,8 @@ ntp_adjtime(struct proc *p, struct ntp_adjtime_args *uap)
 	int s;
 	int error;

+	time_daemon = 1;
+
 	error = copyin((caddr_t)uap->tp, (caddr_t)&ntv, sizeof(ntv));
 	if (error)
 		return error;
diff --git a/sys/kern/kern_random.c b/sys/kern/kern_random.c
index f066949..7fd8364 100644
--- a/sys/kern/kern_random.c
+++ b/sys/kern/kern_random.c
@@ -1,7 +1,7 @@
 /*
  * random_machdep.c -- A strong random number generator
  *
- * $Id: random_machdep.c,v 1.19 1997/10/28 15:58:13 bde Exp $
+ * $Id: random_machdep.c,v 1.20 1997/12/26 20:42:11 phk Exp $
  *
  * Version 0.95, last modified 18-Oct-95
  *
@@ -190,21 +190,8 @@ add_timer_randomness(struct random_bucket *r, struct timer_rand_state *state,
 	u_int	nbits;
 	u_int32_t time;

-#if defined(I586_CPU) || defined(I686_CPU)
-	if (tsc_freq != 0) {
-		num ^= (u_int32_t) rdtsc() << 16;
-		r->entropy_count += 2;
-	} else {
-#endif
-		disable_intr();
-		outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
-		num ^= inb(TIMER_CNTR0) << 16;
-		num ^= inb(TIMER_CNTR0) << 24;
-		enable_intr();
-		r->entropy_count += 2;
-#if defined(I586_CPU) || defined(I686_CPU)
-	}
-#endif
+	num ^= timecounter->get_timecount() << 16;
+	r->entropy_count += 2;

 	time = ticks;
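[Before the kern_tc.c copy of the new code, it helps to pin down the fixed-point format it uses: tco_setscales() divides 10^9 << 32 (nanoseconds per second in 32.32 fixed point) by the counter frequency, and microtime()/nanotime() multiply a counter delta by the resulting scales and shift down by 32. A minimal userland sketch of the same arithmetic, using the 1193182 Hz i8254 rate purely as an example input:

	#include <stdint.h>
	#include <stdio.h>

	int
	main(void)
	{
		uint64_t freq = 1193182;	/* example: i8254 rate, Hz */
		/* nsec per count, 32.32 fixed point (adjustment == 0) */
		uint64_t scale = (1000000000ULL << 32) / freq;
		uint32_t scale_micro = scale / 1000;	/* usec per count */
		uint32_t delta = 11932;		/* roughly 10 ms of counts */

		/*
		 * nanotime(): the kernel splits scale into scale_nano_i
		 * and scale_nano_f so even a full 32-bit delta cannot
		 * overflow the 64-bit products; for a small delta a
		 * single multiply shows the idea.
		 */
		printf("%llu nsec\n",
		    (unsigned long long)((delta * scale) >> 32));

		/* microtime(): one 32x32->64 bit multiply, then >> 32 */
		printf("%llu usec\n",
		    (unsigned long long)(((uint64_t)delta * scale_micro) >> 32));
		return (0);
	}

The granularities in the commit message fall out of this representation: the low bit of offset_nano is 2^-32 ns, roughly .232E-18 s, and one unit of tc->adjustment moves scale by 1000 << 10 parts in 10^9 << 32, i.e. about .238E-12 s/s.]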
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 20b700b..30bb775 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -1,4 +1,5 @@
 /*-
+ * Copyright (c) 1997, 1998 Poul-Henning Kamp
  * Copyright (c) 1982, 1986, 1991, 1993
  *	The Regents of the University of California.  All rights reserved.
  * (c) UNIX System Laboratories, Inc.
@@ -36,7 +37,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)kern_clock.c	8.5 (Berkeley) 1/21/94
- * $Id: kern_clock.c,v 1.55 1998/02/06 12:13:22 eivind Exp $
+ * $Id: kern_clock.c,v 1.56 1998/02/15 13:55:06 phk Exp $
  */

 #include
@@ -55,7 +56,6 @@
 #include
 #include

-#define CLOCK_HAIR		/* XXX */
 #include
 #include

@@ -70,6 +70,9 @@
 static void initclocks __P((void *dummy));
 SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)

+static void tco_forward __P((void));
+static void tco_setscales __P((struct timecounter *tc));
+
 /* Some of these don't belong here, but it's easiest to concentrate them. */
 #if defined(SMP) && defined(BETTER_CLOCK)
 long cp_time[CPUSTATES];
@@ -91,55 +94,43 @@ long tk_nin;
 long tk_nout;
 long tk_rawcc;

+struct timecounter *timecounter;
+
 /*
  * Clock handling routines.
  *
- * This code is written to operate with two timers that run independently of
- * each other.  The main clock, running hz times per second, is used to keep
- * track of real time.  The second timer handles kernel and user profiling,
- * and does resource use estimation.  If the second timer is programmable,
- * it is randomized to avoid aliasing between the two clocks.  For example,
- * the randomization prevents an adversary from always giving up the cpu
- * just before its quantum expires.  Otherwise, it would never accumulate
- * cpu ticks.  The mean frequency of the second timer is stathz.
+ * This code is written to operate with two timers that run independently
+ * of each other.
  *
- * If no second timer exists, stathz will be zero; in this case we drive
- * profiling and statistics off the main clock.  This WILL NOT be accurate;
- * do not do it unless absolutely necessary.
+ * The main clock, running hz times per second, is used to trigger
+ * interval timers, timeouts and rescheduling as needed.
  *
+ * The second timer handles kernel and user profiling, and does resource
+ * use estimation.  If the second timer is programmable, it is randomized
+ * to avoid aliasing between the two clocks.  For example, the
+ * randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires.  Otherwise, it would never accumulate
+ * cpu ticks.  The mean frequency of the second timer is stathz.
+ *
+ * If no second timer exists, stathz will be zero; in this case we
+ * drive profiling and statistics off the main clock.  This WILL NOT
+ * be accurate; do not do it unless absolutely necessary.
  *
 * The statistics clock may (or may not) be run at a higher rate while
- * profiling.  This profile clock runs at profhz.  We require that profhz
- * be an integral multiple of stathz.
+ * profiling.  This profile clock runs at profhz.  We require that
+ * profhz be an integral multiple of stathz.  If the statistics clock
+ * is running fast, it must be divided by the ratio profhz/stathz for
+ * statistics.  (For profiling, every tick counts.)
  *
- * If the statistics clock is running fast, it must be divided by the ratio
- * profhz/stathz for statistics.  (For profiling, every tick counts.)
- */
-
-/*
- * TODO:
- *	allocate more timeout table slots when table overflows.
- */
-
-/*
- * Bump a timeval by a small number of usec's.
+ * Time-of-day is maintained using a "timecounter", which may or may
+ * not be related to the hardware generating the above mentioned
+ * interrupts.
  */
-#define BUMPTIME(t, usec) { \
-	register volatile struct timeval *tp = (t); \
-	register long us; \
- \
-	tp->tv_usec = us = tp->tv_usec + (usec); \
-	if (us >= 1000000) { \
-		tp->tv_usec = us - 1000000; \
-		tp->tv_sec++; \
-	} \
-}

 int	stathz;
 int	profhz;
 static int profprocs;
 int	ticks;
 static int psdiv, pscnt;	/* prof => stat divider */
-int psratio;			/* ratio: prof / stat */
+int	psratio;		/* ratio: prof / stat */

 volatile struct timeval time;
 volatile struct timeval mono_time;
@@ -178,9 +169,6 @@ hardclock(frame)
 	register struct clockframe *frame;
 {
 	register struct proc *p;
-	int time_update;
-	struct timeval newtime = time;
-	long ltemp;

 	p = curproc;
 	if (p) {
@@ -208,55 +196,9 @@ hardclock(frame)
 	if (stathz == 0)
 		statclock(frame);

-	/*
-	 * Increment the time-of-day.
-	 */
-	ticks++;
+	tco_forward();

-	if (timedelta == 0) {
-		time_update = CPU_THISTICKLEN(tick);
-	} else {
-		time_update = CPU_THISTICKLEN(tick) + tickdelta;
-		timedelta -= tickdelta;
-	}
-	BUMPTIME(&mono_time, time_update);
-
-	/*
-	 * Compute the phase adjustment. If the low-order bits
-	 * (time_phase) of the update overflow, bump the high-order bits
-	 * (time_update).
-	 */
-	time_phase += time_adj;
-	if (time_phase <= -FINEUSEC) {
-		ltemp = -time_phase >> SHIFT_SCALE;
-		time_phase += ltemp << SHIFT_SCALE;
-		time_update -= ltemp;
-	}
-	else if (time_phase >= FINEUSEC) {
-		ltemp = time_phase >> SHIFT_SCALE;
-		time_phase -= ltemp << SHIFT_SCALE;
-		time_update += ltemp;
-	}
-
-	newtime.tv_usec += time_update;
-	/*
-	 * On rollover of the second the phase adjustment to be used for
-	 * the next second is calculated. Also, the maximum error is
-	 * increased by the tolerance. If the PPS frequency discipline
-	 * code is present, the phase is increased to compensate for the
-	 * CPU clock oscillator frequency error.
-	 *
-	 * On a 32-bit machine and given parameters in the timex.h
-	 * header file, the maximum phase adjustment is +-512 ms and
-	 * maximum frequency offset is a tad less than) +-512 ppm. On a
-	 * 64-bit machine, you shouldn't need to ask.
-	 */
-	if (newtime.tv_usec >= 1000000) {
-		newtime.tv_usec -= 1000000;
-		newtime.tv_sec++;
-		ntp_update_second(&newtime.tv_sec);
-	}
-	CPU_CLOCKUPDATE(&time, &newtime);
+	ticks++;

 	if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL)
 		setsoftclock();
@@ -315,6 +257,10 @@ hzto(tv)
 	}
 	if (sec < 0) {
 #ifdef DIAGNOSTIC
+		if (sec == -1 && usec > 0) {
+			sec++;
+			usec -= 1000000;
+		}
 		printf("hzto: negative time difference %ld sec %ld usec\n",
 		       sec, usec);
 #endif
@@ -529,11 +475,212 @@ SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
 	0, 0, sysctl_kern_clockrate, "S,clockinfo","");

 void
-nanotime(ts)
-	struct timespec *ts;
+microtime(struct timeval *tv)
+{
+	struct timecounter *tc;
+
+	tc = (struct timecounter *)timecounter;
+	tv->tv_sec = tc->offset_sec;
+	tv->tv_usec = tc->offset_micro;
+	tv->tv_usec +=
+	    ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32;
+	if (tv->tv_usec >= 1000000) {
+		tv->tv_usec -= 1000000;
+		tv->tv_sec++;
+	}
+}
+
+void
+nanotime(struct timespec *tv)
+{
+	u_int32_t count;
+	u_int64_t delta;
+	struct timecounter *tc;
+
+	tc = (struct timecounter *)timecounter;
+	tv->tv_sec = tc->offset_sec;
+	count = tc->get_timedelta(tc);
+	delta = tc->offset_nano;
+	delta += ((u_int64_t)count * tc->scale_nano_f);
+	delta += ((u_int64_t)count * tc->scale_nano_i) << 32;
+	delta >>= 32;
+	if (delta >= 1000000000) {
+		delta -= 1000000000;
+		tv->tv_sec++;
+	}
+	tv->tv_nsec = delta;
+}
+
+static void
+tco_setscales(struct timecounter *tc)
+{
+	u_int64_t scale;
+
+	scale = 1000000000LL << 32;
+	if (tc->adjustment > 0)
+		scale += (tc->adjustment * 1000LL) << 10;
+	else
+		scale -= (-tc->adjustment * 1000LL) << 10;
+	/* scale += tc->frequency >> 1; */ /* XXX do we want to round ? */
+	scale /= tc->frequency;
+	tc->scale_micro = scale / 1000;
+	tc->scale_nano_f = scale & 0xffffffff;
+	tc->scale_nano_i = scale >> 32;
+}
+
+static u_int
+delta_timecounter(struct timecounter *tc)
+{
+	return((tc->get_timecount() - tc->offset_count) & tc->counter_mask);
+}
+
+void
+init_timecounter(struct timecounter *tc)
+{
+	struct timespec ts0, ts1;
+	int i;
+
+	if (!tc->get_timedelta)
+		tc->get_timedelta = delta_timecounter;
+	tc->adjustment = 0;
+	tco_setscales(tc);
+	tc->offset_count = tc->get_timecount();
+	tc[0].tweak = &tc[0];
+	tc[2] = tc[1] = tc[0];
+	tc[1].other = &tc[2];
+	tc[2].other = &tc[1];
+	if (!timecounter)
+		timecounter = &tc[2];
+	tc = &tc[1];
+
+	/*
+	 * Figure out the cost of calling this timecounter.
+	 * XXX: The 1:15 ratio is a guess at reality.
+	 */
+	nanotime(&ts0);
+	for (i = 0; i < 16; i ++)
+		tc->get_timecount();
+	for (i = 0; i < 240; i ++)
+		tc->get_timedelta(tc);
+	nanotime(&ts1);
+	ts1.tv_sec -= ts0.tv_sec;
+	tc->cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec;
+	tc->cost >>= 8;
+	printf("Timecounter \"%s\" frequency %lu Hz cost %u ns\n",
+	    tc->name, tc->frequency, tc->cost);

+	/* XXX: For now always start using the counter. */
+	tc->offset_count = tc->get_timecount();
+	nanotime(&ts1);
+	tc->offset_nano = (u_int64_t)ts1.tv_nsec << 32;
+	tc->offset_micro = ts1.tv_nsec / 1000;
+	tc->offset_sec = ts1.tv_sec;
+	timecounter = tc;
+}
+
+void
+set_timecounter(struct timespec *ts)
 {
-	struct timeval tv;
-	microtime(&tv);
-	ts->tv_sec = tv.tv_sec;
-	ts->tv_nsec = tv.tv_usec * 1000;
+	struct timecounter *tc, *tco;
+	int s;
+
+	s = splclock();
+	tc=timecounter->other;
+	tco = tc->other;
+	*tc = *timecounter;
+	tc->other = tco;
+	tc->offset_sec = ts->tv_sec;
+	tc->offset_nano = (u_int64_t)ts->tv_nsec << 32;
+	tc->offset_micro = ts->tv_nsec / 1000;
+	tc->offset_count = tc->get_timecount();
+	time.tv_sec = tc->offset_sec;
+	time.tv_usec = tc->offset_micro;
+	timecounter = tc;
+	splx(s);
 }
+
+static struct timecounter *
+sync_other_counter(int flag)
+{
+	struct timecounter *tc, *tco;
+	u_int32_t delta;
+
+	tc = timecounter->other;
+	tco = tc->other;
+	*tc = *timecounter;
+	tc->other = tco;
+	delta = tc->get_timedelta(tc);
+	tc->offset_count += delta;
+	tc->offset_count &= tc->counter_mask;
+	tc->offset_nano += (u_int64_t)delta * tc->scale_nano_f;
+	tc->offset_nano += (u_int64_t)delta * tc->scale_nano_i << 32;
+	if (flag)
+		return (tc);
+	if (tc->offset_nano > 1000000000ULL << 32) {
+		tc->offset_sec++;
+		tc->offset_nano -= 1000000000ULL << 32;
+	}
+	tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+	return (tc);
+}
+
+static void
+tco_forward(void)
+{
+	struct timecounter *tc;
+	u_int32_t time_update;
+
+	tc = sync_other_counter(1);
+	time_update = 0;
+
+	if (timedelta) {
+		time_update += tickdelta;
+		timedelta -= tickdelta;
+	}
+	mono_time.tv_usec += time_update + tick;
+	if (mono_time.tv_usec >= 1000000) {
+		mono_time.tv_usec -= 1000000;
+		mono_time.tv_sec++;
+	}
+	time_update *= 1000;
+	tc->offset_nano += (u_int64_t)time_update << 32;
+	if (tc->offset_nano >= 1000000000ULL << 32) {
+		tc->offset_nano -= 1000000000ULL << 32;
+		tc->offset_sec++;
+		tc->frequency = tc->tweak->frequency;
+		tc->adjustment = tc->tweak->adjustment;	/* XXX remove this ? */
+		ntp_update_second(tc);	/* XXX only needed if xntpd runs */
+		tco_setscales(tc);
+	}
+	/*
+	 * Find the usec from the nsec. This is just as fast (one
+	 * multiplication) and prevents skew between the two due
+	 * to rounding errors. (2^32/1000 = 4294967.296)
+	 */
+	tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+	time.tv_usec = tc->offset_micro;
+	time.tv_sec = tc->offset_sec;
+	timecounter = tc;
+}
+
+static int
+sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
+{
+	return (sysctl_handle_opaque(oidp, &timecounter->tweak->frequency,
+	    sizeof(timecounter->tweak->frequency), req));
+}
+
+static int
+sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
+{
+	return (sysctl_handle_opaque(oidp, &timecounter->tweak->adjustment,
+	    sizeof(timecounter->tweak->adjustment), req));
+}
+
+SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT|CTLFLAG_RW,
+    0, sizeof(u_int) , sysctl_kern_timecounter_frequency, "I", "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT|CTLFLAG_RW,
+    0, sizeof(int) , sysctl_kern_timecounter_adjustment, "I", "");
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index 8060c15..fb78ffc 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -31,7 +31,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)kern_time.c	8.1 (Berkeley) 6/10/93
- * $Id: kern_time.c,v 1.39 1997/11/06 19:29:16 phk Exp $
+ * $Id: kern_time.c,v 1.40 1997/11/07 08:52:58 phk Exp $
  */

 #include
@@ -78,6 +78,7 @@ settime(tv)
 	struct timeval *tv;
 {
 	struct timeval delta;
+	struct timespec ts;
 	struct proc *p;
 	int s;

@@ -99,7 +100,9 @@ settime(tv)
 	 */
 	delta.tv_sec = tv->tv_sec - time.tv_sec;
 	delta.tv_usec = tv->tv_usec - time.tv_usec;
-	time = *tv;
+	ts.tv_sec = tv->tv_sec;
+	ts.tv_nsec = tv->tv_usec * 1000;
+	set_timecounter(&ts);
 	/*
 	 * XXX should arrange for microtime() to agree with *tv if
 	 * it is called now. As it is, it may add up to about
@@ -138,13 +141,11 @@ clock_gettime(p, uap)
 	struct proc *p;
 	struct clock_gettime_args *uap;
 {
-	struct timeval atv;
 	struct timespec ats;

 	if (SCARG(uap, clock_id) != CLOCK_REALTIME)
 		return (EINVAL);
-	microtime(&atv);
-	TIMEVAL_TO_TIMESPEC(&atv, &ats);
+	nanotime(&ats);
 	return (copyout(&ats, SCARG(uap, tp), sizeof(ats)));
 }

@@ -199,7 +200,7 @@ clock_getres(p, uap)
 	error = 0;
 	if (SCARG(uap, tp)) {
 		ts.tv_sec = 0;
-		ts.tv_nsec = 1000000000 / hz;
+		ts.tv_nsec = 1000000000 / timecounter->frequency;
 		error = copyout(&ts, SCARG(uap, tp), sizeof(ts));
 	}
 	return (error);
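[A driver plugs hardware into this machinery by filling in the hardware half of element zero and passing the array to init_timecounter(); the i8254 and TSC drivers that actually do so live in sys/i386/isa/clock.c, outside this diff. Because init_timecounter() copies element 0 into elements 1 and 2 and flips between the latter two on every update, the backing array must hold three entries. A hypothetical sketch, with the device, register and 25 MHz rate invented for illustration, and the initializer order following the struct sketch given earlier:

	static u_int
	example_get_timecount(void)
	{
		/* read a free-running 32-bit counter; EXAMPLE_REG is made up */
		return (inl(EXAMPLE_REG));
	}

	static struct timecounter example_timecounter[3] = {
		/*
		 * Only the hardware half of element 0 is filled in;
		 * get_timedelta is left NULL so init_timecounter()
		 * substitutes the default delta_timecounter().
		 */
		{ 0, example_get_timecount, 0xffffffff, 25000000, "example" }
	};

	/* called once from the device attach code */
	init_timecounter(example_timecounter);

Once registered, the counter becomes the system clock immediately ("for now always start using the counter"), and the kern.timecounter.frequency / kern.timecounter.adjustment sysctls tweak element 0, which tco_forward() picks up at the next second rollover via tc->tweak.]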