author     phk <phk@FreeBSD.org>	1998-02-20 16:36:17 +0000
committer  phk <phk@FreeBSD.org>	1998-02-20 16:36:17 +0000
commit     044e1e629691b102791a17fc6db270846b71b01c
tree       73eeeac34c8a9254ecd3686eaa03c76379171a5c /sys/kern
parent     fba9e5d6630c11aef2f66bef4dd7fbf45179be87
Replace TOD clock code with more systematic approach.
Highlights:
 * Simple model for underlying hardware.
 * Hardware basis for timekeeping can be changed on the fly.
 * Only one hardware clock responsible for TOD keeping.
 * Provides a real nanotime() function.
 * Time granularity: .232E-18 seconds.
 * Frequency granularity: .238E-12 s/s.
 * Frequency adjustment is continuous in time.
 * Less overhead for frequency adjustment.
 * Improves xntpd performance.

Reviewed by: bde, bde, bde
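Both granularity figures fall straight out of the 64-bit fixed-point arithmetic this commit introduces; as a back-of-envelope check against tco_setscales() and nanotime() in the diff below:

    time granularity      = 1 ns / 2^32 = 10^-9 / 4294967296 s   ≈ .232E-18 s
    frequency granularity = (1000 << 10) / (10^9 << 32) = 10^-6 / 2^22   ≈ .238E-12 s/s

The first follows from nanotime() carrying a 32-bit binary fraction of a nanosecond; the second is one LSB of the adjustment term (adjustment * 1000LL) << 10 measured against the unadjusted scale 1000000000LL << 32.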
Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/kern_clock.c    335
-rw-r--r--  sys/kern/kern_ntptime.c   51
-rw-r--r--  sys/kern/kern_random.c    19
-rw-r--r--  sys/kern/kern_tc.c       335
-rw-r--r--  sys/kern/kern_time.c      13
5 files changed, 519 insertions, 234 deletions
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 20b700b..30bb775 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
@@ -36,7 +37,7 @@
* SUCH DAMAGE.
*
* @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
- * $Id: kern_clock.c,v 1.55 1998/02/06 12:13:22 eivind Exp $
+ * $Id: kern_clock.c,v 1.56 1998/02/15 13:55:06 phk Exp $
*/
#include <sys/param.h>
@@ -55,7 +56,6 @@
#include <sys/sysctl.h>
#include <machine/cpu.h>
-#define CLOCK_HAIR /* XXX */
#include <machine/clock.h>
#include <machine/limits.h>
@@ -70,6 +70,9 @@
static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)
+static void tco_forward __P((void));
+static void tco_setscales __P((struct timecounter *tc));
+
/* Some of these don't belong here, but it's easiest to concentrate them. */
#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
@@ -91,55 +94,43 @@ long tk_nin;
long tk_nout;
long tk_rawcc;
+struct timecounter *timecounter;
+
/*
* Clock handling routines.
*
- * This code is written to operate with two timers that run independently of
- * each other. The main clock, running hz times per second, is used to keep
- * track of real time. The second timer handles kernel and user profiling,
- * and does resource use estimation. If the second timer is programmable,
- * it is randomized to avoid aliasing between the two clocks. For example,
- * the randomization prevents an adversary from always giving up the cpu
- * just before its quantum expires. Otherwise, it would never accumulate
- * cpu ticks. The mean frequency of the second timer is stathz.
+ * This code is written to operate with two timers that run independently
+ * of each other.
*
- * If no second timer exists, stathz will be zero; in this case we drive
- * profiling and statistics off the main clock. This WILL NOT be accurate;
- * do not do it unless absolutely necessary.
+ * The main clock, running hz times per second, is used to trigger
+ * interval timers, timeouts and rescheduling as needed.
*
+ * The second timer handles kernel and user profiling, and does resource
+ * use estimation. If the second timer is programmable, it is randomized
+ * to avoid aliasing between the two clocks. For example, the
+ * randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires. Otherwise, it would never accumulate
+ * cpu ticks. The mean frequency of the second timer is stathz.
+ * If no second timer exists, stathz will be zero; in this case we
+ * drive profiling and statistics off the main clock. This WILL NOT
+ * be accurate; do not do it unless absolutely necessary.
* The statistics clock may (or may not) be run at a higher rate while
- * profiling. This profile clock runs at profhz. We require that profhz
- * be an integral multiple of stathz.
+ * profiling. This profile clock runs at profhz. We require that
+ * profhz be an integral multiple of stathz. If the statistics clock
+ * is running fast, it must be divided by the ratio profhz/stathz for
+ * statistics. (For profiling, every tick counts.)
*
- * If the statistics clock is running fast, it must be divided by the ratio
- * profhz/stathz for statistics. (For profiling, every tick counts.)
- */
-
-/*
- * TODO:
- * allocate more timeout table slots when table overflows.
- */
-
-/*
- * Bump a timeval by a small number of usec's.
+ * Time-of-day is maintained using a "timecounter", which may or may
+ * not be related to the hardware generating the above mentioned
+ * interrupts.
*/
-#define BUMPTIME(t, usec) { \
- register volatile struct timeval *tp = (t); \
- register long us; \
- \
- tp->tv_usec = us = tp->tv_usec + (usec); \
- if (us >= 1000000) { \
- tp->tv_usec = us - 1000000; \
- tp->tv_sec++; \
- } \
-}
int stathz;
int profhz;
static int profprocs;
int ticks;
static int psdiv, pscnt; /* prof => stat divider */
-int psratio; /* ratio: prof / stat */
+int psratio; /* ratio: prof / stat */
volatile struct timeval time;
volatile struct timeval mono_time;
@@ -178,9 +169,6 @@ hardclock(frame)
register struct clockframe *frame;
{
register struct proc *p;
- int time_update;
- struct timeval newtime = time;
- long ltemp;
p = curproc;
if (p) {
@@ -208,55 +196,9 @@ hardclock(frame)
if (stathz == 0)
statclock(frame);
- /*
- * Increment the time-of-day.
- */
- ticks++;
+ tco_forward();
- if (timedelta == 0) {
- time_update = CPU_THISTICKLEN(tick);
- } else {
- time_update = CPU_THISTICKLEN(tick) + tickdelta;
- timedelta -= tickdelta;
- }
- BUMPTIME(&mono_time, time_update);
-
- /*
- * Compute the phase adjustment. If the low-order bits
- * (time_phase) of the update overflow, bump the high-order bits
- * (time_update).
- */
- time_phase += time_adj;
- if (time_phase <= -FINEUSEC) {
- ltemp = -time_phase >> SHIFT_SCALE;
- time_phase += ltemp << SHIFT_SCALE;
- time_update -= ltemp;
- }
- else if (time_phase >= FINEUSEC) {
- ltemp = time_phase >> SHIFT_SCALE;
- time_phase -= ltemp << SHIFT_SCALE;
- time_update += ltemp;
- }
-
- newtime.tv_usec += time_update;
- /*
- * On rollover of the second the phase adjustment to be used for
- * the next second is calculated. Also, the maximum error is
- * increased by the tolerance. If the PPS frequency discipline
- * code is present, the phase is increased to compensate for the
- * CPU clock oscillator frequency error.
- *
- * On a 32-bit machine and given parameters in the timex.h
- * header file, the maximum phase adjustment is +-512 ms and
- * maximum frequency offset is a tad less than) +-512 ppm. On a
- * 64-bit machine, you shouldn't need to ask.
- */
- if (newtime.tv_usec >= 1000000) {
- newtime.tv_usec -= 1000000;
- newtime.tv_sec++;
- ntp_update_second(&newtime.tv_sec);
- }
- CPU_CLOCKUPDATE(&time, &newtime);
+ ticks++;
if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL)
setsoftclock();
@@ -315,6 +257,10 @@ hzto(tv)
}
if (sec < 0) {
#ifdef DIAGNOSTIC
+ if (sec == -1 && usec > 0) {
+ sec++;
+ usec -= 1000000;
+ }
printf("hzto: negative time difference %ld sec %ld usec\n",
sec, usec);
#endif
@@ -529,11 +475,212 @@ SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
0, 0, sysctl_kern_clockrate, "S,clockinfo","");
void
-nanotime(ts)
- struct timespec *ts;
+microtime(struct timeval *tv)
+{
+ struct timecounter *tc;
+
+ tc = (struct timecounter *)timecounter;
+ tv->tv_sec = tc->offset_sec;
+ tv->tv_usec = tc->offset_micro;
+ tv->tv_usec +=
+ ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32;
+ if (tv->tv_usec >= 1000000) {
+ tv->tv_usec -= 1000000;
+ tv->tv_sec++;
+ }
+}
+
+void
+nanotime(struct timespec *tv)
+{
+ u_int32_t count;
+ u_int64_t delta;
+ struct timecounter *tc;
+
+ tc = (struct timecounter *)timecounter;
+ tv->tv_sec = tc->offset_sec;
+ count = tc->get_timedelta(tc);
+ delta = tc->offset_nano;
+ delta += ((u_int64_t)count * tc->scale_nano_f);
+ delta += ((u_int64_t)count * tc->scale_nano_i) << 32;
+ delta >>= 32;
+ if (delta >= 1000000000) {
+ delta -= 1000000000;
+ tv->tv_sec++;
+ }
+ tv->tv_nsec = delta;
+}
+
+static void
+tco_setscales(struct timecounter *tc)
+{
+ u_int64_t scale;
+
+ scale = 1000000000LL << 32;
+ if (tc->adjustment > 0)
+ scale += (tc->adjustment * 1000LL) << 10;
+ else
+ scale -= (-tc->adjustment * 1000LL) << 10;
+ /* scale += tc->frequency >> 1; */ /* XXX do we want to round ? */
+ scale /= tc->frequency;
+ tc->scale_micro = scale / 1000;
+ tc->scale_nano_f = scale & 0xffffffff;
+ tc->scale_nano_i = scale >> 32;
+}
+
+static u_int
+delta_timecounter(struct timecounter *tc)
+{
+ return((tc->get_timecount() - tc->offset_count) & tc->counter_mask);
+}
+
+void
+init_timecounter(struct timecounter *tc)
+{
+ struct timespec ts0, ts1;
+ int i;
+
+ if (!tc->get_timedelta)
+ tc->get_timedelta = delta_timecounter;
+ tc->adjustment = 0;
+ tco_setscales(tc);
+ tc->offset_count = tc->get_timecount();
+ tc[0].tweak = &tc[0];
+ tc[2] = tc[1] = tc[0];
+ tc[1].other = &tc[2];
+ tc[2].other = &tc[1];
+ if (!timecounter)
+ timecounter = &tc[2];
+ tc = &tc[1];
+
+ /*
+ * Figure out the cost of calling this timecounter.
+ * XXX: The 1:15 ratio is a guess at reality.
+ */
+ nanotime(&ts0);
+ for (i = 0; i < 16; i ++)
+ tc->get_timecount();
+ for (i = 0; i < 240; i ++)
+ tc->get_timedelta(tc);
+ nanotime(&ts1);
+ ts1.tv_sec -= ts0.tv_sec;
+ tc->cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec;
+ tc->cost >>= 8;
+ printf("Timecounter \"%s\" frequency %lu Hz cost %u ns\n",
+ tc->name, tc->frequency, tc->cost);
+
+ /* XXX: For now always start using the counter. */
+ tc->offset_count = tc->get_timecount();
+ nanotime(&ts1);
+ tc->offset_nano = (u_int64_t)ts1.tv_nsec << 32;
+ tc->offset_micro = ts1.tv_nsec / 1000;
+ tc->offset_sec = ts1.tv_sec;
+ timecounter = tc;
+}
+
+void
+set_timecounter(struct timespec *ts)
{
- struct timeval tv;
- microtime(&tv);
- ts->tv_sec = tv.tv_sec;
- ts->tv_nsec = tv.tv_usec * 1000;
+ struct timecounter *tc, *tco;
+ int s;
+
+ s = splclock();
+ tc=timecounter->other;
+ tco = tc->other;
+ *tc = *timecounter;
+ tc->other = tco;
+ tc->offset_sec = ts->tv_sec;
+ tc->offset_nano = (u_int64_t)ts->tv_nsec << 32;
+ tc->offset_micro = ts->tv_nsec / 1000;
+ tc->offset_count = tc->get_timecount();
+ time.tv_sec = tc->offset_sec;
+ time.tv_usec = tc->offset_micro;
+ timecounter = tc;
+ splx(s);
}
+
+static struct timecounter *
+sync_other_counter(int flag)
+{
+ struct timecounter *tc, *tco;
+ u_int32_t delta;
+
+ tc = timecounter->other;
+ tco = tc->other;
+ *tc = *timecounter;
+ tc->other = tco;
+ delta = tc->get_timedelta(tc);
+ tc->offset_count += delta;
+ tc->offset_count &= tc->counter_mask;
+ tc->offset_nano += (u_int64_t)delta * tc->scale_nano_f;
+ tc->offset_nano += (u_int64_t)delta * tc->scale_nano_i << 32;
+ if (flag)
+ return (tc);
+ if (tc->offset_nano > 1000000000ULL << 32) {
+ tc->offset_sec++;
+ tc->offset_nano -= 1000000000ULL << 32;
+ }
+ tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+ return (tc);
+}
+
+static void
+tco_forward(void)
+{
+ struct timecounter *tc;
+ u_int32_t time_update;
+
+ tc = sync_other_counter(1);
+ time_update = 0;
+
+ if (timedelta) {
+ time_update += tickdelta;
+ timedelta -= tickdelta;
+ }
+ mono_time.tv_usec += time_update + tick;
+ if (mono_time.tv_usec >= 1000000) {
+ mono_time.tv_usec -= 1000000;
+ mono_time.tv_sec++;
+ }
+ time_update *= 1000;
+ tc->offset_nano += (u_int64_t)time_update << 32;
+ if (tc->offset_nano >= 1000000000ULL << 32) {
+ tc->offset_nano -= 1000000000ULL << 32;
+ tc->offset_sec++;
+ tc->frequency = tc->tweak->frequency;
+ tc->adjustment = tc->tweak->adjustment; /* XXX remove this ? */
+ ntp_update_second(tc); /* XXX only needed if xntpd runs */
+ tco_setscales(tc);
+ }
+ /*
+ * Find the usec from the nsec. This is just as fast (one
+ * multiplication) and prevents skew between the two due
+ * to rounding errors. (2^32/1000 = 4294967.296)
+ */
+ tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+ time.tv_usec = tc->offset_micro;
+ time.tv_sec = tc->offset_sec;
+ timecounter = tc;
+}
+
+static int
+sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
+{
+ return (sysctl_handle_opaque(oidp, &timecounter->tweak->frequency,
+ sizeof(timecounter->tweak->frequency), req));
+}
+
+static int
+sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
+{
+ return (sysctl_handle_opaque(oidp, &timecounter->tweak->adjustment,
+ sizeof(timecounter->tweak->adjustment), req));
+}
+
+SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT|CTLFLAG_RW,
+ 0, sizeof(u_int) , sysctl_kern_timecounter_frequency, "I", "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT|CTLFLAG_RW,
+ 0, sizeof(int) , sysctl_kern_timecounter_adjustment, "I", "");
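For orientation, here is a minimal user-space mock of the driver-facing pattern this file establishes. It is a sketch only: the field names are taken from the diff above, the real struct timecounter definition (added to a system header by this commit) is not shown here, and the hardware is invented. Note that init_timecounter() expects the driver to pass an array of three structures; it copies tc[0] into tc[1] and tc[2] and cross-links them via the "other" pointers, so tco_forward() can prepare each update in the inactive copy and publish it with a single pointer assignment.

    #include <stdio.h>
    #include <stdint.h>

    /*
     * Approximate layout, inferred from the fields the diff uses;
     * illustration only.
     */
    struct timecounter {
    	unsigned (*get_timecount)(void); /* read the hardware counter */
    	uint32_t counter_mask;		/* bits the counter implements */
    	uint32_t frequency;		/* counts per second */
    	const char *name;
    	uint32_t offset_count;		/* counter value at last sync */
    };

    /* Invented hardware: a free-running 16-bit counter. */
    static uint32_t fake_hw;

    static unsigned
    fake_get_timecount(void)
    {
    	fake_hw += 37;			/* pretend some counts elapsed */
    	return (fake_hw);
    }

    /*
     * The default delta function from the diff: masking after the
     * subtraction makes counter wraparound harmless, provided the
     * counter is sampled at least once per wrap.
     */
    static unsigned
    delta_timecounter(struct timecounter *tc)
    {
    	return ((tc->get_timecount() - tc->offset_count) &
    	    tc->counter_mask);
    }

    int
    main(void)
    {
    	/* A driver would declare three of these and fill in slot 0. */
    	struct timecounter tc = {
    		fake_get_timecount, 0xffff, 1193182, "fake_i8254", 0
    	};

    	tc.offset_count = tc.get_timecount() & tc.counter_mask;
    	printf("%s: %u counts since last sync\n",
    	    tc.name, delta_timecounter(&tc));
    	return (0);
    }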
diff --git a/sys/kern/kern_ntptime.c b/sys/kern/kern_ntptime.c
index 102e650..636a5ce 100644
--- a/sys/kern/kern_ntptime.c
+++ b/sys/kern/kern_ntptime.c
@@ -99,6 +99,7 @@ static long time_tolerance = MAXFREQ; /* frequency tolerance (scaled ppm) */
static long time_precision = 1; /* clock precision (us) */
static long time_maxerror = MAXPHASE; /* maximum error (us) */
static long time_esterror = MAXPHASE; /* estimated error (us) */
+static int time_daemon = 0; /* No timedaemon active */
/*
* The following variables establish the state of the PLL/FLL and the
@@ -285,11 +286,28 @@ hardupdate(offset)
time_freq = -time_tolerance;
}
+/*
+ * On rollover of the second the phase adjustment to be used for
+ * the next second is calculated. Also, the maximum error is
+ * increased by the tolerance. If the PPS frequency discipline
+ * code is present, the phase is increased to compensate for the
+ * CPU clock oscillator frequency error.
+ *
+ * On a 32-bit machine and given parameters in the timex.h
+ * header file, the maximum phase adjustment is +-512 ms and
+ * maximum frequency offset is a tad less than) +-512 ppm. On a
+ * 64-bit machine, you shouldn't need to ask.
+ */
void
-ntp_update_second(long *newsec)
+ntp_update_second(struct timecounter *tc)
{
+ u_int32_t *newsec;
long ltemp;
+ if (!time_daemon)
+ return;
+
+ newsec = &tc->offset_sec;
time_maxerror += time_tolerance >> SHIFT_USEC;
/*
@@ -308,7 +326,7 @@ ntp_update_second(long *newsec)
if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
time_offset += ltemp;
- time_adj = -ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ time_adj = -ltemp << (SHIFT_SCALE - SHIFT_UPDATE);
} else {
ltemp = time_offset;
if (!(time_status & STA_FLL))
@@ -316,7 +334,7 @@ ntp_update_second(long *newsec)
if (ltemp > (MAXPHASE / MINSEC) << SHIFT_UPDATE)
ltemp = (MAXPHASE / MINSEC) << SHIFT_UPDATE;
time_offset -= ltemp;
- time_adj = ltemp << (SHIFT_SCALE - SHIFT_HZ - SHIFT_UPDATE);
+ time_adj = ltemp << (SHIFT_SCALE - SHIFT_UPDATE);
}
/*
@@ -339,29 +357,12 @@ ntp_update_second(long *newsec)
ltemp = time_freq;
#endif /* PPS_SYNC */
if (ltemp < 0)
- time_adj -= -ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
+ time_adj -= -ltemp << (SHIFT_SCALE - SHIFT_USEC);
else
- time_adj += ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE);
-
-#if SHIFT_HZ == 7
- /*
- * When the CPU clock oscillator frequency is not a
- * power of two in Hz, the SHIFT_HZ is only an
- * approximate scale factor. In the SunOS kernel, this
- * results in a PLL gain factor of 1/1.28 = 0.78 what it
- * should be. In the following code the overall gain is
- * increased by a factor of 1.25, which results in a
- * residual error less than 3 percent.
- */
- /* Same thing applies for FreeBSD --GAW */
- if (hz == 100) {
- if (time_adj < 0)
- time_adj -= -time_adj >> 2;
- else
- time_adj += time_adj >> 2;
- }
-#endif /* SHIFT_HZ */
+ time_adj += ltemp << (SHIFT_SCALE - SHIFT_USEC);
+ tc->adjustment = time_adj;
+
/* XXX - this is really bogus, but can't be fixed until
xntpd's idea of the system clock is fixed to know how
the user wants leap seconds handled; in the mean time,
@@ -490,6 +491,8 @@ ntp_adjtime(struct proc *p, struct ntp_adjtime_args *uap)
int s;
int error;
+ time_daemon = 1;
+
error = copyin((caddr_t)uap->tp, (caddr_t)&ntv, sizeof(ntv));
if (error)
return error;
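One detail worth spelling out: the SHIFT_HZ terms vanish from the shifts above because the adjustment is no longer dribbled out on every hardclock tick. The old code applied time_adj hz times per second, so the shifts had to pre-divide by hz (exact only when hz is a power of two, hence the removed 1.25 gain fudge for hz == 100); the new code hands time_adj to tco_setscales() once per second, so hz drops out of the arithmetic entirely. Taking the usual timex.h values (SHIFT_SCALE = 22, SHIFT_USEC = 16, SHIFT_HZ = 7), the two agree at hz = 128:

    old: per-second slew ≈ hz * (ltemp >> (SHIFT_USEC + SHIFT_HZ - SHIFT_SCALE)) = 128 * (ltemp >> 1) = ltemp << 6
    new: per-second slew = ltemp << (SHIFT_SCALE - SHIFT_USEC) = ltemp << 6, applied once, exact for any hz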
diff --git a/sys/kern/kern_random.c b/sys/kern/kern_random.c
index f066949..7fd8364 100644
--- a/sys/kern/kern_random.c
+++ b/sys/kern/kern_random.c
@@ -1,7 +1,7 @@
/*
* random_machdep.c -- A strong random number generator
*
- * $Id: random_machdep.c,v 1.19 1997/10/28 15:58:13 bde Exp $
+ * $Id: random_machdep.c,v 1.20 1997/12/26 20:42:11 phk Exp $
*
* Version 0.95, last modified 18-Oct-95
*
@@ -190,21 +190,8 @@ add_timer_randomness(struct random_bucket *r, struct timer_rand_state *state,
u_int nbits;
u_int32_t time;
-#if defined(I586_CPU) || defined(I686_CPU)
- if (tsc_freq != 0) {
- num ^= (u_int32_t) rdtsc() << 16;
- r->entropy_count += 2;
- } else {
-#endif
- disable_intr();
- outb(TIMER_MODE, TIMER_SEL0 | TIMER_LATCH);
- num ^= inb(TIMER_CNTR0) << 16;
- num ^= inb(TIMER_CNTR0) << 24;
- enable_intr();
- r->entropy_count += 2;
-#if defined(I586_CPU) || defined(I686_CPU)
- }
-#endif
+ num ^= timecounter->get_timecount() << 16;
+ r->entropy_count += 2;
time = ticks;
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 20b700b..30bb775 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -1,4 +1,5 @@
/*-
+ * Copyright (c) 1997, 1998 Poul-Henning Kamp <phk@FreeBSD.org>
* Copyright (c) 1982, 1986, 1991, 1993
* The Regents of the University of California. All rights reserved.
* (c) UNIX System Laboratories, Inc.
@@ -36,7 +37,7 @@
* SUCH DAMAGE.
*
* @(#)kern_clock.c 8.5 (Berkeley) 1/21/94
- * $Id: kern_clock.c,v 1.55 1998/02/06 12:13:22 eivind Exp $
+ * $Id: kern_clock.c,v 1.56 1998/02/15 13:55:06 phk Exp $
*/
#include <sys/param.h>
@@ -55,7 +56,6 @@
#include <sys/sysctl.h>
#include <machine/cpu.h>
-#define CLOCK_HAIR /* XXX */
#include <machine/clock.h>
#include <machine/limits.h>
@@ -70,6 +70,9 @@
static void initclocks __P((void *dummy));
SYSINIT(clocks, SI_SUB_CLOCKS, SI_ORDER_FIRST, initclocks, NULL)
+static void tco_forward __P((void));
+static void tco_setscales __P((struct timecounter *tc));
+
/* Some of these don't belong here, but it's easiest to concentrate them. */
#if defined(SMP) && defined(BETTER_CLOCK)
long cp_time[CPUSTATES];
@@ -91,55 +94,43 @@ long tk_nin;
long tk_nout;
long tk_rawcc;
+struct timecounter *timecounter;
+
/*
* Clock handling routines.
*
- * This code is written to operate with two timers that run independently of
- * each other. The main clock, running hz times per second, is used to keep
- * track of real time. The second timer handles kernel and user profiling,
- * and does resource use estimation. If the second timer is programmable,
- * it is randomized to avoid aliasing between the two clocks. For example,
- * the randomization prevents an adversary from always giving up the cpu
- * just before its quantum expires. Otherwise, it would never accumulate
- * cpu ticks. The mean frequency of the second timer is stathz.
+ * This code is written to operate with two timers that run independently
+ * of each other.
*
- * If no second timer exists, stathz will be zero; in this case we drive
- * profiling and statistics off the main clock. This WILL NOT be accurate;
- * do not do it unless absolutely necessary.
+ * The main clock, running hz times per second, is used to trigger
+ * interval timers, timeouts and rescheduling as needed.
*
+ * The second timer handles kernel and user profiling, and does resource
+ * use estimation. If the second timer is programmable, it is randomized
+ * to avoid aliasing between the two clocks. For example, the
+ * randomization prevents an adversary from always giving up the cpu
+ * just before its quantum expires. Otherwise, it would never accumulate
+ * cpu ticks. The mean frequency of the second timer is stathz.
+ * If no second timer exists, stathz will be zero; in this case we
+ * drive profiling and statistics off the main clock. This WILL NOT
+ * be accurate; do not do it unless absolutely necessary.
* The statistics clock may (or may not) be run at a higher rate while
- * profiling. This profile clock runs at profhz. We require that profhz
- * be an integral multiple of stathz.
+ * profiling. This profile clock runs at profhz. We require that
+ * profhz be an integral multiple of stathz. If the statistics clock
+ * is running fast, it must be divided by the ratio profhz/stathz for
+ * statistics. (For profiling, every tick counts.)
*
- * If the statistics clock is running fast, it must be divided by the ratio
- * profhz/stathz for statistics. (For profiling, every tick counts.)
- */
-
-/*
- * TODO:
- * allocate more timeout table slots when table overflows.
- */
-
-/*
- * Bump a timeval by a small number of usec's.
+ * Time-of-day is maintained using a "timecounter", which may or may
+ * not be related to the hardware generating the above mentioned
+ * interrupts.
*/
-#define BUMPTIME(t, usec) { \
- register volatile struct timeval *tp = (t); \
- register long us; \
- \
- tp->tv_usec = us = tp->tv_usec + (usec); \
- if (us >= 1000000) { \
- tp->tv_usec = us - 1000000; \
- tp->tv_sec++; \
- } \
-}
int stathz;
int profhz;
static int profprocs;
int ticks;
static int psdiv, pscnt; /* prof => stat divider */
-int psratio; /* ratio: prof / stat */
+int psratio; /* ratio: prof / stat */
volatile struct timeval time;
volatile struct timeval mono_time;
@@ -178,9 +169,6 @@ hardclock(frame)
register struct clockframe *frame;
{
register struct proc *p;
- int time_update;
- struct timeval newtime = time;
- long ltemp;
p = curproc;
if (p) {
@@ -208,55 +196,9 @@ hardclock(frame)
if (stathz == 0)
statclock(frame);
- /*
- * Increment the time-of-day.
- */
- ticks++;
+ tco_forward();
- if (timedelta == 0) {
- time_update = CPU_THISTICKLEN(tick);
- } else {
- time_update = CPU_THISTICKLEN(tick) + tickdelta;
- timedelta -= tickdelta;
- }
- BUMPTIME(&mono_time, time_update);
-
- /*
- * Compute the phase adjustment. If the low-order bits
- * (time_phase) of the update overflow, bump the high-order bits
- * (time_update).
- */
- time_phase += time_adj;
- if (time_phase <= -FINEUSEC) {
- ltemp = -time_phase >> SHIFT_SCALE;
- time_phase += ltemp << SHIFT_SCALE;
- time_update -= ltemp;
- }
- else if (time_phase >= FINEUSEC) {
- ltemp = time_phase >> SHIFT_SCALE;
- time_phase -= ltemp << SHIFT_SCALE;
- time_update += ltemp;
- }
-
- newtime.tv_usec += time_update;
- /*
- * On rollover of the second the phase adjustment to be used for
- * the next second is calculated. Also, the maximum error is
- * increased by the tolerance. If the PPS frequency discipline
- * code is present, the phase is increased to compensate for the
- * CPU clock oscillator frequency error.
- *
- * On a 32-bit machine and given parameters in the timex.h
- * header file, the maximum phase adjustment is +-512 ms and
- * maximum frequency offset is a tad less than) +-512 ppm. On a
- * 64-bit machine, you shouldn't need to ask.
- */
- if (newtime.tv_usec >= 1000000) {
- newtime.tv_usec -= 1000000;
- newtime.tv_sec++;
- ntp_update_second(&newtime.tv_sec);
- }
- CPU_CLOCKUPDATE(&time, &newtime);
+ ticks++;
if (TAILQ_FIRST(&callwheel[ticks & callwheelmask]) != NULL)
setsoftclock();
@@ -315,6 +257,10 @@ hzto(tv)
}
if (sec < 0) {
#ifdef DIAGNOSTIC
+ if (sec == -1 && usec > 0) {
+ sec++;
+ usec -= 1000000;
+ }
printf("hzto: negative time difference %ld sec %ld usec\n",
sec, usec);
#endif
@@ -529,11 +475,212 @@ SYSCTL_PROC(_kern, KERN_CLOCKRATE, clockrate, CTLTYPE_STRUCT|CTLFLAG_RD,
0, 0, sysctl_kern_clockrate, "S,clockinfo","");
void
-nanotime(ts)
- struct timespec *ts;
+microtime(struct timeval *tv)
+{
+ struct timecounter *tc;
+
+ tc = (struct timecounter *)timecounter;
+ tv->tv_sec = tc->offset_sec;
+ tv->tv_usec = tc->offset_micro;
+ tv->tv_usec +=
+ ((u_int64_t)tc->get_timedelta(tc) * tc->scale_micro) >> 32;
+ if (tv->tv_usec >= 1000000) {
+ tv->tv_usec -= 1000000;
+ tv->tv_sec++;
+ }
+}
+
+void
+nanotime(struct timespec *tv)
+{
+ u_int32_t count;
+ u_int64_t delta;
+ struct timecounter *tc;
+
+ tc = (struct timecounter *)timecounter;
+ tv->tv_sec = tc->offset_sec;
+ count = tc->get_timedelta(tc);
+ delta = tc->offset_nano;
+ delta += ((u_int64_t)count * tc->scale_nano_f);
+ delta += ((u_int64_t)count * tc->scale_nano_i) << 32;
+ delta >>= 32;
+ if (delta >= 1000000000) {
+ delta -= 1000000000;
+ tv->tv_sec++;
+ }
+ tv->tv_nsec = delta;
+}
+
+static void
+tco_setscales(struct timecounter *tc)
+{
+ u_int64_t scale;
+
+ scale = 1000000000LL << 32;
+ if (tc->adjustment > 0)
+ scale += (tc->adjustment * 1000LL) << 10;
+ else
+ scale -= (-tc->adjustment * 1000LL) << 10;
+ /* scale += tc->frequency >> 1; */ /* XXX do we want to round ? */
+ scale /= tc->frequency;
+ tc->scale_micro = scale / 1000;
+ tc->scale_nano_f = scale & 0xffffffff;
+ tc->scale_nano_i = scale >> 32;
+}
+
+static u_int
+delta_timecounter(struct timecounter *tc)
+{
+ return((tc->get_timecount() - tc->offset_count) & tc->counter_mask);
+}
+
+void
+init_timecounter(struct timecounter *tc)
+{
+ struct timespec ts0, ts1;
+ int i;
+
+ if (!tc->get_timedelta)
+ tc->get_timedelta = delta_timecounter;
+ tc->adjustment = 0;
+ tco_setscales(tc);
+ tc->offset_count = tc->get_timecount();
+ tc[0].tweak = &tc[0];
+ tc[2] = tc[1] = tc[0];
+ tc[1].other = &tc[2];
+ tc[2].other = &tc[1];
+ if (!timecounter)
+ timecounter = &tc[2];
+ tc = &tc[1];
+
+ /*
+ * Figure out the cost of calling this timecounter.
+ * XXX: The 1:15 ratio is a guess at reality.
+ */
+ nanotime(&ts0);
+ for (i = 0; i < 16; i ++)
+ tc->get_timecount();
+ for (i = 0; i < 240; i ++)
+ tc->get_timedelta(tc);
+ nanotime(&ts1);
+ ts1.tv_sec -= ts0.tv_sec;
+ tc->cost = ts1.tv_sec * 1000000000 + ts1.tv_nsec - ts0.tv_nsec;
+ tc->cost >>= 8;
+ printf("Timecounter \"%s\" frequency %lu Hz cost %u ns\n",
+ tc->name, tc->frequency, tc->cost);
+
+ /* XXX: For now always start using the counter. */
+ tc->offset_count = tc->get_timecount();
+ nanotime(&ts1);
+ tc->offset_nano = (u_int64_t)ts1.tv_nsec << 32;
+ tc->offset_micro = ts1.tv_nsec / 1000;
+ tc->offset_sec = ts1.tv_sec;
+ timecounter = tc;
+}
+
+void
+set_timecounter(struct timespec *ts)
{
- struct timeval tv;
- microtime(&tv);
- ts->tv_sec = tv.tv_sec;
- ts->tv_nsec = tv.tv_usec * 1000;
+ struct timecounter *tc, *tco;
+ int s;
+
+ s = splclock();
+ tc=timecounter->other;
+ tco = tc->other;
+ *tc = *timecounter;
+ tc->other = tco;
+ tc->offset_sec = ts->tv_sec;
+ tc->offset_nano = (u_int64_t)ts->tv_nsec << 32;
+ tc->offset_micro = ts->tv_nsec / 1000;
+ tc->offset_count = tc->get_timecount();
+ time.tv_sec = tc->offset_sec;
+ time.tv_usec = tc->offset_micro;
+ timecounter = tc;
+ splx(s);
}
+
+static struct timecounter *
+sync_other_counter(int flag)
+{
+ struct timecounter *tc, *tco;
+ u_int32_t delta;
+
+ tc = timecounter->other;
+ tco = tc->other;
+ *tc = *timecounter;
+ tc->other = tco;
+ delta = tc->get_timedelta(tc);
+ tc->offset_count += delta;
+ tc->offset_count &= tc->counter_mask;
+ tc->offset_nano += (u_int64_t)delta * tc->scale_nano_f;
+ tc->offset_nano += (u_int64_t)delta * tc->scale_nano_i << 32;
+ if (flag)
+ return (tc);
+ if (tc->offset_nano > 1000000000ULL << 32) {
+ tc->offset_sec++;
+ tc->offset_nano -= 1000000000ULL << 32;
+ }
+ tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+ return (tc);
+}
+
+static void
+tco_forward(void)
+{
+ struct timecounter *tc;
+ u_int32_t time_update;
+
+ tc = sync_other_counter(1);
+ time_update = 0;
+
+ if (timedelta) {
+ time_update += tickdelta;
+ timedelta -= tickdelta;
+ }
+ mono_time.tv_usec += time_update + tick;
+ if (mono_time.tv_usec >= 1000000) {
+ mono_time.tv_usec -= 1000000;
+ mono_time.tv_sec++;
+ }
+ time_update *= 1000;
+ tc->offset_nano += (u_int64_t)time_update << 32;
+ if (tc->offset_nano >= 1000000000ULL << 32) {
+ tc->offset_nano -= 1000000000ULL << 32;
+ tc->offset_sec++;
+ tc->frequency = tc->tweak->frequency;
+ tc->adjustment = tc->tweak->adjustment; /* XXX remove this ? */
+ ntp_update_second(tc); /* XXX only needed if xntpd runs */
+ tco_setscales(tc);
+ }
+ /*
+ * Find the usec from the nsec. This is just as fast (one
+ * multiplication) and prevents skew between the two due
+ * to rounding errors. (2^32/1000 = 4294967.296)
+ */
+ tc->offset_micro = (tc->offset_nano / 1000) >> 32;
+ time.tv_usec = tc->offset_micro;
+ time.tv_sec = tc->offset_sec;
+ timecounter = tc;
+}
+
+static int
+sysctl_kern_timecounter_frequency SYSCTL_HANDLER_ARGS
+{
+ return (sysctl_handle_opaque(oidp, &timecounter->tweak->frequency,
+ sizeof(timecounter->tweak->frequency), req));
+}
+
+static int
+sysctl_kern_timecounter_adjustment SYSCTL_HANDLER_ARGS
+{
+ return (sysctl_handle_opaque(oidp, &timecounter->tweak->adjustment,
+ sizeof(timecounter->tweak->adjustment), req));
+}
+
+SYSCTL_NODE(_kern, OID_AUTO, timecounter, CTLFLAG_RW, 0, "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, frequency, CTLTYPE_INT|CTLFLAG_RW,
+ 0, sizeof(u_int) , sysctl_kern_timecounter_frequency, "I", "");
+
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, adjustment, CTLTYPE_INT|CTLFLAG_RW,
+ 0, sizeof(int) , sysctl_kern_timecounter_adjustment, "I", "");
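To make the fixed-point scaling concrete: for a counter running at the classic i8254 rate of 1193182 Hz (an illustrative figure; no driver is part of this diff), tco_setscales() computes, with adjustment == 0:

    scale        = (10^9 << 32) / 1193182   ≈ 838.095 ns per count, as 32.32 fixed point
    scale_nano_i = scale >> 32              = 838
    scale_nano_f = scale & 0xffffffff       ≈ 0.095 * 2^32
    scale_micro  = scale / 1000             ≈ 0.838095 * 2^32

nanotime() multiplies the elapsed count by this 64-bit constant and keeps a 32-bit binary fraction of a nanosecond, which is where the commit message's .232E-18 s time granularity comes from.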
diff --git a/sys/kern/kern_time.c b/sys/kern/kern_time.c
index 8060c15..fb78ffc 100644
--- a/sys/kern/kern_time.c
+++ b/sys/kern/kern_time.c
@@ -31,7 +31,7 @@
* SUCH DAMAGE.
*
* @(#)kern_time.c 8.1 (Berkeley) 6/10/93
- * $Id: kern_time.c,v 1.39 1997/11/06 19:29:16 phk Exp $
+ * $Id: kern_time.c,v 1.40 1997/11/07 08:52:58 phk Exp $
*/
#include <sys/param.h>
@@ -78,6 +78,7 @@ settime(tv)
struct timeval *tv;
{
struct timeval delta;
+ struct timespec ts;
struct proc *p;
int s;
@@ -99,7 +100,9 @@ settime(tv)
*/
delta.tv_sec = tv->tv_sec - time.tv_sec;
delta.tv_usec = tv->tv_usec - time.tv_usec;
- time = *tv;
+ ts.tv_sec = tv->tv_sec;
+ ts.tv_nsec = tv->tv_usec * 1000;
+ set_timecounter(&ts);
/*
* XXX should arrange for microtime() to agree with *tv if
* it is called now. As it is, it may add up to about
@@ -138,13 +141,11 @@ clock_gettime(p, uap)
struct proc *p;
struct clock_gettime_args *uap;
{
- struct timeval atv;
struct timespec ats;
if (SCARG(uap, clock_id) != CLOCK_REALTIME)
return (EINVAL);
- microtime(&atv);
- TIMEVAL_TO_TIMESPEC(&atv, &ats);
+ nanotime(&ats);
return (copyout(&ats, SCARG(uap, tp), sizeof(ats)));
}
@@ -199,7 +200,7 @@ clock_getres(p, uap)
error = 0;
if (SCARG(uap, tp)) {
ts.tv_sec = 0;
- ts.tv_nsec = 1000000000 / hz;
+ ts.tv_nsec = 1000000000 / timecounter->frequency;
error = copyout(&ts, SCARG(uap, tp), sizeof(ts));
}
return (error);
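From userland, the visible effect is that clock_gettime() now returns genuine nanosecond-resolution time via nanotime(), and clock_getres() reports the timecounter's granularity instead of 1/hz. A minimal check using only standard POSIX calls (on some systems, link with -lrt):

    #include <stdio.h>
    #include <time.h>

    int
    main(void)
    {
    	struct timespec ts, res;

    	/* Served by nanotime() in the kernel after this change. */
    	if (clock_gettime(CLOCK_REALTIME, &ts) != 0 ||
    	    clock_getres(CLOCK_REALTIME, &res) != 0)
    		return (1);
    	printf("now %ld.%09ld, resolution %ld ns\n",
    	    (long)ts.tv_sec, ts.tv_nsec, res.tv_nsec);
    	return (0);
    }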