diff options
-rw-r--r-- | sys/amd64/amd64/tsc.c | 1 | ||||
-rw-r--r-- | sys/i386/i386/geode.c | 15 | ||||
-rw-r--r-- | sys/i386/i386/tsc.c | 1 | ||||
-rw-r--r-- | sys/kern/kern_clock.c | 6 | ||||
-rw-r--r-- | sys/kern/kern_exit.c | 3 | ||||
-rw-r--r-- | sys/kern/kern_proc.c | 2 | ||||
-rw-r--r-- | sys/kern/kern_resource.c | 113 | ||||
-rw-r--r-- | sys/kern/kern_synch.c | 6 | ||||
-rw-r--r-- | sys/kern/kern_tc.c | 138 | ||||
-rw-r--r-- | sys/sparc64/sparc64/tick.c | 10 | ||||
-rw-r--r-- | sys/sys/proc.h | 3 | ||||
-rw-r--r-- | sys/sys/systm.h | 7 |
12 files changed, 217 insertions, 88 deletions
diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c index 6a5b17c..993991a 100644 --- a/sys/amd64/amd64/tsc.c +++ b/sys/amd64/amd64/tsc.c @@ -77,6 +77,7 @@ init_TSC(void) tsc_freq = tscval[1] - tscval[0]; if (bootverbose) printf("TSC clock: %lu Hz\n", tsc_freq); + set_cputicker(rdtsc, tsc_freq, 1); } diff --git a/sys/i386/i386/geode.c b/sys/i386/i386/geode.c index a32d68c..4dbad89 100644 --- a/sys/i386/i386/geode.c +++ b/sys/i386/i386/geode.c @@ -110,6 +110,20 @@ static struct timecounter geode_timecounter = { 1000 }; +static uint64_t +geode_cputicks(void) +{ + unsigned c; + static unsigned last; + static uint64_t offset; + + c = inl(geode_counter); + if (c < last) + offset += (1LL << 32); + last = c; + return (offset | c); +} + /* * The GEODE watchdog runs from a 32kHz frequency. One period of that is * 31250 nanoseconds which we round down to 2^14 nanoseconds. The watchdog @@ -176,6 +190,7 @@ geode_probe(device_t self) tc_init(&geode_timecounter); EVENTHANDLER_REGISTER(watchdog_list, geode_watchdog, NULL, 0); + set_cputicker(geode_cputicks, 27000000, 0); } } else if (pci_get_devid(self) == 0x0510100b) { gpio = pci_read_config(self, PCIR_BAR(0), 4); diff --git a/sys/i386/i386/tsc.c b/sys/i386/i386/tsc.c index 01c8d72..cce5989 100644 --- a/sys/i386/i386/tsc.c +++ b/sys/i386/i386/tsc.c @@ -86,6 +86,7 @@ init_TSC(void) tsc_freq = tscval[1] - tscval[0]; if (bootverbose) printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq); + set_cputicker(rdtsc, tsc_freq, 1); } diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c index 0316c3e..d7fb1a0 100644 --- a/sys/kern/kern_clock.c +++ b/sys/kern/kern_clock.c @@ -415,7 +415,7 @@ statclock(int usermode) */ if (p->p_flag & P_SA) thread_statclock(1); - p->p_rux.rux_uticks++; + td->td_uticks++; if (p->p_nice > NZERO) cp_time[CP_NICE]++; else @@ -435,13 +435,13 @@ statclock(int usermode) */ if ((td->td_pflags & TDP_ITHREAD) || td->td_intr_nesting_level >= 2) { - p->p_rux.rux_iticks++; + td->td_iticks++; cp_time[CP_INTR]++; } 
else { if (p->p_flag & P_SA) thread_statclock(0); td->td_pticks++; - p->p_rux.rux_sticks++; + td->td_sticks++; if (td != PCPU_GET(idlethread)) cp_time[CP_SYS]++; else diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c index 8803ac9..ec9a529 100644 --- a/sys/kern/kern_exit.c +++ b/sys/kern/kern_exit.c @@ -545,6 +545,9 @@ retry: /* Do the same timestamp bookkeeping that mi_switch() would do. */ new_switchtime = cpu_ticks(); p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime)); + p->p_rux.rux_uticks += td->td_uticks; + p->p_rux.rux_sticks += td->td_sticks; + p->p_rux.rux_iticks += td->td_iticks; PCPU_SET(switchtime, new_switchtime); PCPU_SET(switchticks, ticks); cnt.v_swtch++; diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c index e02b947..a7386b2 100644 --- a/sys/kern/kern_proc.c +++ b/sys/kern/kern_proc.c @@ -694,7 +694,7 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp) kp->ki_swtime = p->p_swtime; kp->ki_pid = p->p_pid; kp->ki_nice = p->p_nice; - kp->ki_runtime = p->p_rux.rux_runtime * 1000000 / cpu_tickrate(); + kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime); mtx_unlock_spin(&sched_lock); if ((p->p_sflag & PS_INMEM) && p->p_stats != NULL) { kp->ki_start = p->p_stats->p_start; diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c index 8ecac2e..6096b6d 100644 --- a/sys/kern/kern_resource.c +++ b/sys/kern/kern_resource.c @@ -69,8 +69,6 @@ static struct mtx uihashtbl_mtx; static LIST_HEAD(uihashhead, uidinfo) *uihashtbl; static u_long uihash; /* size of hash table - 1 */ -static void calcru1(struct proc *p, struct rusage_ext *ruxp, - struct timeval *up, struct timeval *sp); static int donice(struct thread *td, struct proc *chgp, int n); static struct uidinfo *uilookup(uid_t uid); @@ -694,57 +692,6 @@ getrlimit(td, uap) return (error); } -/* - * Transform the running time and tick information in proc p into user, - * system, and interrupt time usage. 
- */ -void -calcru(p, up, sp) - struct proc *p; - struct timeval *up; - struct timeval *sp; -{ - uint64_t bt; - struct rusage_ext rux; - struct thread *td; - int bt_valid; - - PROC_LOCK_ASSERT(p, MA_OWNED); - mtx_assert(&sched_lock, MA_NOTOWNED); - bt_valid = 0; - bt = 0; - mtx_lock_spin(&sched_lock); - rux = p->p_rux; - FOREACH_THREAD_IN_PROC(p, td) { - if (TD_IS_RUNNING(td)) { - /* - * Adjust for the current time slice. This is - * actually fairly important since the error here is - * on the order of a time quantum which is much - * greater than the precision of binuptime(). - */ - KASSERT(td->td_oncpu != NOCPU, - ("%s: running thread has no CPU", __func__)); - if (!bt_valid) { - bt = cpu_ticks(); - bt_valid = 1; - } - /* - * XXX: Doesn't this mean that this quantum will - * XXX: get counted twice if calcru() is called - * XXX: from SIGINFO ? - */ - rux.rux_runtime += - (bt - pcpu_find(td->td_oncpu)->pc_switchtime); - } - } - mtx_unlock_spin(&sched_lock); - calcru1(p, &rux, up, sp); - p->p_rux.rux_uu = rux.rux_uu; - p->p_rux.rux_su = rux.rux_su; - p->p_rux.rux_iu = rux.rux_iu; -} - void calccru(p, up, sp) struct proc *p; @@ -753,35 +700,52 @@ calccru(p, up, sp) { PROC_LOCK_ASSERT(p, MA_OWNED); - calcru1(p, &p->p_crux, up, sp); + calcru(p, up, sp); } -static void -calcru1(p, ruxp, up, sp) - struct proc *p; - struct rusage_ext *ruxp; - struct timeval *up; - struct timeval *sp; +/* + * Transform the running time and tick information in proc p into user, + * system, and interrupt time usage. If appropriate, include the current + * time slice on this CPU. 
+ */ + +void +calcru(struct proc *p, struct timeval *up, struct timeval *sp) { + struct thread *td; + struct rusage_ext *ruxp = &p->p_rux; + uint64_t u; /* {user, system, interrupt, total} {ticks, usec}; previous tu: */ u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu; + PROC_LOCK_ASSERT(p, MA_OWNED); + mtx_assert(&sched_lock, MA_NOTOWNED); + mtx_lock_spin(&sched_lock); + if (curthread->td_proc == p) { + td = curthread; + u = cpu_ticks(); + ruxp->rux_runtime += (u - PCPU_GET(switchtime)); + PCPU_SET(switchtime, u); + ruxp->rux_uticks += td->td_uticks; + td->td_uticks = 0; + ruxp->rux_iticks += td->td_iticks; + td->td_iticks = 0; + ruxp->rux_sticks += td->td_sticks; + td->td_sticks = 0; + } + ut = ruxp->rux_uticks; st = ruxp->rux_sticks; it = ruxp->rux_iticks; + tu = ruxp->rux_runtime; + mtx_unlock_spin(&sched_lock); + tu = cputick2usec(tu); tt = ut + st + it; if (tt == 0) { st = 1; tt = 1; } - tu = (ruxp->rux_runtime * 1000000LL) / cpu_tickrate(); ptu = ruxp->rux_uu + ruxp->rux_su + ruxp->rux_iu; - if (tu < ptu) { - printf( -"calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n", - (uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm); - tu = ptu; - } if ((int64_t)tu < 0) { printf("calcru: negative runtime of %jd usec for pid %d (%s)\n", (intmax_t)tu, p->p_pid, p->p_comm); @@ -792,7 +756,17 @@ calcru1(p, ruxp, up, sp) uu = (tu * ut) / tt; su = (tu * st) / tt; iu = tu - uu - su; - + if (tu < ptu) { + printf( +"calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n", + (uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm); + printf("u %ju:%ju/%ju s %ju:%ju/%ju i %ju:%ju/%ju\n", + (uintmax_t)ut, (uintmax_t)ruxp->rux_uu, uu, + (uintmax_t)st, (uintmax_t)ruxp->rux_su, su, + (uintmax_t)it, (uintmax_t)ruxp->rux_iu, iu); + tu = ptu; + } +#if 0 /* Enforce monotonicity. 
*/ if (uu < ruxp->rux_uu || su < ruxp->rux_su || iu < ruxp->rux_iu) { if (uu < ruxp->rux_uu) @@ -814,6 +788,9 @@ calcru1(p, ruxp, up, sp) KASSERT(iu >= ruxp->rux_iu, ("calcru: monotonisation botch 2")); } + KASSERT(uu + su + iu <= tu, + ("calcru: monotisation botch 3")); +#endif ruxp->rux_uu = uu; ruxp->rux_su = su; ruxp->rux_iu = iu; diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c index 3ccbc15..66fbef9 100644 --- a/sys/kern/kern_synch.c +++ b/sys/kern/kern_synch.c @@ -386,6 +386,12 @@ mi_switch(int flags, struct thread *newtd) */ new_switchtime = cpu_ticks(); p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime)); + p->p_rux.rux_uticks += td->td_uticks; + td->td_uticks = 0; + p->p_rux.rux_iticks += td->td_iticks; + td->td_iticks = 0; + p->p_rux.rux_sticks += td->td_sticks; + td->td_sticks = 0; td->td_generation++; /* bump preempt-detect counter */ diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c index 397fd2f..212473e 100644 --- a/sys/kern/kern_tc.c +++ b/sys/kern/kern_tc.c @@ -116,6 +116,7 @@ TC_STATS(nsetclock); #undef TC_STATS static void tc_windup(void); +static void cpu_tick_calibrate(int); static int sysctl_kern_boottime(SYSCTL_HANDLER_ARGS) @@ -364,6 +365,7 @@ tc_setclock(struct timespec *ts) struct timespec ts2; struct bintime bt, bt2; + cpu_tick_calibrate(1); nsetclock++; binuptime(&bt2); timespec2bintime(ts, &bt); @@ -380,6 +382,7 @@ tc_setclock(struct timespec *ts) (intmax_t)ts2.tv_sec, ts2.tv_nsec, (intmax_t)ts->tv_sec, ts->tv_nsec); } + cpu_tick_calibrate(1); } /* @@ -476,8 +479,8 @@ tc_windup(void) * x = a * 2^32 / 10^9 = a * 4.294967296 * * The range of th_adjustment is +/- 5000PPM so inside a 64bit int - * we can only multiply by about 850 without overflowing, but that - * leaves suitably precise fractions for multiply before divide. + * we can only multiply by about 850 without overflowing, that + * leaves no suitably precise fractions for multiply before divide. 
* * Divide before multiply with a fraction of 2199/512 results in a * systematic undercompensation of 10PPM of th_adjustment. On a @@ -750,11 +753,16 @@ void tc_ticktock(void) { static int count; + static time_t last_calib; if (++count < tc_tick) return; count = 0; tc_windup(); + if (time_uptime != last_calib && !(time_uptime & 0xf)) { + cpu_tick_calibrate(0); + last_calib = time_uptime; + } } static void @@ -784,13 +792,18 @@ inittimecounter(void *dummy) SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL) +/* Cpu tick handling -------------------------------------------------*/ + +static int cpu_tick_variable; +static uint64_t cpu_tick_frequency; + static uint64_t tc_cpu_ticks(void) { static uint64_t base; static unsigned last; - uint64_t u; + unsigned u; struct timecounter *tc; tc = timehands->th_counter; @@ -801,5 +814,120 @@ tc_cpu_ticks(void) return (u + base); } -uint64_t (*cpu_ticks)(void) = tc_cpu_ticks; -uint64_t (*cpu_tickrate)(void) = tc_getfrequency; +/* + * This function gets called every 16 seconds on only one designated + * CPU in the system from hardclock() via tc_ticktock(). + * + * Whenever the real time clock is stepped we get called with reset=1 + * to make sure we handle suspend/resume and similar events correctly. 
+ */ + +static void +cpu_tick_calibrate(int reset) +{ + static uint64_t c_last; + uint64_t c_this, c_delta; + static struct bintime t_last; + struct bintime t_this, t_delta; + + if (reset) { + /* The clock was stepped, abort & reset */ + t_last.sec = 0; + return; + } + + /* we don't calibrate fixed rate cputicks */ + if (!cpu_tick_variable) + return; + + getbinuptime(&t_this); + c_this = cpu_ticks(); + if (t_last.sec != 0) { + c_delta = c_this - c_last; + t_delta = t_this; + bintime_sub(&t_delta, &t_last); + if (0 && bootverbose) { + struct timespec ts; + bintime2timespec(&t_delta, &ts); + printf("%ju %ju.%016jx %ju.%09ju", + (uintmax_t)c_delta >> 4, + (uintmax_t)t_delta.sec, (uintmax_t)t_delta.frac, + (uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec); + } + /* + * Validate that 16 +/- 1/256 seconds passed. + * After division by 16 this gives us a precision of + * roughly 250PPM which is sufficient + */ + if (t_delta.sec > 16 || ( + t_delta.sec == 16 && t_delta.frac >= (0x01LL << 56))) { + /* too long */ + if (0 && bootverbose) + printf("\ttoo long\n"); + } else if (t_delta.sec < 15 || + (t_delta.sec == 15 && t_delta.frac <= (0xffLL << 56))) { + /* too short */ + if (0 && bootverbose) + printf("\ttoo short\n"); + } else { + /* just right */ + c_delta >>= 4; + if (c_delta > cpu_tick_frequency) { + if (0 && bootverbose) + printf("\thigher\n"); + cpu_tick_frequency = c_delta; + } else { + if (0 && bootverbose) + printf("\tlower\n"); + } + } + } + c_last = c_this; + t_last = t_this; +} + +void +set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var) +{ + + if (func == NULL) { + cpu_ticks = tc_cpu_ticks; + } else { + cpu_tick_frequency = freq; + cpu_tick_variable = var; + cpu_ticks = func; + } +} + +uint64_t +cpu_tickrate(void) +{ + + if (cpu_ticks == tc_cpu_ticks) + return (tc_getfrequency()); + return (cpu_tick_frequency); +} + +/* + * We need to be slightly careful converting cputicks to microseconds. 
+ * There is plenty of margin in 64 bits of microseconds (half a million + * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply + * before divide conversion (to retain precision) we find that the + * margin shrinks to 1.5 hours (one millionth of 146y). + * With a three prong approach we never lose significant bits, no + * matter what the cputick rate and length of timeinterval is. + */ + +uint64_t +cputick2usec(uint64_t tick) +{ + + if (tick > 18446744073709551LL) /* floor(2^64 / 1000) */ + return (tick / (cpu_tickrate() / 1000000LL)); + else if (tick > 18446744073709LL) /* floor(2^64 / 1000000) */ + return ((tick * 1000LL) / (cpu_tickrate() / 1000LL)); + else + return ((tick * 1000000LL) / cpu_tickrate()); +} + +cpu_tick_f *cpu_ticks = tc_cpu_ticks; diff --git a/sys/sparc64/sparc64/tick.c b/sys/sparc64/sparc64/tick.c index 6f9f1a8..61f3bae 100644 --- a/sys/sparc64/sparc64/tick.c +++ b/sys/sparc64/sparc64/tick.c @@ -72,13 +72,6 @@ tick_cputicks(void) return (rd(tick)); } -static uint64_t -tick_cputickrate(void) -{ - - return (tick_freq); -} - void cpu_initclocks(void) { @@ -171,8 +164,7 @@ tick_init(u_long clock) */ tick_stop(); - cpu_ticks = tick_cputicks; - cpu_tickrate = tick_cputickrate; + set_cputicker(tick_cputicks, tick_freq, 0); } void diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 3643672..a2a141c 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -283,6 +283,9 @@ struct thread { struct thread *td_standin; /* (k + a) Use this for an upcall. */ struct kse_upcall *td_upcall; /* (k + j) Upcall structure. */ u_int td_pticks; /* (k) Statclock hits for profiling */ + u_int td_sticks; /* (k) Statclock hits in system mode. */ + u_int td_iticks; /* (k) Statclock hits in intr mode. */ + u_int td_uticks; /* (k) Statclock hits in user mode. */ u_int td_uuticks; /* (k) Statclock hits (usr), for UTS. */ u_int td_usticks; /* (k) Statclock hits (sys), for UTS. */ int td_intrval; /* (j) Return value of TDF_INTERRUPT. 
*/ diff --git a/sys/sys/systm.h b/sys/sys/systm.h index 17967d2..901c3c5 100644 --- a/sys/sys/systm.h +++ b/sys/sys/systm.h @@ -238,8 +238,11 @@ int setenv(const char *name, const char *value); int unsetenv(const char *name); int testenv(const char *name); -extern uint64_t (*cpu_ticks)(void); -extern uint64_t (*cpu_tickrate)(void); +typedef uint64_t (cpu_tick_f)(void); +void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var); +extern cpu_tick_f *cpu_ticks; +uint64_t cpu_tickrate(void); +uint64_t cputick2usec(uint64_t tick); #ifdef APM_FIXUP_CALLTODO struct timeval; |