 sys/amd64/amd64/tsc.c      |   1
 sys/i386/i386/geode.c      |  15
 sys/i386/i386/tsc.c        |   1
 sys/kern/kern_clock.c      |   6
 sys/kern/kern_exit.c       |   3
 sys/kern/kern_proc.c       |   2
 sys/kern/kern_resource.c   | 113
 sys/kern/kern_synch.c      |   6
 sys/kern/kern_tc.c         | 138
 sys/sparc64/sparc64/tick.c |  10
 sys/sys/proc.h             |   3
 sys/sys/systm.h            |   7
 12 files changed, 217 insertions(+), 88 deletions(-)
diff --git a/sys/amd64/amd64/tsc.c b/sys/amd64/amd64/tsc.c
index 6a5b17c..993991a 100644
--- a/sys/amd64/amd64/tsc.c
+++ b/sys/amd64/amd64/tsc.c
@@ -77,6 +77,7 @@ init_TSC(void)
tsc_freq = tscval[1] - tscval[0];
if (bootverbose)
printf("TSC clock: %lu Hz\n", tsc_freq);
+ set_cputicker(rdtsc, tsc_freq, 1);
}
diff --git a/sys/i386/i386/geode.c b/sys/i386/i386/geode.c
index a32d68c..4dbad89 100644
--- a/sys/i386/i386/geode.c
+++ b/sys/i386/i386/geode.c
@@ -110,6 +110,20 @@ static struct timecounter geode_timecounter = {
1000
};
+static uint64_t
+geode_cputicks(void)
+{
+ unsigned c;
+ static unsigned last;
+ static uint64_t offset;
+
+ c = inl(geode_counter);
+ if (c < last)
+ offset += (1LL << 32);
+ last = c;
+ return (offset | c);
+}
+
/*
* The GEODE watchdog runs from a 32kHz frequency. One period of that is
* 31250 nanoseconds which we round down to 2^14 nanoseconds. The watchdog
@@ -176,6 +190,7 @@ geode_probe(device_t self)
tc_init(&geode_timecounter);
EVENTHANDLER_REGISTER(watchdog_list, geode_watchdog,
NULL, 0);
+ set_cputicker(geode_cputicks, 27000000, 0);
}
} else if (pci_get_devid(self) == 0x0510100b) {
gpio = pci_read_config(self, PCIR_BAR(0), 4);
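geode_cputicks() above shows a standard trick: a free-running 32-bit
hardware counter is widened to 64 bits in software by watching for
wraparound. A minimal standalone sketch of the same technique, with a
hypothetical read_counter32() standing in for the inl(geode_counter)
read:

	#include <stdint.h>

	extern uint32_t read_counter32(void);	/* hypothetical hardware read */

	uint64_t
	counter64(void)
	{
		static uint32_t last;	/* previous raw reading */
		static uint64_t offset;	/* wrap count, kept in the high 32 bits */
		uint32_t c;

		c = read_counter32();
		if (c < last)		/* raw value went down: we wrapped */
			offset += (1ULL << 32);
		last = c;
		return (offset | c);	/* offset's low 32 bits are always zero */
	}

This only stays correct if the function is called at least once per
counter period (about 159 seconds at the 27 MHz rate registered above)
and if calls are serialized; both hold here, since cpu_ticks() is
invoked from the context switch and calcru() paths, which run under
sched_lock.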
diff --git a/sys/i386/i386/tsc.c b/sys/i386/i386/tsc.c
index 01c8d72..cce5989 100644
--- a/sys/i386/i386/tsc.c
+++ b/sys/i386/i386/tsc.c
@@ -86,6 +86,7 @@ init_TSC(void)
tsc_freq = tscval[1] - tscval[0];
if (bootverbose)
printf("TSC clock: %ju Hz\n", (intmax_t)tsc_freq);
+ set_cputicker(rdtsc, tsc_freq, 1);
}
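Both TSC versions now hand their counter to the kernel through the new
set_cputicker() interface (declared in sys/sys/systm.h at the end of
this diff): the reader function, its frequency, and a flag indicating
whether that frequency can vary. The TSC passes var = 1 because its
rate can change at runtime, which is what makes the periodic
recalibration added to kern_tc.c necessary. A hedged sketch of how a
hypothetical fixed-rate platform counter would use the same API:

	#include <sys/param.h>
	#include <sys/systm.h>

	extern uint64_t myplat_read_counter(void);	/* hypothetical */

	static uint64_t
	myplat_cputicks(void)
	{
		return (myplat_read_counter());
	}

	static void
	myplat_ticker_init(void)
	{
		/* 100 MHz fixed-rate counter: var = 0, never recalibrated */
		set_cputicker(myplat_cputicks, 100000000, 0);
	}

Passing func == NULL reverts to the timecounter-based tc_cpu_ticks()
default, as the kern_tc.c implementation below shows.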
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 0316c3e..d7fb1a0 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -415,7 +415,7 @@ statclock(int usermode)
*/
if (p->p_flag & P_SA)
thread_statclock(1);
- p->p_rux.rux_uticks++;
+ td->td_uticks++;
if (p->p_nice > NZERO)
cp_time[CP_NICE]++;
else
@@ -435,13 +435,13 @@ statclock(int usermode)
*/
if ((td->td_pflags & TDP_ITHREAD) ||
td->td_intr_nesting_level >= 2) {
- p->p_rux.rux_iticks++;
+ td->td_iticks++;
cp_time[CP_INTR]++;
} else {
if (p->p_flag & P_SA)
thread_statclock(0);
td->td_pticks++;
- p->p_rux.rux_sticks++;
+ td->td_sticks++;
if (td != PCPU_GET(idlethread))
cp_time[CP_SYS]++;
else
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index 8803ac9..ec9a529 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -545,6 +545,9 @@ retry:
/* Do the same timestamp bookkeeping that mi_switch() would do. */
new_switchtime = cpu_ticks();
p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
+ p->p_rux.rux_uticks += td->td_uticks;
+ p->p_rux.rux_sticks += td->td_sticks;
+ p->p_rux.rux_iticks += td->td_iticks;
PCPU_SET(switchtime, new_switchtime);
PCPU_SET(switchticks, ticks);
cnt.v_swtch++;
diff --git a/sys/kern/kern_proc.c b/sys/kern/kern_proc.c
index e02b947..a7386b2 100644
--- a/sys/kern/kern_proc.c
+++ b/sys/kern/kern_proc.c
@@ -694,7 +694,7 @@ fill_kinfo_proc_only(struct proc *p, struct kinfo_proc *kp)
kp->ki_swtime = p->p_swtime;
kp->ki_pid = p->p_pid;
kp->ki_nice = p->p_nice;
- kp->ki_runtime = p->p_rux.rux_runtime * 1000000 / cpu_tickrate();
+ kp->ki_runtime = cputick2usec(p->p_rux.rux_runtime);
mtx_unlock_spin(&sched_lock);
if ((p->p_sflag & PS_INMEM) && p->p_stats != NULL) {
kp->ki_start = p->p_stats->p_start;
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 8ecac2e..6096b6d 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -69,8 +69,6 @@ static struct mtx uihashtbl_mtx;
static LIST_HEAD(uihashhead, uidinfo) *uihashtbl;
static u_long uihash; /* size of hash table - 1 */
-static void calcru1(struct proc *p, struct rusage_ext *ruxp,
- struct timeval *up, struct timeval *sp);
static int donice(struct thread *td, struct proc *chgp, int n);
static struct uidinfo *uilookup(uid_t uid);
@@ -694,57 +692,6 @@ getrlimit(td, uap)
return (error);
}
-/*
- * Transform the running time and tick information in proc p into user,
- * system, and interrupt time usage.
- */
-void
-calcru(p, up, sp)
- struct proc *p;
- struct timeval *up;
- struct timeval *sp;
-{
- uint64_t bt;
- struct rusage_ext rux;
- struct thread *td;
- int bt_valid;
-
- PROC_LOCK_ASSERT(p, MA_OWNED);
- mtx_assert(&sched_lock, MA_NOTOWNED);
- bt_valid = 0;
- bt = 0;
- mtx_lock_spin(&sched_lock);
- rux = p->p_rux;
- FOREACH_THREAD_IN_PROC(p, td) {
- if (TD_IS_RUNNING(td)) {
- /*
- * Adjust for the current time slice. This is
- * actually fairly important since the error here is
- * on the order of a time quantum which is much
- * greater than the precision of binuptime().
- */
- KASSERT(td->td_oncpu != NOCPU,
- ("%s: running thread has no CPU", __func__));
- if (!bt_valid) {
- bt = cpu_ticks();
- bt_valid = 1;
- }
- /*
- * XXX: Doesn't this mean that this quantum will
- * XXX: get counted twice if calcru() is called
- * XXX: from SIGINFO ?
- */
- rux.rux_runtime +=
- (bt - pcpu_find(td->td_oncpu)->pc_switchtime);
- }
- }
- mtx_unlock_spin(&sched_lock);
- calcru1(p, &rux, up, sp);
- p->p_rux.rux_uu = rux.rux_uu;
- p->p_rux.rux_su = rux.rux_su;
- p->p_rux.rux_iu = rux.rux_iu;
-}
-
void
calccru(p, up, sp)
struct proc *p;
@@ -753,35 +700,52 @@ calccru(p, up, sp)
{
PROC_LOCK_ASSERT(p, MA_OWNED);
- calcru1(p, &p->p_crux, up, sp);
+ calcru(p, up, sp);
}
-static void
-calcru1(p, ruxp, up, sp)
- struct proc *p;
- struct rusage_ext *ruxp;
- struct timeval *up;
- struct timeval *sp;
+/*
+ * Transform the running time and tick information in proc p into user,
+ * system, and interrupt time usage. If appropriate, include the current
+ * time slice on this CPU.
+ */
+
+void
+calcru(struct proc *p, struct timeval *up, struct timeval *sp)
{
+ struct thread *td;
+ struct rusage_ext *ruxp = &p->p_rux;
+ uint64_t u;
/* {user, system, interrupt, total} {ticks, usec}; previous tu: */
u_int64_t ut, uu, st, su, it, iu, tt, tu, ptu;
+ PROC_LOCK_ASSERT(p, MA_OWNED);
+ mtx_assert(&sched_lock, MA_NOTOWNED);
+ mtx_lock_spin(&sched_lock);
+ if (curthread->td_proc == p) {
+ td = curthread;
+ u = cpu_ticks();
+ ruxp->rux_runtime += (u - PCPU_GET(switchtime));
+ PCPU_SET(switchtime, u);
+ ruxp->rux_uticks += td->td_uticks;
+ td->td_uticks = 0;
+ ruxp->rux_iticks += td->td_iticks;
+ td->td_iticks = 0;
+ ruxp->rux_sticks += td->td_sticks;
+ td->td_sticks = 0;
+ }
+
ut = ruxp->rux_uticks;
st = ruxp->rux_sticks;
it = ruxp->rux_iticks;
+ tu = ruxp->rux_runtime;
+ mtx_unlock_spin(&sched_lock);
+ tu = cputick2usec(tu);
tt = ut + st + it;
if (tt == 0) {
st = 1;
tt = 1;
}
- tu = (ruxp->rux_runtime * 1000000LL) / cpu_tickrate();
ptu = ruxp->rux_uu + ruxp->rux_su + ruxp->rux_iu;
- if (tu < ptu) {
- printf(
-"calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n",
- (uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm);
- tu = ptu;
- }
if ((int64_t)tu < 0) {
printf("calcru: negative runtime of %jd usec for pid %d (%s)\n",
(intmax_t)tu, p->p_pid, p->p_comm);
@@ -792,7 +756,17 @@ calcru1(p, ruxp, up, sp)
uu = (tu * ut) / tt;
su = (tu * st) / tt;
iu = tu - uu - su;
-
+ if (tu < ptu) {
+ printf(
+"calcru: runtime went backwards from %ju usec to %ju usec for pid %d (%s)\n",
+ (uintmax_t)ptu, (uintmax_t)tu, p->p_pid, p->p_comm);
+ printf("u %ju:%ju/%ju s %ju:%ju/%ju i %ju:%ju/%ju\n",
+ (uintmax_t)ut, (uintmax_t)ruxp->rux_uu, uu,
+ (uintmax_t)st, (uintmax_t)ruxp->rux_su, su,
+ (uintmax_t)it, (uintmax_t)ruxp->rux_iu, iu);
+ tu = ptu;
+ }
+#if 0
/* Enforce monotonicity. */
if (uu < ruxp->rux_uu || su < ruxp->rux_su || iu < ruxp->rux_iu) {
if (uu < ruxp->rux_uu)
@@ -814,6 +788,9 @@ calcru1(p, ruxp, up, sp)
KASSERT(iu >= ruxp->rux_iu,
("calcru: monotonisation botch 2"));
}
+ KASSERT(uu + su + iu <= tu,
+ ("calcru: monotisation botch 3"));
+#endif
ruxp->rux_uu = uu;
ruxp->rux_su = su;
ruxp->rux_iu = iu;
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 3ccbc15..66fbef9 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -386,6 +386,12 @@ mi_switch(int flags, struct thread *newtd)
*/
new_switchtime = cpu_ticks();
p->p_rux.rux_runtime += (new_switchtime - PCPU_GET(switchtime));
+ p->p_rux.rux_uticks += td->td_uticks;
+ td->td_uticks = 0;
+ p->p_rux.rux_iticks += td->td_iticks;
+ td->td_iticks = 0;
+ p->p_rux.rux_sticks += td->td_sticks;
+ td->td_sticks = 0;
td->td_generation++; /* bump preempt-detect counter */
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 397fd2f..212473e 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -116,6 +116,7 @@ TC_STATS(nsetclock);
#undef TC_STATS
static void tc_windup(void);
+static void cpu_tick_calibrate(int);
static int
sysctl_kern_boottime(SYSCTL_HANDLER_ARGS)
@@ -364,6 +365,7 @@ tc_setclock(struct timespec *ts)
struct timespec ts2;
struct bintime bt, bt2;
+ cpu_tick_calibrate(1);
nsetclock++;
binuptime(&bt2);
timespec2bintime(ts, &bt);
@@ -380,6 +382,7 @@ tc_setclock(struct timespec *ts)
(intmax_t)ts2.tv_sec, ts2.tv_nsec,
(intmax_t)ts->tv_sec, ts->tv_nsec);
}
+ cpu_tick_calibrate(1);
}
/*
@@ -476,8 +479,8 @@ tc_windup(void)
* x = a * 2^32 / 10^9 = a * 4.294967296
*
* The range of th_adjustment is +/- 5000PPM so inside a 64bit int
- * we can only multiply by about 850 without overflowing, but that
- * leaves suitably precise fractions for multiply before divide.
+ * we can only multiply by about 850 without overflowing, which
+ * leaves no suitably precise fractions for multiply before divide.
*
* Divide before multiply with a fraction of 2199/512 results in a
* systematic undercompensation of 10PPM of th_adjustment. On a
@@ -750,11 +753,16 @@ void
tc_ticktock(void)
{
static int count;
+ static time_t last_calib;
if (++count < tc_tick)
return;
count = 0;
tc_windup();
+ if (time_uptime != last_calib && !(time_uptime & 0xf)) {
+ cpu_tick_calibrate(0);
+ last_calib = time_uptime;
+ }
}
static void
@@ -784,13 +792,18 @@ inittimecounter(void *dummy)
SYSINIT(timecounter, SI_SUB_CLOCKS, SI_ORDER_SECOND, inittimecounter, NULL)
+/* Cpu tick handling -------------------------------------------------*/
+
+static int cpu_tick_variable;
+static uint64_t cpu_tick_frequency;
+
static
uint64_t
tc_cpu_ticks(void)
{
static uint64_t base;
static unsigned last;
- uint64_t u;
+ unsigned u;
struct timecounter *tc;
tc = timehands->th_counter;
@@ -801,5 +814,120 @@ tc_cpu_ticks(void)
return (u + base);
}
-uint64_t (*cpu_ticks)(void) = tc_cpu_ticks;
-uint64_t (*cpu_tickrate)(void) = tc_getfrequency;
+/*
+ * This function gets called every 16 seconds on only one designated
+ * CPU in the system from hardclock() via tc_ticktock().
+ *
+ * Whenever the real time clock is stepped we get called with reset=1
+ * to make sure we handle suspend/resume and similar events correctly.
+ */
+
+static void
+cpu_tick_calibrate(int reset)
+{
+ static uint64_t c_last;
+ uint64_t c_this, c_delta;
+ static struct bintime t_last;
+ struct bintime t_this, t_delta;
+
+ if (reset) {
+ /* The clock was stepped, abort & reset */
+ t_last.sec = 0;
+ return;
+ }
+
+ /* we don't calibrate fixed rate cputicks */
+ if (!cpu_tick_variable)
+ return;
+
+ getbinuptime(&t_this);
+ c_this = cpu_ticks();
+ if (t_last.sec != 0) {
+ c_delta = c_this - c_last;
+ t_delta = t_this;
+ bintime_sub(&t_delta, &t_last);
+ if (0 && bootverbose) {
+ struct timespec ts;
+ bintime2timespec(&t_delta, &ts);
+ printf("%ju %ju.%016jx %ju.%09ju",
+ (uintmax_t)c_delta >> 4,
+ (uintmax_t)t_delta.sec, (uintmax_t)t_delta.frac,
+ (uintmax_t)ts.tv_sec, (uintmax_t)ts.tv_nsec);
+ }
+ /*
+ * Validate that 16 +/- 1/256 seconds passed.
+ * After division by 16 this gives us a precision of
+ * roughly 250PPM which is sufficient
+ */
+ if (t_delta.sec > 16 || (
+ t_delta.sec == 16 && t_delta.frac >= (0x01LL << 56))) {
+ /* too long */
+ if (0 && bootverbose)
+ printf("\ttoo long\n");
+ } else if (t_delta.sec < 15 ||
+ (t_delta.sec == 15 && t_delta.frac <= (0xffLL << 56))) {
+ /* too short */
+ if (0 && bootverbose)
+ printf("\ttoo short\n");
+ } else {
+ /* just right */
+ c_delta >>= 4;
+ if (c_delta > cpu_tick_frequency) {
+ if (0 && bootverbose)
+ printf("\thigher\n");
+ cpu_tick_frequency = c_delta;
+ } else {
+ if (0 && bootverbose)
+ printf("\tlower\n");
+ }
+ }
+ }
+ c_last = c_this;
+ t_last = t_this;
+}
+
+void
+set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var)
+{
+
+ if (func == NULL) {
+ cpu_ticks = tc_cpu_ticks;
+ } else {
+ cpu_tick_frequency = freq;
+ cpu_tick_variable = var;
+ cpu_ticks = func;
+ }
+}
+
+uint64_t
+cpu_tickrate(void)
+{
+
+ if (cpu_ticks == tc_cpu_ticks)
+ return (tc_getfrequency());
+ return (cpu_tick_frequency);
+}
+
+/*
+ * We need to be slightly careful converting cputicks to microseconds.
+ * There is plenty of margin in 64 bits of microseconds (half a million
+ * years) and in 64 bits at 4 GHz (146 years), but if we do a multiply
+ * before divide conversion (to retain precision) we find that the
+ * margin shrinks to 1.5 hours (one millionth of 146y).
+ * With a three-prong approach we never lose significant bits, no
+ * matter what the cputick rate and the length of the time interval are.
+ */
+
+uint64_t
+cputick2usec(uint64_t tick)
+{
+
+ if (tick > 18446744073709551LL) /* floor(2^64 / 1000) */
+ return (tick / (cpu_tickrate() / 1000000LL));
+ else if (tick > 18446744073709LL) /* floor(2^64 / 1000000) */
+ return ((tick * 1000LL) / (cpu_tickrate() / 1000LL));
+ else
+ return ((tick * 1000000LL) / cpu_tickrate());
+}
+
+cpu_tick_f *cpu_ticks = tc_cpu_ticks;
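Two numeric details in the new kern_tc.c code are worth checking.
First, the calibration tolerance: cpu_tick_calibrate() accepts a
measured interval of 16 +/- 1/256 seconds, and (1/256)/16 = 1/4096 is
about 244 parts per million, matching the "roughly 250PPM" the comment
claims. Second, the branch thresholds in cputick2usec() sit exactly
where the next multiplication would overflow a uint64_t: at 4 GHz the
naive (tick * 1000000) form already overflows after about 1.3 hours of
runtime, the margin the conversion comment above mentions. A userland
sketch of the same three-prong conversion, assuming a fixed 4 GHz rate
for illustration:

	#include <stdint.h>
	#include <stdio.h>

	#define RATE	4000000000ULL		/* assumed 4 GHz cputick rate */

	static uint64_t
	tick2usec(uint64_t tick)		/* mirrors the kernel's three prongs */
	{
		if (tick > 18446744073709551ULL)	/* floor(2^64 / 10^3) */
			return (tick / (RATE / 1000000ULL));
		else if (tick > 18446744073709ULL)	/* floor(2^64 / 10^6) */
			return ((tick * 1000ULL) / (RATE / 1000ULL));
		else
			return ((tick * 1000000ULL) / RATE);
	}

	int
	main(void)
	{
		/* one hour, one day and one year of ticks hit all three prongs */
		printf("%ju\n", (uintmax_t)tick2usec(3600ULL * RATE));
		printf("%ju\n", (uintmax_t)tick2usec(86400ULL * RATE));
		printf("%ju\n", (uintmax_t)tick2usec(86400ULL * 365 * RATE));
		return (0);
	}

Each step down trades three decimal digits of multiplier for three
digits of headroom, so precision degrades gradually (the middle prong
is still good to about one part in RATE/1000) instead of overflowing.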
diff --git a/sys/sparc64/sparc64/tick.c b/sys/sparc64/sparc64/tick.c
index 6f9f1a8..61f3bae 100644
--- a/sys/sparc64/sparc64/tick.c
+++ b/sys/sparc64/sparc64/tick.c
@@ -72,13 +72,6 @@ tick_cputicks(void)
return (rd(tick));
}
-static uint64_t
-tick_cputickrate(void)
-{
-
- return (tick_freq);
-}
-
void
cpu_initclocks(void)
{
@@ -171,8 +164,7 @@ tick_init(u_long clock)
*/
tick_stop();
- cpu_ticks = tick_cputicks;
- cpu_tickrate = tick_cputickrate;
+ set_cputicker(tick_cputicks, tick_freq, 0);
}
void
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 3643672..a2a141c 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -283,6 +283,9 @@ struct thread {
struct thread *td_standin; /* (k + a) Use this for an upcall. */
struct kse_upcall *td_upcall; /* (k + j) Upcall structure. */
u_int td_pticks; /* (k) Statclock hits for profiling */
+ u_int td_sticks; /* (k) Statclock hits in system mode. */
+ u_int td_iticks; /* (k) Statclock hits in intr mode. */
+ u_int td_uticks; /* (k) Statclock hits in user mode. */
u_int td_uuticks; /* (k) Statclock hits (usr), for UTS. */
u_int td_usticks; /* (k) Statclock hits (sys), for UTS. */
int td_intrval; /* (j) Return value of TDF_INTERRUPT. */
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 17967d2..901c3c5 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -238,8 +238,11 @@ int setenv(const char *name, const char *value);
int unsetenv(const char *name);
int testenv(const char *name);
-extern uint64_t (*cpu_ticks)(void);
-extern uint64_t (*cpu_tickrate)(void);
+typedef uint64_t (cpu_tick_f)(void);
+void set_cputicker(cpu_tick_f *func, uint64_t freq, unsigned var);
+extern cpu_tick_f *cpu_ticks;
+uint64_t cpu_tickrate(void);
+uint64_t cputick2usec(uint64_t tick);
#ifdef APM_FIXUP_CALLTODO
struct timeval;