-rw-r--r--  sys/conf/NOTES              |   5
-rw-r--r--  sys/conf/options            |   1
-rw-r--r--  sys/kern/kern_clock.c       |   3
-rw-r--r--  sys/kern/kern_clocksource.c | 468
-rw-r--r--  sys/kern/kern_tc.c          |  59
-rw-r--r--  sys/kern/kern_timeout.c     | 760
-rw-r--r--  sys/kern/subr_param.c       |   8
-rw-r--r--  sys/netinet/tcp_timer.c     |  18
-rw-r--r--  sys/sys/_callout.h          |   7
-rw-r--r--  sys/sys/callout.h           |  24
-rw-r--r--  sys/sys/systm.h             |   1
-rw-r--r--  sys/sys/time.h              |  44
12 files changed, 893 insertions, 505 deletions
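
The diff below converts the eventtimer and callout(9) machinery from struct bintime and tick counts to sbintime_t, a signed 64-bit 32.32 fixed-point count of seconds. As an illustration (not part of the patch), the user-space sketch below models that representation and the seconds/fraction split used by the CTR() trace lines in the hunks; SBT_1S and SBT_1MS are reproduced here under that 32.32 assumption.

/*
 * Illustrative standalone sketch, not part of the patch: sbintime_t as a
 * signed 64-bit 32.32 fixed-point number of seconds, which is what lets
 * the diff replace bintime_addx()/bintime_cmp() with plain integer ops.
 */
#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;
#define SBT_1S  ((sbintime_t)1 << 32)           /* one second */
#define SBT_1MS (SBT_1S / 1000)                 /* one millisecond */

int
main(void)
{
    int hz = 1000;
    sbintime_t tick_sbt = SBT_1S / hz;          /* hardclock period, as in the diff */
    sbintime_t now = 5 * SBT_1S + 250 * SBT_1MS;        /* 5.25 s of uptime */
    sbintime_t next = now + tick_sbt;           /* next tick: pure addition */

    /* Same seconds/fraction split the CTR() trace lines in the diff print. */
    printf("now  %d.%08x\n", (int)(now >> 32), (unsigned)(now & 0xffffffff));
    printf("next %d.%08x\n", (int)(next >> 32), (unsigned)(next & 0xffffffff));
    return (0);
}
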
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index 5d26093..27c3380 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -259,6 +259,8 @@ options SX_NOINLINE
# SMP Debugging Options:
#
+# CALLOUT_PROFILING enables rudimentary profiling of the callwheel data
+# structure used as backend in callout(9).
# PREEMPTION allows the threads that are in the kernel to be preempted by
# higher priority [interrupt] threads. It helps with interactivity
# and allows interrupt threads to run sooner rather than waiting.
@@ -297,6 +299,9 @@ options LOCK_PROFILING
options MPROF_BUFFERS="1536"
options MPROF_HASH_SIZE="1543"
+# Profiling for the callout(9) backend.
+options CALLOUT_PROFILING
+
# Profiling for internal hash tables.
options SLEEPQUEUE_PROFILING
options TURNSTILE_PROFILING
diff --git a/sys/conf/options b/sys/conf/options
index ab5d153..75d0c97 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -68,6 +68,7 @@ TEXTDUMP_VERBOSE opt_ddb.h
ADAPTIVE_LOCKMGRS
ALQ
AUDIT opt_global.h
+CALLOUT_PROFILING
CAPABILITIES opt_capsicum.h
CAPABILITY_MODE opt_capsicum.h
COMPAT_43 opt_compat.h
diff --git a/sys/kern/kern_clock.c b/sys/kern/kern_clock.c
index 55a2bff..4439ccb 100644
--- a/sys/kern/kern_clock.c
+++ b/sys/kern/kern_clock.c
@@ -460,7 +460,7 @@ hardclock_cpu(int usermode)
if (td->td_intr_frame != NULL)
PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
- callout_tick();
+ callout_process(sbinuptime());
}
/*
@@ -550,7 +550,6 @@ hardclock_cnt(int cnt, int usermode)
if (td->td_intr_frame != NULL)
PMC_SOFT_CALL_TF( , , clock, hard, td->td_intr_frame);
#endif
- callout_tick();
/* We are in charge to handle this tick duty. */
if (newticks > 0) {
/* Dangerous and no need to call these things concurrently. */
diff --git a/sys/kern/kern_clocksource.c b/sys/kern/kern_clocksource.c
index 10732d9..c2bebbe 100644
--- a/sys/kern/kern_clocksource.c
+++ b/sys/kern/kern_clocksource.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2010-2012 Alexander Motin <mav@FreeBSD.org>
+ * Copyright (c) 2010-2013 Alexander Motin <mav@FreeBSD.org>
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -37,6 +37,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/bus.h>
+#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/kdb.h>
#include <sys/ktr.h>
@@ -63,17 +64,14 @@ int cpu_can_deep_sleep = 0; /* C3 state is available. */
int cpu_disable_deep_sleep = 0; /* Timer dies in C3. */
static void setuptimer(void);
-static void loadtimer(struct bintime *now, int first);
+static void loadtimer(sbintime_t now, int first);
static int doconfigtimer(void);
static void configtimer(int start);
static int round_freq(struct eventtimer *et, int freq);
-static void getnextcpuevent(struct bintime *event, int idle);
-static void getnextevent(struct bintime *event);
-static int handleevents(struct bintime *now, int fake);
-#ifdef SMP
-static void cpu_new_callout(int cpu, int ticks);
-#endif
+static sbintime_t getnextcpuevent(int idle);
+static sbintime_t getnextevent(void);
+static int handleevents(sbintime_t now, int fake);
static struct mtx et_hw_mtx;
@@ -94,13 +92,11 @@ static struct mtx et_hw_mtx;
}
static struct eventtimer *timer = NULL;
-static struct bintime timerperiod; /* Timer period for periodic mode. */
-static struct bintime hardperiod; /* hardclock() events period. */
-static struct bintime statperiod; /* statclock() events period. */
-static struct bintime profperiod; /* profclock() events period. */
-static struct bintime nexttick; /* Next global timer tick time. */
-static struct bintime nexthard; /* Next global hardlock() event. */
-static u_int busy = 0; /* Reconfiguration is in progress. */
+static sbintime_t timerperiod; /* Timer period for periodic mode. */
+static sbintime_t statperiod; /* statclock() events period. */
+static sbintime_t profperiod; /* profclock() events period. */
+static sbintime_t nexttick; /* Next global timer tick time. */
+static u_int busy = 1; /* Reconfiguration is in progress. */
static int profiling = 0; /* Profiling events enabled. */
static char timername[32]; /* Wanted timer. */
@@ -116,11 +112,6 @@ TUNABLE_INT("kern.eventtimer.idletick", &idletick);
SYSCTL_UINT(_kern_eventtimer, OID_AUTO, idletick, CTLFLAG_RW, &idletick,
0, "Run periodic events when idle");
-static u_int activetick = 1; /* Run all periodic events when active. */
-TUNABLE_INT("kern.eventtimer.activetick", &activetick);
-SYSCTL_UINT(_kern_eventtimer, OID_AUTO, activetick, CTLFLAG_RW, &activetick,
- 0, "Run all periodic events when active");
-
static int periodic = 0; /* Periodic or one-shot mode. */
static int want_periodic = 0; /* What mode to prefer. */
TUNABLE_INT("kern.eventtimer.periodic", &want_periodic);
@@ -129,31 +120,23 @@ struct pcpu_state {
struct mtx et_hw_mtx; /* Per-CPU timer mutex. */
u_int action; /* Reconfiguration requests. */
u_int handle; /* Immediate handle resuests. */
- struct bintime now; /* Last tick time. */
- struct bintime nextevent; /* Next scheduled event on this CPU. */
- struct bintime nexttick; /* Next timer tick time. */
- struct bintime nexthard; /* Next hardlock() event. */
- struct bintime nextstat; /* Next statclock() event. */
- struct bintime nextprof; /* Next profclock() event. */
+ sbintime_t now; /* Last tick time. */
+ sbintime_t nextevent; /* Next scheduled event on this CPU. */
+ sbintime_t nexttick; /* Next timer tick time. */
+ sbintime_t nexthard; /* Next hardclock() event. */
+ sbintime_t nextstat; /* Next statclock() event. */
+ sbintime_t nextprof; /* Next profclock() event. */
+ sbintime_t nextcall; /* Next callout event. */
+ sbintime_t nextcallopt; /* Next optional callout event. */
#ifdef KDTRACE_HOOKS
- struct bintime nextcyc; /* Next OpenSolaris cyclics event. */
+ sbintime_t nextcyc; /* Next OpenSolaris cyclics event. */
#endif
int ipi; /* This CPU needs IPI. */
int idle; /* This CPU is in idle mode. */
};
static DPCPU_DEFINE(struct pcpu_state, timerstate);
-
-#define FREQ2BT(freq, bt) \
-{ \
- (bt)->sec = 0; \
- (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \
-}
-#define BT2FREQ(bt) \
- (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \
- ((bt)->frac >> 1))
-
-#define SBT2FREQ(sbt) ((SBT_1S + ((sbt) >> 1)) / (sbt))
+DPCPU_DEFINE(sbintime_t, hardclocktime);
/*
* Timer broadcast IPI handler.
@@ -161,7 +144,7 @@ static DPCPU_DEFINE(struct pcpu_state, timerstate);
int
hardclockintr(void)
{
- struct bintime now;
+ sbintime_t now;
struct pcpu_state *state;
int done;
@@ -169,10 +152,9 @@ hardclockintr(void)
return (FILTER_HANDLED);
state = DPCPU_PTR(timerstate);
now = state->now;
- CTR4(KTR_SPARE2, "ipi at %d: now %d.%08x%08x",
- curcpu, now.sec, (u_int)(now.frac >> 32),
- (u_int)(now.frac & 0xffffffff));
- done = handleevents(&now, 0);
+ CTR3(KTR_SPARE2, "ipi at %d: now %d.%08x",
+ curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
+ done = handleevents(now, 0);
return (done ? FILTER_HANDLED : FILTER_STRAY);
}
@@ -180,48 +162,43 @@ hardclockintr(void)
* Handle all events for specified time on this CPU
*/
static int
-handleevents(struct bintime *now, int fake)
+handleevents(sbintime_t now, int fake)
{
- struct bintime t;
+ sbintime_t t, *hct;
struct trapframe *frame;
struct pcpu_state *state;
- uintfptr_t pc;
int usermode;
int done, runs;
- CTR4(KTR_SPARE2, "handle at %d: now %d.%08x%08x",
- curcpu, now->sec, (u_int)(now->frac >> 32),
- (u_int)(now->frac & 0xffffffff));
+ CTR3(KTR_SPARE2, "handle at %d: now %d.%08x",
+ curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
done = 0;
if (fake) {
frame = NULL;
usermode = 0;
- pc = 0;
} else {
frame = curthread->td_intr_frame;
usermode = TRAPF_USERMODE(frame);
- pc = TRAPF_PC(frame);
}
state = DPCPU_PTR(timerstate);
runs = 0;
- while (bintime_cmp(now, &state->nexthard, >=)) {
- bintime_addx(&state->nexthard, hardperiod.frac);
+ while (now >= state->nexthard) {
+ state->nexthard += tick_sbt;
runs++;
}
if (runs) {
- if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 &&
- bintime_cmp(&state->nexthard, &nexthard, >))
- nexthard = state->nexthard;
+ hct = DPCPU_PTR(hardclocktime);
+ *hct = state->nexthard - tick_sbt;
if (fake < 2) {
hardclock_cnt(runs, usermode);
done = 1;
}
}
runs = 0;
- while (bintime_cmp(now, &state->nextstat, >=)) {
- bintime_addx(&state->nextstat, statperiod.frac);
+ while (now >= state->nextstat) {
+ state->nextstat += statperiod;
runs++;
}
if (runs && fake < 2) {
@@ -230,31 +207,29 @@ handleevents(struct bintime *now, int fake)
}
if (profiling) {
runs = 0;
- while (bintime_cmp(now, &state->nextprof, >=)) {
- bintime_addx(&state->nextprof, profperiod.frac);
+ while (now >= state->nextprof) {
+ state->nextprof += profperiod;
runs++;
}
if (runs && !fake) {
- profclock_cnt(runs, usermode, pc);
+ profclock_cnt(runs, usermode, TRAPF_PC(frame));
done = 1;
}
} else
state->nextprof = state->nextstat;
+ if (now >= state->nextcallopt) {
+ state->nextcall = state->nextcallopt = INT64_MAX;
+ callout_process(now);
+ }
#ifdef KDTRACE_HOOKS
- if (fake == 0 && cyclic_clock_func != NULL &&
- state->nextcyc.sec != -1 &&
- bintime_cmp(now, &state->nextcyc, >=)) {
- state->nextcyc.sec = -1;
+ if (fake == 0 && now >= state->nextcyc && cyclic_clock_func != NULL) {
+ state->nextcyc = INT64_MAX;
(*cyclic_clock_func)(frame);
}
#endif
- getnextcpuevent(&t, 0);
- if (fake == 2) {
- state->nextevent = t;
- return (done);
- }
+ t = getnextcpuevent(0);
ET_HW_LOCK(state);
if (!busy) {
state->idle = 0;
@@ -268,84 +243,81 @@ handleevents(struct bintime *now, int fake)
/*
* Schedule binuptime of the next event on current CPU.
*/
-static void
-getnextcpuevent(struct bintime *event, int idle)
+static sbintime_t
+getnextcpuevent(int idle)
{
- struct bintime tmp;
+ sbintime_t event;
struct pcpu_state *state;
- int skip;
+ u_int hardfreq;
state = DPCPU_PTR(timerstate);
- /* Handle hardclock() events. */
- *event = state->nexthard;
- if (idle || (!activetick && !profiling &&
- (timer->et_flags & ET_FLAGS_PERCPU) == 0)) {
- skip = idle ? 4 : (stathz / 2);
- if (curcpu == CPU_FIRST() && tc_min_ticktock_freq > skip)
- skip = tc_min_ticktock_freq;
- skip = callout_tickstofirst(hz / skip) - 1;
- CTR2(KTR_SPARE2, "skip at %d: %d", curcpu, skip);
- tmp = hardperiod;
- bintime_mul(&tmp, skip);
- bintime_add(event, &tmp);
- }
+ /* Handle hardclock() events, skipping some if CPU is idle. */
+ event = state->nexthard;
+ if (idle) {
+ hardfreq = (u_int)hz / 2;
+ if (tc_min_ticktock_freq > 2
+#ifdef SMP
+ && curcpu == CPU_FIRST()
+#endif
+ )
+ hardfreq = hz / tc_min_ticktock_freq;
+ if (hardfreq > 1)
+ event += tick_sbt * (hardfreq - 1);
+ }
+ /* Handle callout events. */
+ if (event > state->nextcall)
+ event = state->nextcall;
if (!idle) { /* If CPU is active - handle other types of events. */
- if (bintime_cmp(event, &state->nextstat, >))
- *event = state->nextstat;
- if (profiling && bintime_cmp(event, &state->nextprof, >))
- *event = state->nextprof;
+ if (event > state->nextstat)
+ event = state->nextstat;
+ if (profiling && event > state->nextprof)
+ event = state->nextprof;
}
#ifdef KDTRACE_HOOKS
- if (state->nextcyc.sec != -1 && bintime_cmp(event, &state->nextcyc, >))
- *event = state->nextcyc;
+ if (event > state->nextcyc)
+ event = state->nextcyc;
#endif
+ return (event);
}
/*
* Schedule binuptime of the next event on all CPUs.
*/
-static void
-getnextevent(struct bintime *event)
+static sbintime_t
+getnextevent(void)
{
struct pcpu_state *state;
+ sbintime_t event;
#ifdef SMP
int cpu;
#endif
- int c, nonidle;
+ int c;
state = DPCPU_PTR(timerstate);
- *event = state->nextevent;
- c = curcpu;
- nonidle = !state->idle;
- if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
+ event = state->nextevent;
+ c = -1;
#ifdef SMP
- if (smp_started) {
- CPU_FOREACH(cpu) {
- if (curcpu == cpu)
- continue;
- state = DPCPU_ID_PTR(cpu, timerstate);
- nonidle += !state->idle;
- if (bintime_cmp(event, &state->nextevent, >)) {
- *event = state->nextevent;
- c = cpu;
- }
+ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0) {
+ CPU_FOREACH(cpu) {
+ state = DPCPU_ID_PTR(cpu, timerstate);
+ if (event > state->nextevent) {
+ event = state->nextevent;
+ c = cpu;
}
}
-#endif
- if (nonidle != 0 && bintime_cmp(event, &nexthard, >))
- *event = nexthard;
}
- CTR5(KTR_SPARE2, "next at %d: next %d.%08x%08x by %d",
- curcpu, event->sec, (u_int)(event->frac >> 32),
- (u_int)(event->frac & 0xffffffff), c);
+#endif
+ CTR4(KTR_SPARE2, "next at %d: next %d.%08x by %d",
+ curcpu, (int)(event >> 32), (u_int)(event & 0xffffffff), c);
+ return (event);
}
/* Hardware timer callback function. */
static void
timercb(struct eventtimer *et, void *arg)
{
- struct bintime now;
- struct bintime *next;
+ sbintime_t now;
+ sbintime_t *next;
struct pcpu_state *state;
#ifdef SMP
int cpu, bcast;
@@ -360,16 +332,14 @@ timercb(struct eventtimer *et, void *arg)
next = &state->nexttick;
} else
next = &nexttick;
- binuptime(&now);
- if (periodic) {
- *next = now;
- bintime_addx(next, timerperiod.frac); /* Next tick in 1 period. */
- } else
- next->sec = -1; /* Next tick is not scheduled yet. */
+ now = sbinuptime();
+ if (periodic)
+ *next = now + timerperiod;
+ else
+ *next = -1; /* Next tick is not scheduled yet. */
state->now = now;
- CTR4(KTR_SPARE2, "intr at %d: now %d.%08x%08x",
- curcpu, (int)(now.sec), (u_int)(now.frac >> 32),
- (u_int)(now.frac & 0xffffffff));
+ CTR3(KTR_SPARE2, "intr at %d: now %d.%08x",
+ curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
#ifdef SMP
/* Prepare broadcasting to other CPUs for non-per-CPU timers. */
@@ -379,8 +349,8 @@ timercb(struct eventtimer *et, void *arg)
state = DPCPU_ID_PTR(cpu, timerstate);
ET_HW_LOCK(state);
state->now = now;
- if (bintime_cmp(&now, &state->nextevent, >=)) {
- state->nextevent.sec++;
+ if (now >= state->nextevent) {
+ state->nextevent += SBT_1S;
if (curcpu != cpu) {
state->ipi = 1;
bcast = 1;
@@ -392,7 +362,7 @@ timercb(struct eventtimer *et, void *arg)
#endif
/* Handle events for this time on this CPU. */
- handleevents(&now, 0);
+ handleevents(now, 0);
#ifdef SMP
/* Broadcast interrupt to other CPUs for non-per-CPU timers. */
@@ -414,11 +384,11 @@ timercb(struct eventtimer *et, void *arg)
* Load new value into hardware timer.
*/
static void
-loadtimer(struct bintime *now, int start)
+loadtimer(sbintime_t now, int start)
{
struct pcpu_state *state;
- struct bintime new;
- struct bintime *next;
+ sbintime_t new;
+ sbintime_t *next;
uint64_t tmp;
int eq;
@@ -433,30 +403,24 @@ loadtimer(struct bintime *now, int start)
* Try to start all periodic timers aligned
* to period to make events synchronous.
*/
- tmp = ((uint64_t)now->sec << 36) + (now->frac >> 28);
- tmp = (tmp % (timerperiod.frac >> 28)) << 28;
- new.sec = 0;
- new.frac = timerperiod.frac - tmp;
- if (new.frac < tmp) /* Left less then passed. */
- bintime_addx(&new, timerperiod.frac);
+ tmp = now % timerperiod;
+ new = timerperiod - tmp;
+ if (new < tmp) /* Left less than passed. */
+ new += timerperiod;
CTR5(KTR_SPARE2, "load p at %d: now %d.%08x first in %d.%08x",
- curcpu, now->sec, (u_int)(now->frac >> 32),
- new.sec, (u_int)(new.frac >> 32));
- *next = new;
- bintime_add(next, now);
- et_start(timer, bttosbt(new), bttosbt(timerperiod));
+ curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff),
+ (int)(new >> 32), (u_int)(new & 0xffffffff));
+ *next = new + now;
+ et_start(timer, new, timerperiod);
}
} else {
- getnextevent(&new);
- eq = bintime_cmp(&new, next, ==);
- CTR5(KTR_SPARE2, "load at %d: next %d.%08x%08x eq %d",
- curcpu, new.sec, (u_int)(new.frac >> 32),
- (u_int)(new.frac & 0xffffffff),
- eq);
+ new = getnextevent();
+ eq = (new == *next);
+ CTR4(KTR_SPARE2, "load at %d: next %d.%08x eq %d",
+ curcpu, (int)(new >> 32), (u_int)(new & 0xffffffff), eq);
if (!eq) {
*next = new;
- bintime_sub(&new, now);
- et_start(timer, bttosbt(new), 0);
+ et_start(timer, new - now, 0);
}
}
}
@@ -478,7 +442,7 @@ setuptimer(void)
while (freq < (profiling ? profhz : stathz))
freq += hz;
freq = round_freq(timer, freq);
- FREQ2BT(freq, &timerperiod);
+ timerperiod = SBT_1S / freq;
}
/*
@@ -487,15 +451,15 @@ setuptimer(void)
static int
doconfigtimer(void)
{
- struct bintime now;
+ sbintime_t now;
struct pcpu_state *state;
state = DPCPU_PTR(timerstate);
switch (atomic_load_acq_int(&state->action)) {
case 1:
- binuptime(&now);
+ now = sbinuptime();
ET_HW_LOCK(state);
- loadtimer(&now, 1);
+ loadtimer(now, 1);
ET_HW_UNLOCK(state);
state->handle = 0;
atomic_store_rel_int(&state->action, 0);
@@ -509,8 +473,8 @@ doconfigtimer(void)
return (1);
}
if (atomic_readandclear_int(&state->handle) && !busy) {
- binuptime(&now);
- handleevents(&now, 0);
+ now = sbinuptime();
+ handleevents(now, 0);
return (1);
}
return (0);
@@ -523,40 +487,45 @@ doconfigtimer(void)
static void
configtimer(int start)
{
- struct bintime now, next;
+ sbintime_t now, next;
struct pcpu_state *state;
int cpu;
if (start) {
setuptimer();
- binuptime(&now);
- }
+ now = sbinuptime();
+ } else
+ now = 0;
critical_enter();
ET_HW_LOCK(DPCPU_PTR(timerstate));
if (start) {
/* Initialize time machine parameters. */
- next = now;
- bintime_addx(&next, timerperiod.frac);
+ next = now + timerperiod;
if (periodic)
nexttick = next;
else
- nexttick.sec = -1;
+ nexttick = -1;
CPU_FOREACH(cpu) {
state = DPCPU_ID_PTR(cpu, timerstate);
state->now = now;
- state->nextevent = next;
+ if (!smp_started && cpu != CPU_FIRST())
+ state->nextevent = INT64_MAX;
+ else
+ state->nextevent = next;
if (periodic)
state->nexttick = next;
else
- state->nexttick.sec = -1;
+ state->nexttick = -1;
state->nexthard = next;
state->nextstat = next;
state->nextprof = next;
+ state->nextcall = next;
+ state->nextcallopt = next;
hardclock_sync(cpu);
}
busy = 0;
/* Start global timer or per-CPU timer of this CPU. */
- loadtimer(&now, 1);
+ loadtimer(now, 1);
} else {
busy = 1;
/* Stop global timer or per-CPU timer of this CPU. */
@@ -629,12 +598,11 @@ cpu_initclocks_bsp(void)
state = DPCPU_ID_PTR(cpu, timerstate);
mtx_init(&state->et_hw_mtx, "et_hw_mtx", NULL, MTX_SPIN);
#ifdef KDTRACE_HOOKS
- state->nextcyc.sec = -1;
+ state->nextcyc = INT64_MAX;
#endif
+ state->nextcall = INT64_MAX;
+ state->nextcallopt = INT64_MAX;
}
-#ifdef SMP
- callout_new_inserted = cpu_new_callout;
-#endif
periodic = want_periodic;
/* Grab requested timer or the best of present. */
if (timername[0])
@@ -698,9 +666,10 @@ cpu_initclocks_bsp(void)
profhz = round_freq(timer, stathz * 64);
}
tick = 1000000 / hz;
- FREQ2BT(hz, &hardperiod);
- FREQ2BT(stathz, &statperiod);
- FREQ2BT(profhz, &profperiod);
+ tick_sbt = SBT_1S / hz;
+ tick_bt = sbttobt(tick_sbt);
+ statperiod = SBT_1S / stathz;
+ profperiod = SBT_1S / profhz;
ET_LOCK();
configtimer(1);
ET_UNLOCK();
@@ -712,18 +681,22 @@ cpu_initclocks_bsp(void)
void
cpu_initclocks_ap(void)
{
- struct bintime now;
+ sbintime_t now;
struct pcpu_state *state;
+ struct thread *td;
state = DPCPU_PTR(timerstate);
- binuptime(&now);
+ now = sbinuptime();
ET_HW_LOCK(state);
state->now = now;
hardclock_sync(curcpu);
- handleevents(&state->now, 2);
- if (timer->et_flags & ET_FLAGS_PERCPU)
- loadtimer(&now, 1);
+ spinlock_enter();
ET_HW_UNLOCK(state);
+ td = curthread;
+ td->td_intr_nesting_level++;
+ handleevents(state->now, 2);
+ td->td_intr_nesting_level--;
+ spinlock_exit();
}
/*
@@ -772,7 +745,7 @@ cpu_stopprofclock(void)
sbintime_t
cpu_idleclock(void)
{
- struct bintime now, t;
+ sbintime_t now, t;
struct pcpu_state *state;
if (idletick || busy ||
@@ -786,19 +759,17 @@ cpu_idleclock(void)
if (periodic)
now = state->now;
else
- binuptime(&now);
- CTR4(KTR_SPARE2, "idle at %d: now %d.%08x%08x",
- curcpu, now.sec, (u_int)(now.frac >> 32),
- (u_int)(now.frac & 0xffffffff));
- getnextcpuevent(&t, 1);
+ now = sbinuptime();
+ CTR3(KTR_SPARE2, "idle at %d: now %d.%08x",
+ curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
+ t = getnextcpuevent(1);
ET_HW_LOCK(state);
state->idle = 1;
state->nextevent = t;
if (!periodic)
- loadtimer(&now, 0);
+ loadtimer(now, 0);
ET_HW_UNLOCK(state);
- bintime_sub(&t, &now);
- return (MAX(bttosbt(t), 0));
+ return (MAX(t - now, 0));
}
/*
@@ -807,7 +778,7 @@ cpu_idleclock(void)
void
cpu_activeclock(void)
{
- struct bintime now;
+ sbintime_t now;
struct pcpu_state *state;
struct thread *td;
@@ -817,101 +788,98 @@ cpu_activeclock(void)
if (periodic)
now = state->now;
else
- binuptime(&now);
- CTR4(KTR_SPARE2, "active at %d: now %d.%08x%08x",
- curcpu, now.sec, (u_int)(now.frac >> 32),
- (u_int)(now.frac & 0xffffffff));
+ now = sbinuptime();
+ CTR3(KTR_SPARE2, "active at %d: now %d.%08x",
+ curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff));
spinlock_enter();
td = curthread;
td->td_intr_nesting_level++;
- handleevents(&now, 1);
+ handleevents(now, 1);
td->td_intr_nesting_level--;
spinlock_exit();
}
#ifdef KDTRACE_HOOKS
void
-clocksource_cyc_set(const struct bintime *t)
+clocksource_cyc_set(const struct bintime *bt)
{
- struct bintime now;
+ sbintime_t now, t;
struct pcpu_state *state;
+ /* Do not touch anything if somebody reconfiguring timers. */
+ if (busy)
+ return;
+ t = bttosbt(*bt);
state = DPCPU_PTR(timerstate);
if (periodic)
now = state->now;
else
- binuptime(&now);
+ now = sbinuptime();
- CTR4(KTR_SPARE2, "set_cyc at %d: now %d.%08x%08x",
- curcpu, now.sec, (u_int)(now.frac >> 32),
- (u_int)(now.frac & 0xffffffff));
- CTR4(KTR_SPARE2, "set_cyc at %d: t %d.%08x%08x",
- curcpu, t->sec, (u_int)(t->frac >> 32),
- (u_int)(t->frac & 0xffffffff));
+ CTR5(KTR_SPARE2, "set_cyc at %d: now %d.%08x t %d.%08x",
+ curcpu, (int)(now >> 32), (u_int)(now & 0xffffffff),
+ (int)(t >> 32), (u_int)(t & 0xffffffff));
ET_HW_LOCK(state);
- if (bintime_cmp(t, &state->nextcyc, ==)) {
- ET_HW_UNLOCK(state);
- return;
- }
- state->nextcyc = *t;
- if (bintime_cmp(&state->nextcyc, &state->nextevent, >=)) {
- ET_HW_UNLOCK(state);
- return;
- }
- state->nextevent = state->nextcyc;
+ if (t == state->nextcyc)
+ goto done;
+ state->nextcyc = t;
+ if (t >= state->nextevent)
+ goto done;
+ state->nextevent = t;
if (!periodic)
- loadtimer(&now, 0);
+ loadtimer(now, 0);
+done:
ET_HW_UNLOCK(state);
}
#endif
-#ifdef SMP
-static void
-cpu_new_callout(int cpu, int ticks)
+void
+cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt)
{
- struct bintime tmp;
struct pcpu_state *state;
- CTR3(KTR_SPARE2, "new co at %d: on %d in %d",
- curcpu, cpu, ticks);
+ /* Do not touch anything if somebody reconfiguring timers. */
+ if (busy)
+ return;
+ CTR6(KTR_SPARE2, "new co at %d: on %d at %d.%08x - %d.%08x",
+ curcpu, cpu, (int)(bt_opt >> 32), (u_int)(bt_opt & 0xffffffff),
+ (int)(bt >> 32), (u_int)(bt & 0xffffffff));
state = DPCPU_ID_PTR(cpu, timerstate);
ET_HW_LOCK(state);
- if (state->idle == 0 || busy) {
- ET_HW_UNLOCK(state);
- return;
- }
+
/*
- * If timer is periodic - just update next event time for target CPU.
- * If timer is global - there is chance it is already programmed.
+ * If there is a callout time already set earlier -- do nothing.
+ * This check may appear redundant because we already check in
+ * callout_process(), but this double check guarantees we are safe
+ * with respect to races between interrupt execution and
+ * scheduling.
*/
- if (periodic || (timer->et_flags & ET_FLAGS_PERCPU) == 0) {
- tmp = hardperiod;
- bintime_mul(&tmp, ticks - 1);
- bintime_add(&tmp, &state->nexthard);
- if (bintime_cmp(&tmp, &state->nextevent, <))
- state->nextevent = tmp;
- if (periodic ||
- bintime_cmp(&state->nextevent, &nexttick, >=)) {
- ET_HW_UNLOCK(state);
- return;
- }
+ state->nextcallopt = bt_opt;
+ if (bt >= state->nextcall)
+ goto done;
+ state->nextcall = bt;
+ /* If there is some other event set earlier -- do nothing. */
+ if (bt >= state->nextevent)
+ goto done;
+ state->nextevent = bt;
+ /* If timer is periodic -- there is nothing to reprogram. */
+ if (periodic)
+ goto done;
+ /* If timer is global or of the current CPU -- reprogram it. */
+ if ((timer->et_flags & ET_FLAGS_PERCPU) == 0 || cpu == curcpu) {
+ loadtimer(sbinuptime(), 0);
+done:
+ ET_HW_UNLOCK(state);
+ return;
}
- /*
- * Otherwise we have to wake that CPU up, as we can't get present
- * bintime to reprogram global timer from here. If timer is per-CPU,
- * we by definition can't do it from here.
- */
+ /* Otherwise make other CPU to reprogram it. */
+ state->handle = 1;
ET_HW_UNLOCK(state);
- if (timer->et_flags & ET_FLAGS_PERCPU) {
- state->handle = 1;
- ipi_cpu(cpu, IPI_HARDCLOCK);
- } else {
- if (!cpu_idle_wakeup(cpu))
- ipi_cpu(cpu, IPI_AST);
- }
-}
+#ifdef SMP
+ ipi_cpu(cpu, IPI_HARDCLOCK);
#endif
+}
/*
* Report or change the active event timers hardware.
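
The periodic branch of loadtimer() above aligns the next event to a multiple of the timer period with a single modulo on sbintime_t values, so that all periodic timers fire in phase ("aligned to period to make events synchronous"). A minimal user-space model of that alignment, assuming the same 32.32 layout, is sketched below (not part of the patch).

/*
 * Sketch of the periodic-mode alignment done by loadtimer(): schedule the
 * first event on the next multiple of the timer period.
 */
#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;
#define SBT_1S  ((sbintime_t)1 << 32)

static sbintime_t
align_to_period(sbintime_t now, sbintime_t period)
{
    sbintime_t passed, left;

    passed = now % period;      /* time since the last period boundary */
    left = period - passed;     /* time until the next boundary */
    return (now + left);
}

int
main(void)
{
    sbintime_t period = SBT_1S / 100;           /* e.g. a 100 Hz periodic timer */
    sbintime_t now = 3 * SBT_1S + period / 3;   /* one third into a period */
    sbintime_t next = align_to_period(now, period);

    /* Prints roughly 6666 us, i.e. two thirds of the 10 ms period. */
    printf("next tick in %jd us\n",
        (intmax_t)((next - now) * 1000000 / SBT_1S));
    return (0);
}
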
diff --git a/sys/kern/kern_tc.c b/sys/kern/kern_tc.c
index 6e1f486..9fe7ebe 100644
--- a/sys/kern/kern_tc.c
+++ b/sys/kern/kern_tc.c
@@ -22,6 +22,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/kernel.h>
+#include <sys/limits.h>
#ifdef FFCLOCK
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -119,6 +120,21 @@ static int timestepwarnings;
SYSCTL_INT(_kern_timecounter, OID_AUTO, stepwarnings, CTLFLAG_RW,
&timestepwarnings, 0, "Log time steps");
+struct bintime bt_timethreshold;
+struct bintime bt_tickthreshold;
+sbintime_t sbt_timethreshold;
+sbintime_t sbt_tickthreshold;
+struct bintime tc_tick_bt;
+sbintime_t tc_tick_sbt;
+int tc_precexp;
+int tc_timepercentage = TC_DEFAULTPERC;
+TUNABLE_INT("kern.timecounter.alloweddeviation", &tc_timepercentage);
+static int sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS);
+SYSCTL_PROC(_kern_timecounter, OID_AUTO, alloweddeviation,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, 0, 0,
+ sysctl_kern_timecounter_adjprecision, "I",
+ "Allowed time interval deviation in percents");
+
static void tc_windup(void);
static void cpu_tick_calibrate(int);
@@ -1746,10 +1762,47 @@ tc_ticktock(int cnt)
tc_windup();
}
+static void __inline
+tc_adjprecision(void)
+{
+ int t;
+
+ if (tc_timepercentage > 0) {
+ t = (99 + tc_timepercentage) / tc_timepercentage;
+ tc_precexp = fls(t + (t >> 1)) - 1;
+ FREQ2BT(hz / tc_tick, &bt_timethreshold);
+ FREQ2BT(hz, &bt_tickthreshold);
+ bintime_shift(&bt_timethreshold, tc_precexp);
+ bintime_shift(&bt_tickthreshold, tc_precexp);
+ } else {
+ tc_precexp = 31;
+ bt_timethreshold.sec = INT_MAX;
+ bt_timethreshold.frac = ~(uint64_t)0;
+ bt_tickthreshold = bt_timethreshold;
+ }
+ sbt_timethreshold = bttosbt(bt_timethreshold);
+ sbt_tickthreshold = bttosbt(bt_tickthreshold);
+}
+
+static int
+sysctl_kern_timecounter_adjprecision(SYSCTL_HANDLER_ARGS)
+{
+ int error, val;
+
+ val = tc_timepercentage;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ tc_timepercentage = val;
+ tc_adjprecision();
+ return (0);
+}
+
static void
inittimecounter(void *dummy)
{
u_int p;
+ int tick_rate;
/*
* Set the initial timeout to
@@ -1763,6 +1816,12 @@ inittimecounter(void *dummy)
tc_tick = (hz + 500) / 1000;
else
tc_tick = 1;
+ tc_adjprecision();
+ FREQ2BT(hz, &tick_bt);
+ tick_sbt = bttosbt(tick_bt);
+ tick_rate = hz / tc_tick;
+ FREQ2BT(tick_rate, &tc_tick_bt);
+ tc_tick_sbt = bttosbt(tc_tick_bt);
p = (tc_tick * 1000000) / hz;
printf("Timecounters tick every %d.%03u msec\n", p / 1000, p % 1000);
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c
index a217db1..0787c01 100644
--- a/sys/kern/kern_timeout.c
+++ b/sys/kern/kern_timeout.c
@@ -37,7 +37,11 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_callout_profiling.h"
#include "opt_kdtrace.h"
+#if defined(__arm__)
+#include "opt_timer.h"
+#endif
#include <sys/param.h>
#include <sys/systm.h>
@@ -59,6 +63,10 @@ __FBSDID("$FreeBSD$");
#include <machine/cpu.h>
#endif
+#ifndef NO_EVENTTIMERS
+DPCPU_DECLARE(sbintime_t, hardclocktime);
+#endif
+
SDT_PROVIDER_DEFINE(callout_execute);
SDT_PROBE_DEFINE(callout_execute, kernel, , callout_start, callout-start);
SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_start, 0,
@@ -67,6 +75,7 @@ SDT_PROBE_DEFINE(callout_execute, kernel, , callout_end, callout-end);
SDT_PROBE_ARGTYPE(callout_execute, kernel, , callout_end, 0,
"struct callout *");
+#ifdef CALLOUT_PROFILING
static int avg_depth;
SYSCTL_INT(_debug, OID_AUTO, to_avg_depth, CTLFLAG_RD, &avg_depth, 0,
"Average number of items examined per softclock call. Units = 1/1000");
@@ -79,6 +88,19 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls, CTLFLAG_RD, &avg_lockcalls, 0,
static int avg_mpcalls;
SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
"Average number of MP callouts made per softclock call. Units = 1/1000");
+static int avg_depth_dir;
+SYSCTL_INT(_debug, OID_AUTO, to_avg_depth_dir, CTLFLAG_RD, &avg_depth_dir, 0,
+ "Average number of direct callouts examined per callout_process call. "
+ "Units = 1/1000");
+static int avg_lockcalls_dir;
+SYSCTL_INT(_debug, OID_AUTO, to_avg_lockcalls_dir, CTLFLAG_RD,
+ &avg_lockcalls_dir, 0, "Average number of lock direct callouts made per "
+ "callout_process call. Units = 1/1000");
+static int avg_mpcalls_dir;
+SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls_dir, CTLFLAG_RD, &avg_mpcalls_dir,
+ 0, "Average number of MP direct callouts made per callout_process call. "
+ "Units = 1/1000");
+#endif
/*
* TODO:
* allocate more timeout table slots when table overflows.
@@ -86,58 +108,63 @@ SYSCTL_INT(_debug, OID_AUTO, to_avg_mpcalls, CTLFLAG_RD, &avg_mpcalls, 0,
u_int callwheelsize, callwheelmask;
/*
- * The callout cpu migration entity represents informations necessary for
- * describing the migrating callout to the new callout cpu.
+ * The callout cpu exec entities represent the information necessary for
+ * describing the state of callouts currently running on the CPU and the
+ * information needed for migrating callouts to a new callout cpu. In
+ * particular, the first entry of the cc_exec_entity array holds information
+ * for callouts running in SWI thread context, while the second one holds
+ * information for callouts running directly from hardware interrupt context.
* The cached informations are very important for deferring migration when
* the migrating callout is already running.
*/
-struct cc_mig_ent {
+struct cc_exec {
+ struct callout *cc_next;
+ struct callout *cc_curr;
#ifdef SMP
- void (*ce_migration_func)(void *);
- void *ce_migration_arg;
- int ce_migration_cpu;
- int ce_migration_ticks;
+ void (*ce_migration_func)(void *);
+ void *ce_migration_arg;
+ int ce_migration_cpu;
+ sbintime_t ce_migration_time;
#endif
+ boolean_t cc_cancel;
+ boolean_t cc_waiting;
};
/*
* There is one struct callout_cpu per cpu, holding all relevant
* state for the callout processing thread on the individual CPU.
- * In particular:
- * cc_ticks is incremented once per tick in callout_cpu().
- * It tracks the global 'ticks' but in a way that the individual
- * threads should not worry about races in the order in which
- * hardclock() and hardclock_cpu() run on the various CPUs.
- * cc_softclock is advanced in callout_cpu() to point to the
- * first entry in cc_callwheel that may need handling. In turn,
- * a softclock() is scheduled so it can serve the various entries i
- * such that cc_softclock <= i <= cc_ticks .
- * XXX maybe cc_softclock and cc_ticks should be volatile ?
- *
- * cc_ticks is also used in callout_reset_cpu() to determine
- * when the callout should be served.
*/
struct callout_cpu {
struct mtx_padalign cc_lock;
- struct cc_mig_ent cc_migrating_entity;
+ struct cc_exec cc_exec_entity[2];
struct callout *cc_callout;
- struct callout_tailq *cc_callwheel;
- struct callout_list cc_callfree;
- struct callout *cc_next;
- struct callout *cc_curr;
+ struct callout_list *cc_callwheel;
+ struct callout_tailq cc_expireq;
+ struct callout_slist cc_callfree;
+ sbintime_t cc_firstevent;
+ sbintime_t cc_lastscan;
void *cc_cookie;
- int cc_ticks;
- int cc_softticks;
- int cc_cancel;
- int cc_waiting;
- int cc_firsttick;
+ u_int cc_bucket;
};
+#define cc_exec_curr cc_exec_entity[0].cc_curr
+#define cc_exec_next cc_exec_entity[0].cc_next
+#define cc_exec_cancel cc_exec_entity[0].cc_cancel
+#define cc_exec_waiting cc_exec_entity[0].cc_waiting
+#define cc_exec_curr_dir cc_exec_entity[1].cc_curr
+#define cc_exec_next_dir cc_exec_entity[1].cc_next
+#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel
+#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting
+
#ifdef SMP
-#define cc_migration_func cc_migrating_entity.ce_migration_func
-#define cc_migration_arg cc_migrating_entity.ce_migration_arg
-#define cc_migration_cpu cc_migrating_entity.ce_migration_cpu
-#define cc_migration_ticks cc_migrating_entity.ce_migration_ticks
+#define cc_migration_func cc_exec_entity[0].ce_migration_func
+#define cc_migration_arg cc_exec_entity[0].ce_migration_arg
+#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu
+#define cc_migration_time cc_exec_entity[0].ce_migration_time
+#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func
+#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg
+#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu
+#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time
struct callout_cpu cc_cpu[MAXCPU];
#define CPUBLOCK MAXCPU
@@ -153,39 +180,48 @@ struct callout_cpu cc_cpu;
#define CC_LOCK_ASSERT(cc) mtx_assert(&(cc)->cc_lock, MA_OWNED)
static int timeout_cpu;
-void (*callout_new_inserted)(int cpu, int ticks) = NULL;
+
+static void softclock_call_cc(struct callout *c, struct callout_cpu *cc,
+#ifdef CALLOUT_PROFILING
+ int *mpcalls, int *lockcalls, int *gcalls,
+#endif
+ int direct);
static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures");
/**
* Locked by cc_lock:
- * cc_curr - If a callout is in progress, it is curr_callout.
- * If curr_callout is non-NULL, threads waiting in
+ * cc_curr - If a callout is in progress, it is cc_curr.
+ * If cc_curr is non-NULL, threads waiting in
* callout_drain() will be woken up as soon as the
* relevant callout completes.
- * cc_cancel - Changing to 1 with both callout_lock and c_lock held
+ * cc_cancel - Changing to 1 with both callout_lock and cc_lock held
* guarantees that the current callout will not run.
* The softclock() function sets this to 0 before it
* drops callout_lock to acquire c_lock, and it calls
* the handler only if curr_cancelled is still 0 after
- * c_lock is successfully acquired.
+ * cc_lock is successfully acquired.
* cc_waiting - If a thread is waiting in callout_drain(), then
* callout_wait is nonzero. Set only when
- * curr_callout is non-NULL.
+ * cc_curr is non-NULL.
*/
/*
- * Resets the migration entity tied to a specific callout cpu.
+ * Resets the execution entity tied to a specific callout cpu.
*/
static void
-cc_cme_cleanup(struct callout_cpu *cc)
+cc_cce_cleanup(struct callout_cpu *cc, int direct)
{
+ cc->cc_exec_entity[direct].cc_curr = NULL;
+ cc->cc_exec_entity[direct].cc_next = NULL;
+ cc->cc_exec_entity[direct].cc_cancel = FALSE;
+ cc->cc_exec_entity[direct].cc_waiting = FALSE;
#ifdef SMP
- cc->cc_migration_cpu = CPUBLOCK;
- cc->cc_migration_ticks = 0;
- cc->cc_migration_func = NULL;
- cc->cc_migration_arg = NULL;
+ cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK;
+ cc->cc_exec_entity[direct].ce_migration_time = 0;
+ cc->cc_exec_entity[direct].ce_migration_func = NULL;
+ cc->cc_exec_entity[direct].ce_migration_arg = NULL;
#endif
}
@@ -193,11 +229,11 @@ cc_cme_cleanup(struct callout_cpu *cc)
* Checks if migration is requested by a specific callout cpu.
*/
static int
-cc_cme_migrating(struct callout_cpu *cc)
+cc_cce_migrating(struct callout_cpu *cc, int direct)
{
#ifdef SMP
- return (cc->cc_migration_cpu != CPUBLOCK);
+ return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK);
#else
return (0);
#endif
@@ -225,7 +261,7 @@ kern_timeout_callwheel_alloc(caddr_t v)
cc->cc_callout = (struct callout *)v;
v = (caddr_t)(cc->cc_callout + ncallout);
- cc->cc_callwheel = (struct callout_tailq *)v;
+ cc->cc_callwheel = (struct callout_list *)v;
v = (caddr_t)(cc->cc_callwheel + callwheelsize);
return(v);
}
@@ -238,10 +274,12 @@ callout_cpu_init(struct callout_cpu *cc)
mtx_init(&cc->cc_lock, "callout", NULL, MTX_SPIN | MTX_RECURSE);
SLIST_INIT(&cc->cc_callfree);
- for (i = 0; i < callwheelsize; i++) {
- TAILQ_INIT(&cc->cc_callwheel[i]);
- }
- cc_cme_cleanup(cc);
+ for (i = 0; i < callwheelsize; i++)
+ LIST_INIT(&cc->cc_callwheel[i]);
+ TAILQ_INIT(&cc->cc_expireq);
+ cc->cc_firstevent = INT64_MAX;
+ for (i = 0; i < 2; i++)
+ cc_cce_cleanup(cc, i);
if (cc->cc_callout == NULL)
return;
for (i = 0; i < ncallout; i++) {
@@ -320,7 +358,7 @@ start_softclock(void *dummy)
panic("died while creating standard software ithreads");
cc->cc_callout = NULL; /* Only cpu0 handles timeout(). */
cc->cc_callwheel = malloc(
- sizeof(struct callout_tailq) * callwheelsize, M_CALLOUT,
+ sizeof(struct callout_list) * callwheelsize, M_CALLOUT,
M_WAITOK);
callout_cpu_init(cc);
}
@@ -329,64 +367,148 @@ start_softclock(void *dummy)
SYSINIT(start_softclock, SI_SUB_SOFTINTR, SI_ORDER_FIRST, start_softclock, NULL);
+#define CC_HASH_SHIFT 8
+
+static inline u_int
+callout_hash(sbintime_t sbt)
+{
+
+ return (sbt >> (32 - CC_HASH_SHIFT));
+}
+
+static inline u_int
+callout_get_bucket(sbintime_t sbt)
+{
+
+ return (callout_hash(sbt) & callwheelmask);
+}
+
void
-callout_tick(void)
+callout_process(sbintime_t now)
{
+ struct callout *tmp, *tmpn;
struct callout_cpu *cc;
- int need_softclock;
- int bucket;
+ struct callout_list *sc;
+ sbintime_t first, last, max, tmp_max;
+ uint32_t lookahead;
+ u_int firstb, lastb, nowb;
+#ifdef CALLOUT_PROFILING
+ int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0;
+#endif
- /*
- * Process callouts at a very low cpu priority, so we don't keep the
- * relatively high clock interrupt priority any longer than necessary.
- */
- need_softclock = 0;
cc = CC_SELF();
mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
- cc->cc_firsttick = cc->cc_ticks = ticks;
- for (; (cc->cc_softticks - cc->cc_ticks) <= 0; cc->cc_softticks++) {
- bucket = cc->cc_softticks & callwheelmask;
- if (!TAILQ_EMPTY(&cc->cc_callwheel[bucket])) {
- need_softclock = 1;
- break;
- }
+
+ /* Compute the buckets of the last scan and present times. */
+ firstb = callout_hash(cc->cc_lastscan);
+ cc->cc_lastscan = now;
+ nowb = callout_hash(now);
+
+ /* Compute the last bucket and minimum time of the bucket after it. */
+ if (nowb == firstb)
+ lookahead = (SBT_1S / 16);
+ else if (nowb - firstb == 1)
+ lookahead = (SBT_1S / 8);
+ else
+ lookahead = (SBT_1S / 2);
+ first = last = now;
+ first += (lookahead / 2);
+ last += lookahead;
+ last &= (0xffffffffffffffffLLU << (32 - CC_HASH_SHIFT));
+ lastb = callout_hash(last) - 1;
+ max = last;
+
+ /*
+ * Check if we wrapped around the entire wheel from the last scan.
+ * If so, we need to scan the entire wheel for pending callouts.
+ */
+ if (lastb - firstb >= callwheelsize) {
+ lastb = firstb + callwheelsize - 1;
+ if (nowb - firstb >= callwheelsize)
+ nowb = lastb;
}
+
+ /* Iterate callwheel from firstb to nowb and then up to lastb. */
+ do {
+ sc = &cc->cc_callwheel[firstb & callwheelmask];
+ tmp = LIST_FIRST(sc);
+ while (tmp != NULL) {
+ /* Run the callout if present time within allowed. */
+ if (tmp->c_time <= now) {
+ /*
+ * Consumer told us the callout may be run
+ * directly from hardware interrupt context.
+ */
+ if (tmp->c_flags & CALLOUT_DIRECT) {
+#ifdef CALLOUT_PROFILING
+ ++depth_dir;
+#endif
+ cc->cc_exec_next_dir =
+ LIST_NEXT(tmp, c_links.le);
+ cc->cc_bucket = firstb & callwheelmask;
+ LIST_REMOVE(tmp, c_links.le);
+ softclock_call_cc(tmp, cc,
+#ifdef CALLOUT_PROFILING
+ &mpcalls_dir, &lockcalls_dir, NULL,
+#endif
+ 1);
+ tmp = cc->cc_exec_next_dir;
+ } else {
+ tmpn = LIST_NEXT(tmp, c_links.le);
+ LIST_REMOVE(tmp, c_links.le);
+ TAILQ_INSERT_TAIL(&cc->cc_expireq,
+ tmp, c_links.tqe);
+ tmp->c_flags |= CALLOUT_PROCESSED;
+ tmp = tmpn;
+ }
+ continue;
+ }
+ /* Skip events from distant future. */
+ if (tmp->c_time >= max)
+ goto next;
+ /*
+ * Event minimal time is bigger than present maximal
+ * time, so it cannot be aggregated.
+ */
+ if (tmp->c_time > last) {
+ lastb = nowb;
+ goto next;
+ }
+ /* Update first and last time, respecting this event. */
+ if (tmp->c_time < first)
+ first = tmp->c_time;
+ tmp_max = tmp->c_time + tmp->c_precision;
+ if (tmp_max < last)
+ last = tmp_max;
+next:
+ tmp = LIST_NEXT(tmp, c_links.le);
+ }
+ /* Proceed with the next bucket. */
+ firstb++;
+ /*
+ * Stop if we looked after present time and found
+ * some event we can't execute at now.
+ * Stop if we looked far enough into the future.
+ */
+ } while (((int)(firstb - lastb)) <= 0);
+ cc->cc_firstevent = last;
+#ifndef NO_EVENTTIMERS
+ cpu_new_callout(curcpu, last, first);
+#endif
+#ifdef CALLOUT_PROFILING
+ avg_depth_dir += (depth_dir * 1000 - avg_depth_dir) >> 8;
+ avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8;
+ avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8;
+#endif
mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
/*
* swi_sched acquires the thread lock, so we don't want to call it
* with cc_lock held; incorrect locking order.
*/
- if (need_softclock)
+ if (!TAILQ_EMPTY(&cc->cc_expireq))
swi_sched(cc->cc_cookie, 0);
}
-int
-callout_tickstofirst(int limit)
-{
- struct callout_cpu *cc;
- struct callout *c;
- struct callout_tailq *sc;
- int curticks;
- int skip = 1;
-
- cc = CC_SELF();
- mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET);
- curticks = cc->cc_ticks;
- while( skip < ncallout && skip < limit ) {
- sc = &cc->cc_callwheel[ (curticks+skip) & callwheelmask ];
- /* search scanning ticks */
- TAILQ_FOREACH( c, sc, c_links.tqe ){
- if (c->c_time - curticks <= ncallout)
- goto out;
- }
- skip++;
- }
-out:
- cc->cc_firsttick = curticks + skip;
- mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET);
- return (skip);
-}
-
static struct callout_cpu *
callout_lock(struct callout *c)
{
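
The callout_process()/callout_hash() hunk above replaces the tick-indexed callwheel with buckets hashed on time: with CC_HASH_SHIFT equal to 8, each hash step covers 1/256 of a second of the 32.32 time value. The sketch below models the bucket selection (not part of the patch; the wheel size is an assumed power of two, the kernel sizes the real wheel at boot).

/*
 * Model of the new callwheel bucketing.
 */
#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;
#define SBT_1S          ((sbintime_t)1 << 32)
#define CC_HASH_SHIFT   8
#define CALLWHEELSIZE   512u            /* assumed; real size set at boot */
#define CALLWHEELMASK   (CALLWHEELSIZE - 1)

static unsigned
callout_hash(sbintime_t sbt)
{
    /* One hash step per 1/256 s of the 32.32 time value. */
    return ((unsigned)(sbt >> (32 - CC_HASH_SHIFT)));
}

static unsigned
callout_get_bucket(sbintime_t sbt)
{
    return (callout_hash(sbt) & CALLWHEELMASK);
}

int
main(void)
{
    sbintime_t t = 7 * SBT_1S + SBT_1S / 2;     /* a callout due at 7.5 s */

    /* Prints "hash 1920 -> bucket 384 of 512". */
    printf("hash %u -> bucket %u of %u\n",
        callout_hash(t), callout_get_bucket(t), CALLWHEELSIZE);
    return (0);
}
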
@@ -412,26 +534,41 @@ callout_lock(struct callout *c)
}
static void
-callout_cc_add(struct callout *c, struct callout_cpu *cc, int to_ticks,
- void (*func)(void *), void *arg, int cpu)
+callout_cc_add(struct callout *c, struct callout_cpu *cc,
+ sbintime_t sbt, sbintime_t precision, void (*func)(void *),
+ void *arg, int cpu, int flags)
{
+ int bucket;
CC_LOCK_ASSERT(cc);
-
- if (to_ticks <= 0)
- to_ticks = 1;
+ if (sbt < cc->cc_lastscan)
+ sbt = cc->cc_lastscan;
c->c_arg = arg;
c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING);
+ if (flags & C_DIRECT_EXEC)
+ c->c_flags |= CALLOUT_DIRECT;
+ c->c_flags &= ~CALLOUT_PROCESSED;
c->c_func = func;
- c->c_time = ticks + to_ticks;
- TAILQ_INSERT_TAIL(&cc->cc_callwheel[c->c_time & callwheelmask],
- c, c_links.tqe);
- if ((c->c_time - cc->cc_firsttick) < 0 &&
- callout_new_inserted != NULL) {
- cc->cc_firsttick = c->c_time;
- (*callout_new_inserted)(cpu,
- to_ticks + (ticks - cc->cc_ticks));
+ c->c_time = sbt;
+ c->c_precision = precision;
+ bucket = callout_get_bucket(c->c_time);
+ CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x",
+ c, (int)(c->c_precision >> 32),
+ (u_int)(c->c_precision & 0xffffffff));
+ LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le);
+ if (cc->cc_bucket == bucket)
+ cc->cc_exec_next_dir = c;
+#ifndef NO_EVENTTIMERS
+ /*
+ * Inform the eventtimers(4) subsystem there's a new callout
+ * that has been inserted, but only if really required.
+ */
+ sbt = c->c_time + c->c_precision;
+ if (sbt < cc->cc_firstevent) {
+ cc->cc_firstevent = sbt;
+ cpu_new_callout(cpu, sbt, c->c_time);
}
+#endif
}
static void
@@ -445,8 +582,11 @@ callout_cc_del(struct callout *c, struct callout_cpu *cc)
}
static void
-softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
- int *lockcalls, int *gcalls)
+softclock_call_cc(struct callout *c, struct callout_cpu *cc,
+#ifdef CALLOUT_PROFILING
+ int *mpcalls, int *lockcalls, int *gcalls,
+#endif
+ int direct)
{
void (*c_func)(void *);
void *c_arg;
@@ -457,12 +597,13 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
struct callout_cpu *new_cc;
void (*new_func)(void *);
void *new_arg;
- int new_cpu, new_ticks;
+ int flags, new_cpu;
+ sbintime_t new_time;
#endif
-#ifdef DIAGNOSTIC
- struct bintime bt1, bt2;
+#if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING)
+ sbintime_t bt1, bt2;
struct timespec ts2;
- static uint64_t maxdt = 36893488147419102LL; /* 2 msec */
+ static sbintime_t maxdt = 2 * SBT_1MS; /* 2 msec */
static timeout_t *lastfunc;
#endif
@@ -479,8 +620,8 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
c->c_flags = CALLOUT_LOCAL_ALLOC;
else
c->c_flags &= ~CALLOUT_PENDING;
- cc->cc_curr = c;
- cc->cc_cancel = 0;
+ cc->cc_exec_entity[direct].cc_curr = c;
+ cc->cc_exec_entity[direct].cc_cancel = FALSE;
CC_UNLOCK(cc);
if (c_lock != NULL) {
class->lc_lock(c_lock, sharedlock);
@@ -488,29 +629,34 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
* The callout may have been cancelled
* while we switched locks.
*/
- if (cc->cc_cancel) {
+ if (cc->cc_exec_entity[direct].cc_cancel) {
class->lc_unlock(c_lock);
goto skip;
}
/* The callout cannot be stopped now. */
- cc->cc_cancel = 1;
-
+ cc->cc_exec_entity[direct].cc_cancel = TRUE;
if (c_lock == &Giant.lock_object) {
+#ifdef CALLOUT_PROFILING
(*gcalls)++;
- CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
+#endif
+ CTR3(KTR_CALLOUT, "callout giant %p func %p arg %p",
c, c_func, c_arg);
} else {
+#ifdef CALLOUT_PROFILING
(*lockcalls)++;
+#endif
CTR3(KTR_CALLOUT, "callout lock %p func %p arg %p",
c, c_func, c_arg);
}
} else {
+#ifdef CALLOUT_PROFILING
(*mpcalls)++;
- CTR3(KTR_CALLOUT, "callout mpsafe %p func %p arg %p",
+#endif
+ CTR3(KTR_CALLOUT, "callout %p func %p arg %p",
c, c_func, c_arg);
}
#ifdef DIAGNOSTIC
- binuptime(&bt1);
+ bt1 = sbinuptime();
#endif
THREAD_NO_SLEEPING();
SDT_PROBE(callout_execute, kernel, , callout_start, c, 0, 0, 0, 0);
@@ -518,16 +664,16 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
SDT_PROBE(callout_execute, kernel, , callout_end, c, 0, 0, 0, 0);
THREAD_SLEEPING_OK();
#ifdef DIAGNOSTIC
- binuptime(&bt2);
- bintime_sub(&bt2, &bt1);
- if (bt2.frac > maxdt) {
- if (lastfunc != c_func || bt2.frac > maxdt * 2) {
- bintime2timespec(&bt2, &ts2);
+ bt2 = sbinuptime();
+ bt2 -= bt1;
+ if (bt2 > maxdt) {
+ if (lastfunc != c_func || bt2 > maxdt * 2) {
+ ts2 = sbttots(bt2);
printf(
"Expensive timeout(9) function: %p(%p) %jd.%09ld s\n",
c_func, c_arg, (intmax_t)ts2.tv_sec, ts2.tv_nsec);
}
- maxdt = bt2.frac;
+ maxdt = bt2;
lastfunc = c_func;
}
#endif
@@ -536,17 +682,17 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, int *mpcalls,
class->lc_unlock(c_lock);
skip:
CC_LOCK(cc);
- KASSERT(cc->cc_curr == c, ("mishandled cc_curr"));
- cc->cc_curr = NULL;
- if (cc->cc_waiting) {
+ KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr"));
+ cc->cc_exec_entity[direct].cc_curr = NULL;
+ if (cc->cc_exec_entity[direct].cc_waiting) {
/*
* There is someone waiting for the
* callout to complete.
* If the callout was scheduled for
* migration just cancel it.
*/
- if (cc_cme_migrating(cc)) {
- cc_cme_cleanup(cc);
+ if (cc_cce_migrating(cc, direct)) {
+ cc_cce_cleanup(cc, direct);
/*
* It should be assert here that the callout is not
@@ -554,11 +700,11 @@ skip:
*/
c->c_flags &= ~CALLOUT_DFRMIGRATION;
}
- cc->cc_waiting = 0;
+ cc->cc_exec_entity[direct].cc_waiting = FALSE;
CC_UNLOCK(cc);
- wakeup(&cc->cc_waiting);
+ wakeup(&cc->cc_exec_entity[direct].cc_waiting);
CC_LOCK(cc);
- } else if (cc_cme_migrating(cc)) {
+ } else if (cc_cce_migrating(cc, direct)) {
KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0,
("Migrating legacy callout %p", c));
#ifdef SMP
@@ -566,11 +712,11 @@ skip:
* If the callout was scheduled for
* migration just perform it now.
*/
- new_cpu = cc->cc_migration_cpu;
- new_ticks = cc->cc_migration_ticks;
- new_func = cc->cc_migration_func;
- new_arg = cc->cc_migration_arg;
- cc_cme_cleanup(cc);
+ new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu;
+ new_time = cc->cc_exec_entity[direct].ce_migration_time;
+ new_func = cc->cc_exec_entity[direct].ce_migration_func;
+ new_arg = cc->cc_exec_entity[direct].ce_migration_arg;
+ cc_cce_cleanup(cc, direct);
/*
* It should be assert here that the callout is not destroyed
@@ -588,8 +734,9 @@ skip:
c->c_flags &= ~CALLOUT_DFRMIGRATION;
new_cc = callout_cpu_switch(c, cc, new_cpu);
- callout_cc_add(c, new_cc, new_ticks, new_func, new_arg,
- new_cpu);
+ flags = (direct) ? C_DIRECT_EXEC : 0;
+ callout_cc_add(c, new_cc, new_time, c->c_precision, new_func,
+ new_arg, new_cpu, flags);
CC_UNLOCK(new_cc);
CC_LOCK(cc);
#else
@@ -632,63 +779,29 @@ softclock(void *arg)
{
struct callout_cpu *cc;
struct callout *c;
- struct callout_tailq *bucket;
- int curticks;
- int steps; /* #steps since we last allowed interrupts */
- int depth;
- int mpcalls;
- int lockcalls;
- int gcalls;
-
-#ifndef MAX_SOFTCLOCK_STEPS
-#define MAX_SOFTCLOCK_STEPS 100 /* Maximum allowed value of steps. */
-#endif /* MAX_SOFTCLOCK_STEPS */
-
- mpcalls = 0;
- lockcalls = 0;
- gcalls = 0;
- depth = 0;
- steps = 0;
+#ifdef CALLOUT_PROFILING
+ int depth = 0, gcalls = 0, lockcalls = 0, mpcalls = 0;
+#endif
+
cc = (struct callout_cpu *)arg;
CC_LOCK(cc);
- while (cc->cc_softticks - 1 != cc->cc_ticks) {
- /*
- * cc_softticks may be modified by hard clock, so cache
- * it while we work on a given bucket.
- */
- curticks = cc->cc_softticks;
- cc->cc_softticks++;
- bucket = &cc->cc_callwheel[curticks & callwheelmask];
- c = TAILQ_FIRST(bucket);
- while (c != NULL) {
- depth++;
- if (c->c_time != curticks) {
- c = TAILQ_NEXT(c, c_links.tqe);
- ++steps;
- if (steps >= MAX_SOFTCLOCK_STEPS) {
- cc->cc_next = c;
- /* Give interrupts a chance. */
- CC_UNLOCK(cc);
- ; /* nothing */
- CC_LOCK(cc);
- c = cc->cc_next;
- steps = 0;
- }
- } else {
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
- TAILQ_REMOVE(bucket, c, c_links.tqe);
- softclock_call_cc(c, cc, &mpcalls,
- &lockcalls, &gcalls);
- steps = 0;
- c = cc->cc_next;
- }
- }
+ while ((c = TAILQ_FIRST(&cc->cc_expireq)) != NULL) {
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
+ softclock_call_cc(c, cc,
+#ifdef CALLOUT_PROFILING
+ &mpcalls, &lockcalls, &gcalls,
+#endif
+ 0);
+#ifdef CALLOUT_PROFILING
+ ++depth;
+#endif
}
+#ifdef CALLOUT_PROFILING
avg_depth += (depth * 1000 - avg_depth) >> 8;
avg_mpcalls += (mpcalls * 1000 - avg_mpcalls) >> 8;
avg_lockcalls += (lockcalls * 1000 - avg_lockcalls) >> 8;
avg_gcalls += (gcalls * 1000 - avg_gcalls) >> 8;
- cc->cc_next = NULL;
+#endif
CC_UNLOCK(cc);
}
@@ -778,28 +891,71 @@ callout_handle_init(struct callout_handle *handle)
* callout_deactivate() - marks the callout as having been serviced
*/
int
-callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
- void *arg, int cpu)
+callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision,
+ void (*ftn)(void *), void *arg, int cpu, int flags)
{
+ sbintime_t to_sbt, pr;
struct callout_cpu *cc;
- int cancelled = 0;
+ int cancelled, direct;
+ cancelled = 0;
+ if (flags & C_ABSOLUTE) {
+ to_sbt = sbt;
+ } else {
+ if ((flags & C_HARDCLOCK) && (sbt < tick_sbt))
+ sbt = tick_sbt;
+ if ((flags & C_HARDCLOCK) ||
+#ifdef NO_EVENTTIMERS
+ sbt >= sbt_timethreshold) {
+ to_sbt = getsbinuptime();
+
+ /* Add safety belt for the case of hz > 1000. */
+ to_sbt += tc_tick_sbt - tick_sbt;
+#else
+ sbt >= sbt_tickthreshold) {
+ /*
+ * Obtain the time of the last hardclock() call on
+ * this CPU directly from the kern_clocksource.c.
+ * This value is per-CPU, but it is equal for all
+ * active ones.
+ */
+#ifdef __LP64__
+ to_sbt = DPCPU_GET(hardclocktime);
+#else
+ spinlock_enter();
+ to_sbt = DPCPU_GET(hardclocktime);
+ spinlock_exit();
+#endif
+#endif
+ if ((flags & C_HARDCLOCK) == 0)
+ to_sbt += tick_sbt;
+ } else
+ to_sbt = sbinuptime();
+ to_sbt += sbt;
+ pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp :
+ sbt >> C_PRELGET(flags));
+ if (pr > precision)
+ precision = pr;
+ }
/*
* Don't allow migration of pre-allocated callouts lest they
* become unbalanced.
*/
if (c->c_flags & CALLOUT_LOCAL_ALLOC)
cpu = c->c_cpu;
+ direct = (c->c_flags & CALLOUT_DIRECT) != 0;
+ KASSERT(!direct || c->c_lock == NULL,
+ ("%s: direct callout %p has lock", __func__, c));
cc = callout_lock(c);
- if (cc->cc_curr == c) {
+ if (cc->cc_exec_entity[direct].cc_curr == c) {
/*
* We're being asked to reschedule a callout which is
* currently in progress. If there is a lock then we
* can cancel the callout if it has not really started.
*/
- if (c->c_lock != NULL && !cc->cc_cancel)
- cancelled = cc->cc_cancel = 1;
- if (cc->cc_waiting) {
+ if (c->c_lock != NULL && !cc->cc_exec_entity[direct].cc_cancel)
+ cancelled = cc->cc_exec_entity[direct].cc_cancel = TRUE;
+ if (cc->cc_exec_entity[direct].cc_waiting) {
/*
* Someone has called callout_drain to kill this
* callout. Don't reschedule.
@@ -812,12 +968,12 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
}
}
if (c->c_flags & CALLOUT_PENDING) {
- if (cc->cc_next == c) {
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
- }
- TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
- c_links.tqe);
-
+ if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
+ if (cc->cc_exec_next_dir == c)
+ cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le);
+ LIST_REMOVE(c, c_links.le);
+ } else
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
cancelled = 1;
c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
}
@@ -829,15 +985,17 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
* to a more appropriate moment.
*/
if (c->c_cpu != cpu) {
- if (cc->cc_curr == c) {
- cc->cc_migration_cpu = cpu;
- cc->cc_migration_ticks = to_ticks;
- cc->cc_migration_func = ftn;
- cc->cc_migration_arg = arg;
+ if (cc->cc_exec_entity[direct].cc_curr == c) {
+ cc->cc_exec_entity[direct].ce_migration_cpu = cpu;
+ cc->cc_exec_entity[direct].ce_migration_time
+ = to_sbt;
+ cc->cc_exec_entity[direct].ce_migration_func = ftn;
+ cc->cc_exec_entity[direct].ce_migration_arg = arg;
c->c_flags |= CALLOUT_DFRMIGRATION;
- CTR5(KTR_CALLOUT,
- "migration of %p func %p arg %p in %d to %u deferred",
- c, c->c_func, c->c_arg, to_ticks, cpu);
+ CTR6(KTR_CALLOUT,
+ "migration of %p func %p arg %p in %d.%08x to %u deferred",
+ c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
+ (u_int)(to_sbt & 0xffffffff), cpu);
CC_UNLOCK(cc);
return (cancelled);
}
@@ -845,9 +1003,10 @@ callout_reset_on(struct callout *c, int to_ticks, void (*ftn)(void *),
}
#endif
- callout_cc_add(c, cc, to_ticks, ftn, arg, cpu);
- CTR5(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d",
- cancelled ? "re" : "", c, c->c_func, c->c_arg, to_ticks);
+ callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags);
+ CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x",
+ cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32),
+ (u_int)(to_sbt & 0xffffffff));
CC_UNLOCK(cc);
return (cancelled);
@@ -875,7 +1034,7 @@ _callout_stop_safe(c, safe)
{
struct callout_cpu *cc, *old_cc;
struct lock_class *class;
- int use_lock, sq_locked;
+ int direct, sq_locked, use_lock;
/*
* Some old subsystems don't hold Giant while running a callout_stop(),
@@ -891,7 +1050,7 @@ _callout_stop_safe(c, safe)
}
} else
use_lock = 0;
-
+ direct = (c->c_flags & CALLOUT_DIRECT) != 0;
sq_locked = 0;
old_cc = NULL;
again:
@@ -905,7 +1064,7 @@ again:
if (sq_locked != 0 && cc != old_cc) {
#ifdef SMP
CC_UNLOCK(cc);
- sleepq_release(&old_cc->cc_waiting);
+ sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting);
sq_locked = 0;
old_cc = NULL;
goto again;
@@ -926,12 +1085,13 @@ again:
* If it wasn't on the queue and it isn't the current
* callout, then we can't stop it, so just bail.
*/
- if (cc->cc_curr != c) {
+ if (cc->cc_exec_entity[direct].cc_curr != c) {
CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p",
c, c->c_func, c->c_arg);
CC_UNLOCK(cc);
if (sq_locked)
- sleepq_release(&cc->cc_waiting);
+ sleepq_release(
+ &cc->cc_exec_entity[direct].cc_waiting);
return (0);
}
@@ -942,8 +1102,7 @@ again:
* just wait for the current invocation to
* finish.
*/
- while (cc->cc_curr == c) {
-
+ while (cc->cc_exec_entity[direct].cc_curr == c) {
/*
* Use direct calls to sleepqueue interface
* instead of cv/msleep in order to avoid
@@ -963,7 +1122,8 @@ again:
*/
if (!sq_locked) {
CC_UNLOCK(cc);
- sleepq_lock(&cc->cc_waiting);
+ sleepq_lock(
+ &cc->cc_exec_entity[direct].cc_waiting);
sq_locked = 1;
old_cc = cc;
goto again;
@@ -975,13 +1135,16 @@ again:
* will be packed up, just let softclock()
* take care of it.
*/
- cc->cc_waiting = 1;
+ cc->cc_exec_entity[direct].cc_waiting = TRUE;
DROP_GIANT();
CC_UNLOCK(cc);
- sleepq_add(&cc->cc_waiting,
+ sleepq_add(
+ &cc->cc_exec_entity[direct].cc_waiting,
&cc->cc_lock.lock_object, "codrain",
SLEEPQ_SLEEP, 0);
- sleepq_wait(&cc->cc_waiting, 0);
+ sleepq_wait(
+ &cc->cc_exec_entity[direct].cc_waiting,
+ 0);
sq_locked = 0;
old_cc = NULL;
@@ -989,7 +1152,8 @@ again:
PICKUP_GIANT();
CC_LOCK(cc);
}
- } else if (use_lock && !cc->cc_cancel) {
+ } else if (use_lock &&
+ !cc->cc_exec_entity[direct].cc_cancel) {
/*
* The current callout is waiting for its
* lock which we hold. Cancel the callout
@@ -997,10 +1161,10 @@ again:
* lock, the callout will be skipped in
* softclock().
*/
- cc->cc_cancel = 1;
+ cc->cc_exec_entity[direct].cc_cancel = TRUE;
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
- KASSERT(!cc_cme_migrating(cc),
+ KASSERT(!cc_cce_migrating(cc, direct),
("callout wrongly scheduled for migration"));
CC_UNLOCK(cc);
KASSERT(!sq_locked, ("sleepqueue chain locked"));
@@ -1019,16 +1183,18 @@ again:
return (0);
}
if (sq_locked)
- sleepq_release(&cc->cc_waiting);
+ sleepq_release(&cc->cc_exec_entity[direct].cc_waiting);
c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING);
CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p",
c, c->c_func, c->c_arg);
- if (cc->cc_next == c)
- cc->cc_next = TAILQ_NEXT(c, c_links.tqe);
- TAILQ_REMOVE(&cc->cc_callwheel[c->c_time & callwheelmask], c,
- c_links.tqe);
+ if ((c->c_flags & CALLOUT_PROCESSED) == 0) {
+ if (cc->cc_exec_next_dir == c)
+ cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le);
+ LIST_REMOVE(c, c_links.le);
+ } else
+ TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe);
callout_cc_del(c, cc);
CC_UNLOCK(cc);
@@ -1135,3 +1301,121 @@ adjust_timeout_calltodo(time_change)
return;
}
#endif /* APM_FIXUP_CALLTODO */
+
+static int
+flssbt(sbintime_t sbt)
+{
+
+ sbt += (uint64_t)sbt >> 1;
+ if (sizeof(long) >= sizeof(sbintime_t))
+ return (flsl(sbt));
+ if (sbt >= SBT_1S)
+ return (flsl(((uint64_t)sbt) >> 32) + 32);
+ return (flsl(sbt));
+}
+
+/*
+ * Dump immediate statistic snapshot of the scheduled callouts.
+ */
+static int
+sysctl_kern_callout_stat(SYSCTL_HANDLER_ARGS)
+{
+ struct callout *tmp;
+ struct callout_cpu *cc;
+ struct callout_list *sc;
+ sbintime_t maxpr, maxt, medpr, medt, now, spr, st, t;
+ int ct[64], cpr[64], ccpbk[32];
+ int error, val, i, count, tcum, pcum, maxc, c, medc;
+#ifdef SMP
+ int cpu;
+#endif
+
+ val = 0;
+ error = sysctl_handle_int(oidp, &val, 0, req);
+ if (error != 0 || req->newptr == NULL)
+ return (error);
+ count = maxc = 0;
+ st = spr = maxt = maxpr = 0;
+ bzero(ccpbk, sizeof(ccpbk));
+ bzero(ct, sizeof(ct));
+ bzero(cpr, sizeof(cpr));
+ now = sbinuptime();
+#ifdef SMP
+ CPU_FOREACH(cpu) {
+ cc = CC_CPU(cpu);
+#else
+ cc = CC_CPU(timeout_cpu);
+#endif
+ CC_LOCK(cc);
+ for (i = 0; i < callwheelsize; i++) {
+ sc = &cc->cc_callwheel[i];
+ c = 0;
+ LIST_FOREACH(tmp, sc, c_links.le) {
+ c++;
+ t = tmp->c_time - now;
+ if (t < 0)
+ t = 0;
+ st += t / SBT_1US;
+ spr += tmp->c_precision / SBT_1US;
+ if (t > maxt)
+ maxt = t;
+ if (tmp->c_precision > maxpr)
+ maxpr = tmp->c_precision;
+ ct[flssbt(t)]++;
+ cpr[flssbt(tmp->c_precision)]++;
+ }
+ if (c > maxc)
+ maxc = c;
+ ccpbk[fls(c + c / 2)]++;
+ count += c;
+ }
+ CC_UNLOCK(cc);
+#ifdef SMP
+ }
+#endif
+
+ for (i = 0, tcum = 0; i < 64 && tcum < count / 2; i++)
+ tcum += ct[i];
+ medt = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
+ for (i = 0, pcum = 0; i < 64 && pcum < count / 2; i++)
+ pcum += cpr[i];
+ medpr = (i >= 2) ? (((sbintime_t)1) << (i - 2)) : 0;
+ for (i = 0, c = 0; i < 32 && c < count / 2; i++)
+ c += ccpbk[i];
+ medc = (i >= 2) ? (1 << (i - 2)) : 0;
+
+ printf("Scheduled callouts statistic snapshot:\n");
+ printf(" Callouts: %6d Buckets: %6d*%-3d Bucket size: 0.%06ds\n",
+ count, callwheelsize, mp_ncpus, 1000000 >> CC_HASH_SHIFT);
+ printf(" C/Bk: med %5d avg %6d.%06jd max %6d\n",
+ medc,
+ count / callwheelsize / mp_ncpus,
+ (uint64_t)count * 1000000 / callwheelsize / mp_ncpus % 1000000,
+ maxc);
+ printf(" Time: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
+ medt / SBT_1S, (medt & 0xffffffff) * 1000000 >> 32,
+ (st / count) / 1000000, (st / count) % 1000000,
+ maxt / SBT_1S, (maxt & 0xffffffff) * 1000000 >> 32);
+ printf(" Prec: med %5jd.%06jds avg %6jd.%06jds max %6jd.%06jds\n",
+ medpr / SBT_1S, (medpr & 0xffffffff) * 1000000 >> 32,
+ (spr / count) / 1000000, (spr / count) % 1000000,
+ maxpr / SBT_1S, (maxpr & 0xffffffff) * 1000000 >> 32);
+ printf(" Distribution: \tbuckets\t time\t tcum\t"
+ " prec\t pcum\n");
+ for (i = 0, tcum = pcum = 0; i < 64; i++) {
+ if (ct[i] == 0 && cpr[i] == 0)
+ continue;
+ t = (i != 0) ? (((sbintime_t)1) << (i - 1)) : 0;
+ tcum += ct[i];
+ pcum += cpr[i];
+ printf(" %10jd.%06jds\t 2**%d\t%7d\t%7d\t%7d\t%7d\n",
+ t / SBT_1S, (t & 0xffffffff) * 1000000 >> 32,
+ i - 1 - (32 - CC_HASH_SHIFT),
+ ct[i], tcum, cpr[i], pcum);
+ }
+ return (error);
+}
+SYSCTL_PROC(_kern, OID_AUTO, callout_stat,
+ CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE,
+ 0, 0, sysctl_kern_callout_stat, "I",
+ "Dump immediate statistic snapshot of the scheduled callouts");
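
Usage sketch (not part of the patch): flssbt() above maps an sbintime_t onto a
power-of-two histogram bucket (the sbt += sbt >> 1 bias means values within
roughly a third of 2^k land in the 2^k bucket), and the sysctl handler only
prints its snapshot, to the kernel console, when a value is written to the OID.
From a shell that would be "sysctl kern.callout_stat=1"; a minimal C equivalent,
assuming only the kern.callout_stat name registered by the SYSCTL_PROC above:

/* Hypothetical userland trigger for the snapshot; only the OID name
 * kern.callout_stat registered above is assumed. */
#include <sys/types.h>
#include <sys/sysctl.h>
#include <err.h>

int
main(void)
{
        int one = 1;

        /* Writing any value makes the handler print its statistics. */
        if (sysctlbyname("kern.callout_stat", NULL, NULL, &one,
            sizeof(one)) != 0)
                err(1, "sysctlbyname(kern.callout_stat)");
        return (0);
}
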
diff --git a/sys/kern/subr_param.c b/sys/kern/subr_param.c
index f36c769..0a3580b 100644
--- a/sys/kern/subr_param.c
+++ b/sys/kern/subr_param.c
@@ -81,8 +81,10 @@ __FBSDID("$FreeBSD$");
static int sysctl_kern_vm_guest(SYSCTL_HANDLER_ARGS);
-int hz;
-int tick;
+int hz; /* system clock's frequency */
+int tick; /* usec per tick (1000000 / hz) */
+struct bintime tick_bt; /* bintime per tick (1s / hz) */
+sbintime_t tick_sbt;
int maxusers; /* base tunable */
int maxproc; /* maximum # of processes */
int maxprocperuid; /* max # of procs per user */
@@ -221,6 +223,8 @@ init_param1(void)
if (hz == -1)
hz = vm_guest > VM_GUEST_NO ? HZ_VM : HZ;
tick = 1000000 / hz;
+ tick_sbt = SBT_1S / hz;
+ tick_bt = sbttobt(tick_sbt);
#ifdef VM_SWZONE_SIZE_MAX
maxswzone = VM_SWZONE_SIZE_MAX;
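
For context (not part of the patch): sbintime_t is a 64-bit 32.32 fixed-point
count of seconds, so SBT_1S is 1 << 32 and the new tick_sbt is simply one
hardclock tick expressed in that format. A standalone sketch of the arithmetic,
with the type and constant redefined locally for illustration:

/* Standalone illustration of the tick_sbt computation above. */
#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;
#define SBT_1S  ((sbintime_t)1 << 32)

int
main(void)
{
        int hz = 1000;                          /* example HZ value */
        sbintime_t tick_sbt = SBT_1S / hz;      /* one tick, seconds << 32 */

        /* frac * 1000000 >> 32 converts the fraction to microseconds. */
        printf("hz=%d: tick_sbt=%jd (~%jd us per tick)\n", hz,
            (intmax_t)tick_sbt, (intmax_t)((tick_sbt * 1000000) >> 32));
        return (0);
}
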
diff --git a/sys/netinet/tcp_timer.c b/sys/netinet/tcp_timer.c
index 48444c1..bde7503 100644
--- a/sys/netinet/tcp_timer.c
+++ b/sys/netinet/tcp_timer.c
@@ -719,20 +719,24 @@ tcp_timer_active(struct tcpcb *tp, int timer_type)
#define ticks_to_msecs(t) (1000*(t) / hz)
void
-tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer, struct xtcp_timer *xtimer)
+tcp_timer_to_xtimer(struct tcpcb *tp, struct tcp_timer *timer,
+ struct xtcp_timer *xtimer)
{
- bzero(xtimer, sizeof(struct xtcp_timer));
+ sbintime_t now;
+
+ bzero(xtimer, sizeof(*xtimer));
if (timer == NULL)
return;
+ now = getsbinuptime();
if (callout_active(&timer->tt_delack))
- xtimer->tt_delack = ticks_to_msecs(timer->tt_delack.c_time - ticks);
+ xtimer->tt_delack = (timer->tt_delack.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_rexmt))
- xtimer->tt_rexmt = ticks_to_msecs(timer->tt_rexmt.c_time - ticks);
+ xtimer->tt_rexmt = (timer->tt_rexmt.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_persist))
- xtimer->tt_persist = ticks_to_msecs(timer->tt_persist.c_time - ticks);
+ xtimer->tt_persist = (timer->tt_persist.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_keep))
- xtimer->tt_keep = ticks_to_msecs(timer->tt_keep.c_time - ticks);
+ xtimer->tt_keep = (timer->tt_keep.c_time - now) / SBT_1MS;
if (callout_active(&timer->tt_2msl))
- xtimer->tt_2msl = ticks_to_msecs(timer->tt_2msl.c_time - ticks);
+ xtimer->tt_2msl = (timer->tt_2msl.c_time - now) / SBT_1MS;
xtimer->t_rcvtime = ticks_to_msecs(ticks - tp->t_rcvtime);
}
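
The hunk above switches the exported TCP timer values from tick deltas to
absolute sbintime_t deadlines: the time left is now (c_time - getsbinuptime())
divided by SBT_1MS. A standalone sketch of that conversion, with local
redefinitions and a made-up helper name (the in-tree code inlines the
expression and only converts timers that are callout_active(), so it does not
clamp):

/* sbt_remaining_ms() is a hypothetical name; types redefined locally. */
#include <stdint.h>

typedef int64_t sbintime_t;
#define SBT_1MS (((sbintime_t)1 << 32) / 1000)  /* 1 ms in 32.32 format */

/* Milliseconds left until an absolute deadline, clamped at zero. */
static int
sbt_remaining_ms(sbintime_t deadline, sbintime_t now)
{

        if (deadline <= now)
                return (0);
        return ((int)((deadline - now) / SBT_1MS));
}
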
diff --git a/sys/sys/_callout.h b/sys/sys/_callout.h
index b8c3ce9..e186aec 100644
--- a/sys/sys/_callout.h
+++ b/sys/sys/_callout.h
@@ -42,15 +42,18 @@
struct lock_object;
-SLIST_HEAD(callout_list, callout);
+LIST_HEAD(callout_list, callout);
+SLIST_HEAD(callout_slist, callout);
TAILQ_HEAD(callout_tailq, callout);
struct callout {
union {
+ LIST_ENTRY(callout) le;
SLIST_ENTRY(callout) sle;
TAILQ_ENTRY(callout) tqe;
} c_links;
- int c_time; /* ticks to the event */
+ sbintime_t c_time; /* ticks to the event */
Note that the comment on c_time is stale after this change: the field now holds an absolute sbintime_t deadline rather than a tick count.
+ sbintime_t c_precision; /* delta allowed wrt opt */
void *c_arg; /* function argument */
void (*c_func)(void *); /* function to call */
struct lock_object *c_lock; /* lock to handle */
diff --git a/sys/sys/callout.h b/sys/sys/callout.h
index 95b9a32..7a4dec9 100644
--- a/sys/sys/callout.h
+++ b/sys/sys/callout.h
@@ -47,6 +47,16 @@
#define CALLOUT_RETURNUNLOCKED 0x0010 /* handler returns with mtx unlocked */
#define CALLOUT_SHAREDLOCK 0x0020 /* callout lock held in shared mode */
#define CALLOUT_DFRMIGRATION 0x0040 /* callout in deferred migration mode */
+#define CALLOUT_PROCESSED 0x0080 /* callout in wheel or processing list? */
+#define CALLOUT_DIRECT 0x0100 /* allow exec from hw int context */
+
+#define C_DIRECT_EXEC 0x0001 /* direct execution of callout */
+#define C_PRELBITS 7
+#define C_PRELRANGE ((1 << C_PRELBITS) - 1)
+#define C_PREL(x) (((x) + 1) << 1)
+#define C_PRELGET(x) (int)((((x) >> 1) & C_PRELRANGE) - 1)
+#define C_HARDCLOCK 0x0100 /* align to hardclock() calls */
+#define C_ABSOLUTE 0x0200 /* event time is absolute. */
struct callout_handle {
struct callout *callout;
@@ -67,7 +77,15 @@ void _callout_init_lock(struct callout *, struct lock_object *, int);
_callout_init_lock((c), ((rw) != NULL) ? &(rw)->lock_object : \
NULL, (flags))
#define callout_pending(c) ((c)->c_flags & CALLOUT_PENDING)
-int callout_reset_on(struct callout *, int, void (*)(void *), void *, int);
+int callout_reset_sbt_on(struct callout *, sbintime_t, sbintime_t,
+ void (*)(void *), void *, int, int);
+#define callout_reset_sbt(c, sbt, pr, fn, arg, flags) \
+ callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), (c)->c_cpu, flags)
+#define callout_reset_sbt_curcpu(c, sbt, pr, fn, arg, flags) \
+ callout_reset_sbt_on((c), (sbt), (pr), (fn), (arg), PCPU_GET(cpuid), flags)
+#define callout_reset_on(c, to_ticks, fn, arg, cpu) \
+ callout_reset_sbt_on((c), (tick_sbt * (to_ticks)), 0, (fn), (arg), \
+ (cpu), C_HARDCLOCK)
#define callout_reset(c, on_tick, fn, arg) \
callout_reset_on((c), (on_tick), (fn), (arg), (c)->c_cpu)
#define callout_reset_curcpu(c, on_tick, fn, arg) \
@@ -78,9 +96,7 @@ int callout_schedule_on(struct callout *, int, int);
callout_schedule_on((c), (on_tick), PCPU_GET(cpuid))
#define callout_stop(c) _callout_stop_safe(c, 0)
int _callout_stop_safe(struct callout *, int);
-void callout_tick(void);
-int callout_tickstofirst(int limit);
-extern void (*callout_new_inserted)(int cpu, int ticks);
+void callout_process(sbintime_t now);
#endif
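
From a consumer's point of view, callout_reset_sbt() takes a relative
sbintime_t (absolute with C_ABSOLUTE), an explicit precision, and flags; as the
C_PREL* macros suggest, C_PREL(n) requests a tolerance of roughly 1/2^n of the
interval, and the legacy callout_reset() now scales ticks by tick_sbt and
passes C_HARDCLOCK. A hedged sketch of a caller, with hypothetical names
(my_co, my_timeout, my_start) and an arbitrary 50 ms period:

/* Hypothetical consumer of the new interface; not from the patch. */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/time.h>
#include <sys/callout.h>

static struct callout my_co;

static void
my_timeout(void *arg)
{

        /* ... periodic work ... */

        /* Re-arm ~50 ms out, aligned to hardclock(), allowing the
         * event to be coalesced within 1/32 of the interval. */
        callout_reset_sbt(&my_co, 50 * SBT_1MS, 0, my_timeout, arg,
            C_HARDCLOCK | C_PREL(5));
}

static void
my_start(void)
{

        callout_init(&my_co, 1);        /* 1 = MPSAFE callout */
        callout_reset_sbt(&my_co, 50 * SBT_1MS, 0, my_timeout, NULL,
            C_HARDCLOCK | C_PREL(5));
}
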
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 11a9cf9..85fa1c8 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -269,6 +269,7 @@ void cpu_startprofclock(void);
void cpu_stopprofclock(void);
sbintime_t cpu_idleclock(void);
void cpu_activeclock(void);
+void cpu_new_callout(int cpu, sbintime_t bt, sbintime_t bt_opt);
extern int cpu_can_deep_sleep;
extern int cpu_disable_deep_sleep;
diff --git a/sys/sys/time.h b/sys/sys/time.h
index a735ad8..761a123 100644
--- a/sys/sys/time.h
+++ b/sys/sys/time.h
@@ -102,6 +102,21 @@ bintime_mul(struct bintime *bt, u_int x)
bt->frac = (p2 << 32) | (p1 & 0xffffffffull);
}
+static __inline void
+bintime_shift(struct bintime *bt, int exp)
+{
+
+ if (exp > 0) {
+ bt->sec <<= exp;
+ bt->sec |= bt->frac >> (64 - exp);
+ bt->frac <<= exp;
+ } else if (exp < 0) {
+ bt->frac >>= -exp;
+ bt->frac |= (uint64_t)bt->sec << (64 + exp);
+ bt->sec >>= -exp;
+ }
+}
+
#define bintime_clear(a) ((a)->sec = (a)->frac = 0)
#define bintime_isset(a) ((a)->sec || (a)->frac)
#define bintime_cmp(a, b, cmp) \
@@ -357,6 +372,16 @@ extern volatile time_t time_second;
extern volatile time_t time_uptime;
extern struct bintime boottimebin;
extern struct timeval boottime;
+extern struct bintime tc_tick_bt;
+extern sbintime_t tc_tick_sbt;
+extern struct bintime tick_bt;
+extern sbintime_t tick_sbt;
+extern int tc_precexp;
+extern int tc_timepercentage;
+extern struct bintime bt_timethreshold;
+extern struct bintime bt_tickthreshold;
+extern sbintime_t sbt_timethreshold;
+extern sbintime_t sbt_tickthreshold;
/*
* Functions for looking at our clock: [get]{bin,nano,micro}[up]time()
@@ -421,6 +446,25 @@ int ratecheck(struct timeval *, const struct timeval *);
void timevaladd(struct timeval *t1, const struct timeval *t2);
void timevalsub(struct timeval *t1, const struct timeval *t2);
int tvtohz(struct timeval *tv);
+
+#define TC_DEFAULTPERC 5
+
+#define BT2FREQ(bt) \
+ (((uint64_t)0x8000000000000000 + ((bt)->frac >> 2)) / \
+ ((bt)->frac >> 1))
+
+#define SBT2FREQ(sbt) ((SBT_1S + ((sbt) >> 1)) / (sbt))
+
+#define FREQ2BT(freq, bt) \
+{ \
+ (bt)->sec = 0; \
+ (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1; \
+}
+
+#define TIMESEL(sbt, sbt2) \
+ (((sbt2) >= sbt_timethreshold) ? \
+ ((*(sbt) = getsbinuptime()), 1) : ((*(sbt) = sbinuptime()), 0))
+
#else /* !_KERNEL */
#include <time.h>
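
The new time.h helpers are fixed-point reciprocals and selectors:
bintime_shift() multiplies or divides a bintime by a power of two, FREQ2BT()
yields the bintime period of a frequency, BT2FREQ()/SBT2FREQ() round back to
the nearest integer frequency, and TIMESEL() picks the cheaper getsbinuptime()
whenever the requested interval exceeds sbt_timethreshold. A standalone
round-trip check of the frequency/period macros, copied locally so no kernel
headers are needed:

/* Illustrative round trip through FREQ2BT()/SBT2FREQ(); the struct
 * layout is simplified (sec is time_t in the tree). */
#include <stdint.h>
#include <stdio.h>

typedef int64_t sbintime_t;
struct bintime {
        int64_t         sec;
        uint64_t        frac;
};

#define SBT_1S          ((sbintime_t)1 << 32)
#define FREQ2BT(freq, bt)                                               \
{                                                                       \
        (bt)->sec = 0;                                                  \
        (bt)->frac = ((uint64_t)0x8000000000000000 / (freq)) << 1;      \
}
#define SBT2FREQ(sbt)   ((SBT_1S + ((sbt) >> 1)) / (sbt))

int
main(void)
{
        struct bintime bt;
        sbintime_t period;
        int freq = 1000;                /* e.g. a 1 kHz event timer */

        FREQ2BT(freq, &bt);             /* period of one cycle as bintime */
        period = (sbintime_t)(bt.frac >> 32);   /* same period, 32.32 */
        printf("freq %d -> period %jd -> freq %jd\n",
            freq, (intmax_t)period, (intmax_t)SBT2FREQ(period));
        return (0);
}
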