diff options
author | dim <dim@FreeBSD.org> | 2015-01-25 23:43:12 +0000 |
---|---|---|
committer | dim <dim@FreeBSD.org> | 2015-01-25 23:43:12 +0000 |
commit | 26ab20c8dc70a806abf158a557db896c283ac441 (patch) | |
tree | f358c5333ff6b8e93e1f4fd1f739daeedb61448d /sys/kern/kern_timeout.c | |
parent | 7db7b571b9ca5f0f069fd72715b1e2d05940dc75 (diff) | |
parent | 588a6eb29f37c0081497dc155776215a84099f02 (diff) | |
download | FreeBSD-src-26ab20c8dc70a806abf158a557db896c283ac441.zip FreeBSD-src-26ab20c8dc70a806abf158a557db896c283ac441.tar.gz |
Merge ^/head r277327 through r277718.
Diffstat (limited to 'sys/kern/kern_timeout.c')
-rw-r--r-- | sys/kern/kern_timeout.c | 1053 |
1 files changed, 533 insertions, 520 deletions
diff --git a/sys/kern/kern_timeout.c b/sys/kern/kern_timeout.c index 4336faa..13822fd 100644 --- a/sys/kern/kern_timeout.c +++ b/sys/kern/kern_timeout.c @@ -54,8 +54,6 @@ __FBSDID("$FreeBSD$"); #include <sys/lock.h> #include <sys/malloc.h> #include <sys/mutex.h> -#include <sys/rmlock.h> -#include <sys/rwlock.h> #include <sys/proc.h> #include <sys/sdt.h> #include <sys/sleepqueue.h> @@ -126,216 +124,37 @@ SYSCTL_INT(_kern, OID_AUTO, pin_pcpu_swi, CTLFLAG_RDTUN | CTLFLAG_NOFETCH, &pin_ */ u_int callwheelsize, callwheelmask; -typedef void callout_mutex_op_t(struct lock_object *); -typedef int callout_owned_op_t(struct lock_object *); - -struct callout_mutex_ops { - callout_mutex_op_t *lock; - callout_mutex_op_t *unlock; - callout_owned_op_t *owned; -}; - -enum { - CALLOUT_LC_UNUSED_0, - CALLOUT_LC_UNUSED_1, - CALLOUT_LC_UNUSED_2, - CALLOUT_LC_UNUSED_3, - CALLOUT_LC_SPIN, - CALLOUT_LC_MUTEX, - CALLOUT_LC_RW, - CALLOUT_LC_RM, -}; - -static void -callout_mutex_op_none(struct lock_object *lock) -{ -} - -static int -callout_owned_op_none(struct lock_object *lock) -{ - return (0); -} - -static void -callout_mutex_lock(struct lock_object *lock) -{ - mtx_lock((struct mtx *)lock); -} - -static void -callout_mutex_unlock(struct lock_object *lock) -{ - mtx_unlock((struct mtx *)lock); -} - -static void -callout_mutex_lock_spin(struct lock_object *lock) -{ - mtx_lock_spin((struct mtx *)lock); -} - -static void -callout_mutex_unlock_spin(struct lock_object *lock) -{ - mtx_unlock_spin((struct mtx *)lock); -} - -static int -callout_mutex_owned(struct lock_object *lock) -{ - return (mtx_owned((struct mtx *)lock)); -} - -static void -callout_rm_wlock(struct lock_object *lock) -{ - rm_wlock((struct rmlock *)lock); -} - -static void -callout_rm_wunlock(struct lock_object *lock) -{ - rm_wunlock((struct rmlock *)lock); -} - -static int -callout_rm_owned(struct lock_object *lock) -{ - return (rm_wowned((struct rmlock *)lock)); -} - -static void -callout_rw_wlock(struct lock_object *lock) -{ - rw_wlock((struct rwlock *)lock); -} - -static void -callout_rw_wunlock(struct lock_object *lock) -{ - rw_wunlock((struct rwlock *)lock); -} - -static int -callout_rw_owned(struct lock_object *lock) -{ - return (rw_wowned((struct rwlock *)lock)); -} - -static const struct callout_mutex_ops callout_mutex_ops[8] = { - [CALLOUT_LC_UNUSED_0] = { - .lock = callout_mutex_op_none, - .unlock = callout_mutex_op_none, - .owned = callout_owned_op_none, - }, - [CALLOUT_LC_UNUSED_1] = { - .lock = callout_mutex_op_none, - .unlock = callout_mutex_op_none, - .owned = callout_owned_op_none, - }, - [CALLOUT_LC_UNUSED_2] = { - .lock = callout_mutex_op_none, - .unlock = callout_mutex_op_none, - .owned = callout_owned_op_none, - }, - [CALLOUT_LC_UNUSED_3] = { - .lock = callout_mutex_op_none, - .unlock = callout_mutex_op_none, - .owned = callout_owned_op_none, - }, - [CALLOUT_LC_SPIN] = { - .lock = callout_mutex_lock_spin, - .unlock = callout_mutex_unlock_spin, - .owned = callout_mutex_owned, - }, - [CALLOUT_LC_MUTEX] = { - .lock = callout_mutex_lock, - .unlock = callout_mutex_unlock, - .owned = callout_mutex_owned, - }, - [CALLOUT_LC_RW] = { - .lock = callout_rw_wlock, - .unlock = callout_rw_wunlock, - .owned = callout_rw_owned, - }, - [CALLOUT_LC_RM] = { - .lock = callout_rm_wlock, - .unlock = callout_rm_wunlock, - .owned = callout_rm_owned, - }, -}; - -static void -callout_lock_client(int c_flags, struct lock_object *c_lock) -{ - callout_mutex_ops[CALLOUT_GET_LC(c_flags)].lock(c_lock); -} - -static void -callout_unlock_client(int c_flags, struct lock_object *c_lock) -{ - callout_mutex_ops[CALLOUT_GET_LC(c_flags)].unlock(c_lock); -} - -#ifdef SMP -static int -callout_lock_owned_client(int c_flags, struct lock_object *c_lock) -{ - return (callout_mutex_ops[CALLOUT_GET_LC(c_flags)].owned(c_lock)); -} -#endif - /* - * The callout CPU exec structure represent information necessary for - * describing the state of callouts currently running on the CPU and - * for handling deferred callout restarts. - * - * In particular, the first entry of the array cc_exec_entity holds - * information for callouts running from the SWI thread context, while - * the second one holds information for callouts running directly from - * the hardware interrupt context. + * The callout cpu exec entities represent informations necessary for + * describing the state of callouts currently running on the CPU and the ones + * necessary for migrating callouts to the new callout cpu. In particular, + * the first entry of the array cc_exec_entity holds informations for callout + * running in SWI thread context, while the second one holds informations + * for callout running directly from hardware interrupt context. + * The cached informations are very important for deferring migration when + * the migrating callout is already running. */ struct cc_exec { - /* - * The "cc_curr" points to the currently executing callout and - * is protected by the "cc_lock" spinlock. If no callback is - * currently executing it is equal to "NULL". - */ + struct callout *cc_next; struct callout *cc_curr; - /* - * The "cc_restart_args" structure holds the argument for a - * deferred callback restart and is protected by the "cc_lock" - * spinlock. The structure is only valid if "cc_restart" is - * "true". If "cc_restart" is "false" the information in the - * "cc_restart_args" structure shall be ignored. - */ - struct callout_args cc_restart_args; - bool cc_restart; - /* - * The "cc_cancel" variable allows the currently pending - * callback to be atomically cancelled. This field is write - * protected by the "cc_lock" spinlock. - */ - bool cc_cancel; - /* - * The "cc_drain_fn" points to a function which shall be - * called with the argument stored in "cc_drain_arg" when an - * asynchronous drain is performed. This field is write - * protected by the "cc_lock" spinlock. - */ - callout_func_t *cc_drain_fn; - void *cc_drain_arg; +#ifdef SMP + void (*ce_migration_func)(void *); + void *ce_migration_arg; + int ce_migration_cpu; + sbintime_t ce_migration_time; + sbintime_t ce_migration_prec; +#endif + bool cc_cancel; + bool cc_waiting; }; /* - * There is one "struct callout_cpu" per CPU, holding all relevant + * There is one struct callout_cpu per cpu, holding all relevant * state for the callout processing thread on the individual CPU. */ struct callout_cpu { struct mtx_padalign cc_lock; struct cc_exec cc_exec_entity[2]; - struct callout *cc_exec_next_dir; struct callout *cc_callout; struct callout_list *cc_callwheel; struct callout_tailq cc_expireq; @@ -347,7 +166,27 @@ struct callout_cpu { char cc_ktr_event_name[20]; }; +#define cc_exec_curr cc_exec_entity[0].cc_curr +#define cc_exec_next cc_exec_entity[0].cc_next +#define cc_exec_cancel cc_exec_entity[0].cc_cancel +#define cc_exec_waiting cc_exec_entity[0].cc_waiting +#define cc_exec_curr_dir cc_exec_entity[1].cc_curr +#define cc_exec_next_dir cc_exec_entity[1].cc_next +#define cc_exec_cancel_dir cc_exec_entity[1].cc_cancel +#define cc_exec_waiting_dir cc_exec_entity[1].cc_waiting + #ifdef SMP +#define cc_migration_func cc_exec_entity[0].ce_migration_func +#define cc_migration_arg cc_exec_entity[0].ce_migration_arg +#define cc_migration_cpu cc_exec_entity[0].ce_migration_cpu +#define cc_migration_time cc_exec_entity[0].ce_migration_time +#define cc_migration_prec cc_exec_entity[0].ce_migration_prec +#define cc_migration_func_dir cc_exec_entity[1].ce_migration_func +#define cc_migration_arg_dir cc_exec_entity[1].ce_migration_arg +#define cc_migration_cpu_dir cc_exec_entity[1].ce_migration_cpu +#define cc_migration_time_dir cc_exec_entity[1].ce_migration_time +#define cc_migration_prec_dir cc_exec_entity[1].ce_migration_prec + struct callout_cpu cc_cpu[MAXCPU]; #define CPUBLOCK MAXCPU #define CC_CPU(cpu) (&cc_cpu[(cpu)]) @@ -372,9 +211,60 @@ static void softclock_call_cc(struct callout *c, struct callout_cpu *cc, static MALLOC_DEFINE(M_CALLOUT, "callout", "Callout datastructures"); +/** + * Locked by cc_lock: + * cc_curr - If a callout is in progress, it is cc_curr. + * If cc_curr is non-NULL, threads waiting in + * callout_drain() will be woken up as soon as the + * relevant callout completes. + * cc_cancel - Changing to 1 with both callout_lock and cc_lock held + * guarantees that the current callout will not run. + * The softclock() function sets this to 0 before it + * drops callout_lock to acquire c_lock, and it calls + * the handler only if curr_cancelled is still 0 after + * cc_lock is successfully acquired. + * cc_waiting - If a thread is waiting in callout_drain(), then + * callout_wait is nonzero. Set only when + * cc_curr is non-NULL. + */ + +/* + * Resets the execution entity tied to a specific callout cpu. + */ +static void +cc_cce_cleanup(struct callout_cpu *cc, int direct) +{ + + cc->cc_exec_entity[direct].cc_curr = NULL; + cc->cc_exec_entity[direct].cc_next = NULL; + cc->cc_exec_entity[direct].cc_cancel = false; + cc->cc_exec_entity[direct].cc_waiting = false; +#ifdef SMP + cc->cc_exec_entity[direct].ce_migration_cpu = CPUBLOCK; + cc->cc_exec_entity[direct].ce_migration_time = 0; + cc->cc_exec_entity[direct].ce_migration_prec = 0; + cc->cc_exec_entity[direct].ce_migration_func = NULL; + cc->cc_exec_entity[direct].ce_migration_arg = NULL; +#endif +} + +/* + * Checks if migration is requested by a specific callout cpu. + */ +static int +cc_cce_migrating(struct callout_cpu *cc, int direct) +{ + +#ifdef SMP + return (cc->cc_exec_entity[direct].ce_migration_cpu != CPUBLOCK); +#else + return (0); +#endif +} + /* - * Kernel low level callwheel initialization called from cpu0 during - * kernel startup: + * Kernel low level callwheel initialization + * called on cpu0 during kernel startup. */ static void callout_callwheel_init(void *dummy) @@ -434,6 +324,8 @@ callout_cpu_init(struct callout_cpu *cc, int cpu) LIST_INIT(&cc->cc_callwheel[i]); TAILQ_INIT(&cc->cc_expireq); cc->cc_firstevent = SBT_MAX; + for (i = 0; i < 2; i++) + cc_cce_cleanup(cc, i); snprintf(cc->cc_ktr_event_name, sizeof(cc->cc_ktr_event_name), "callwheel cpu %d", cpu); if (cc->cc_callout == NULL) /* Only cpu0 handles timeout(9) */ @@ -441,11 +333,41 @@ callout_cpu_init(struct callout_cpu *cc, int cpu) for (i = 0; i < ncallout; i++) { c = &cc->cc_callout[i]; callout_init(c, 0); - c->c_flags |= CALLOUT_LOCAL_ALLOC; + c->c_flags = CALLOUT_LOCAL_ALLOC; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } } +#ifdef SMP +/* + * Switches the cpu tied to a specific callout. + * The function expects a locked incoming callout cpu and returns with + * locked outcoming callout cpu. + */ +static struct callout_cpu * +callout_cpu_switch(struct callout *c, struct callout_cpu *cc, int new_cpu) +{ + struct callout_cpu *new_cc; + + MPASS(c != NULL && cc != NULL); + CC_LOCK_ASSERT(cc); + + /* + * Avoid interrupts and preemption firing after the callout cpu + * is blocked in order to avoid deadlocks as the new thread + * may be willing to acquire the callout cpu lock. + */ + c->c_cpu = CPUBLOCK; + spinlock_enter(); + CC_UNLOCK(cc); + new_cc = CC_CPU(new_cpu); + CC_LOCK(new_cc); + spinlock_exit(); + c->c_cpu = new_cpu; + return (new_cc); +} +#endif + /* * Start standard softclock thread. */ @@ -522,8 +444,9 @@ callout_process(sbintime_t now) #ifdef CALLOUT_PROFILING int depth_dir = 0, mpcalls_dir = 0, lockcalls_dir = 0; #endif + cc = CC_SELF(); - CC_LOCK(cc); + mtx_lock_spin_flags(&cc->cc_lock, MTX_QUIET); /* Compute the buckets of the last scan and present times. */ firstb = callout_hash(cc->cc_lastscan); @@ -626,7 +549,7 @@ next: avg_mpcalls_dir += (mpcalls_dir * 1000 - avg_mpcalls_dir) >> 8; avg_lockcalls_dir += (lockcalls_dir * 1000 - avg_lockcalls_dir) >> 8; #endif - CC_UNLOCK(cc); + mtx_unlock_spin_flags(&cc->cc_lock, MTX_QUIET); /* * swi_sched acquires the thread lock, so we don't want to call it * with cc_lock held; incorrect locking order. @@ -639,55 +562,49 @@ static struct callout_cpu * callout_lock(struct callout *c) { struct callout_cpu *cc; - cc = CC_CPU(c->c_cpu); - CC_LOCK(cc); + int cpu; + + for (;;) { + cpu = c->c_cpu; +#ifdef SMP + if (cpu == CPUBLOCK) { + while (c->c_cpu == CPUBLOCK) + cpu_spinwait(); + continue; + } +#endif + cc = CC_CPU(cpu); + CC_LOCK(cc); + if (cpu == c->c_cpu) + break; + CC_UNLOCK(cc); + } return (cc); } -static struct callout_cpu * -callout_cc_add_locked(struct callout *c, struct callout_cpu *cc, - struct callout_args *coa, bool can_swap_cpu) +static void +callout_cc_add(struct callout *c, struct callout_cpu *cc, + sbintime_t sbt, sbintime_t precision, void (*func)(void *), + void *arg, int cpu, int flags) { -#ifndef NO_EVENTTIMERS - sbintime_t sbt; -#endif int bucket; CC_LOCK_ASSERT(cc); - - /* update flags before swapping locks, if any */ - c->c_flags &= ~(CALLOUT_PROCESSED | CALLOUT_DIRECT | CALLOUT_DEFRESTART); - if (coa->flags & C_DIRECT_EXEC) - c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | CALLOUT_DIRECT); - else - c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); - -#ifdef SMP - /* - * Check if we are changing the CPU on which the callback - * should be executed and if we have a lock protecting us: - */ - if (can_swap_cpu != false && coa->cpu != c->c_cpu && - callout_lock_owned_client(c->c_flags, c->c_lock) != 0) { - CC_UNLOCK(cc); - c->c_cpu = coa->cpu; - cc = callout_lock(c); - } -#endif - if (coa->time < cc->cc_lastscan) - coa->time = cc->cc_lastscan; - c->c_arg = coa->arg; - c->c_func = coa->func; - c->c_time = coa->time; - c->c_precision = coa->precision; - + if (sbt < cc->cc_lastscan) + sbt = cc->cc_lastscan; + c->c_arg = arg; + c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING); + if (flags & C_DIRECT_EXEC) + c->c_flags |= CALLOUT_DIRECT; + c->c_flags &= ~CALLOUT_PROCESSED; + c->c_func = func; + c->c_time = sbt; + c->c_precision = precision; bucket = callout_get_bucket(c->c_time); CTR3(KTR_CALLOUT, "precision set for %p: %d.%08x", c, (int)(c->c_precision >> 32), (u_int)(c->c_precision & 0xffffffff)); LIST_INSERT_HEAD(&cc->cc_callwheel[bucket], c, c_links.le); - - /* Ensure we are first to be scanned, if called via a callback */ if (cc->cc_bucket == bucket) cc->cc_exec_next_dir = c; #ifndef NO_EVENTTIMERS @@ -700,16 +617,17 @@ callout_cc_add_locked(struct callout *c, struct callout_cpu *cc, sbt = c->c_time + c->c_precision; if (sbt < cc->cc_firstevent) { cc->cc_firstevent = sbt; - cpu_new_callout(coa->cpu, sbt, c->c_time); + cpu_new_callout(cpu, sbt, c->c_time); } #endif - return (cc); } static void callout_cc_del(struct callout *c, struct callout_cpu *cc) { + if ((c->c_flags & CALLOUT_LOCAL_ALLOC) == 0) + return; c->c_func = NULL; SLIST_INSERT_HEAD(&cc->cc_callfree, c, c_links.sle); } @@ -721,10 +639,20 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, #endif int direct) { - callout_func_t *c_func; + struct rm_priotracker tracker; + void (*c_func)(void *); void *c_arg; + struct lock_class *class; struct lock_object *c_lock; + uintptr_t lock_status; int c_flags; +#ifdef SMP + struct callout_cpu *new_cc; + void (*new_func)(void *); + void *new_arg; + int flags, new_cpu; + sbintime_t new_prec, new_time; +#endif #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) sbintime_t sbt1, sbt2; struct timespec ts2; @@ -735,39 +663,37 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, KASSERT((c->c_flags & (CALLOUT_PENDING | CALLOUT_ACTIVE)) == (CALLOUT_PENDING | CALLOUT_ACTIVE), ("softclock_call_cc: pend|act %p %x", c, c->c_flags)); + class = (c->c_lock != NULL) ? LOCK_CLASS(c->c_lock) : NULL; + lock_status = 0; + if (c->c_flags & CALLOUT_SHAREDLOCK) { + if (class == &lock_class_rm) + lock_status = (uintptr_t)&tracker; + else + lock_status = 1; + } c_lock = c->c_lock; c_func = c->c_func; c_arg = c->c_arg; c_flags = c->c_flags; - - /* remove pending bit */ - c->c_flags &= ~CALLOUT_PENDING; - - /* reset our local state */ + if (c->c_flags & CALLOUT_LOCAL_ALLOC) + c->c_flags = CALLOUT_LOCAL_ALLOC; + else + c->c_flags &= ~CALLOUT_PENDING; cc->cc_exec_entity[direct].cc_curr = c; - cc->cc_exec_entity[direct].cc_restart = false; - cc->cc_exec_entity[direct].cc_drain_fn = NULL; - cc->cc_exec_entity[direct].cc_drain_arg = NULL; - + cc->cc_exec_entity[direct].cc_cancel = false; + CC_UNLOCK(cc); if (c_lock != NULL) { - cc->cc_exec_entity[direct].cc_cancel = false; - CC_UNLOCK(cc); - - /* unlocked region for switching locks */ - - callout_lock_client(c_flags, c_lock); - + class->lc_lock(c_lock, lock_status); /* - * Check if the callout may have been cancelled while - * we were switching locks. Even though the callout is - * specifying a lock, it might not be certain this - * lock is locked when starting and stopping callouts. + * The callout may have been cancelled + * while we switched locks. */ - CC_LOCK(cc); if (cc->cc_exec_entity[direct].cc_cancel) { - callout_unlock_client(c_flags, c_lock); - goto skip_cc_locked; + class->lc_unlock(c_lock); + goto skip; } + /* The callout cannot be stopped now. */ + cc->cc_exec_entity[direct].cc_cancel = true; if (c_lock == &Giant.lock_object) { #ifdef CALLOUT_PROFILING (*gcalls)++; @@ -788,11 +714,6 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, CTR3(KTR_CALLOUT, "callout %p func %p arg %p", c, c_func, c_arg); } - /* The callout cannot be stopped now! */ - cc->cc_exec_entity[direct].cc_cancel = true; - CC_UNLOCK(cc); - - /* unlocked region */ KTR_STATE3(KTR_SCHED, "callout", cc->cc_ktr_event_name, "running", "func:%p", c_func, "arg:%p", c_arg, "direct:%d", direct); #if defined(DIAGNOSTIC) || defined(CALLOUT_PROFILING) @@ -819,40 +740,85 @@ softclock_call_cc(struct callout *c, struct callout_cpu *cc, #endif KTR_STATE0(KTR_SCHED, "callout", cc->cc_ktr_event_name, "idle"); CTR1(KTR_CALLOUT, "callout %p finished", c); - - /* - * At this point the callback structure might have been freed, - * so we need to check the previously copied value of - * "c->c_flags": - */ if ((c_flags & CALLOUT_RETURNUNLOCKED) == 0) - callout_unlock_client(c_flags, c_lock); - + class->lc_unlock(c_lock); +skip: CC_LOCK(cc); - -skip_cc_locked: KASSERT(cc->cc_exec_entity[direct].cc_curr == c, ("mishandled cc_curr")); cc->cc_exec_entity[direct].cc_curr = NULL; - - /* Check if there is anything which needs draining */ - if (cc->cc_exec_entity[direct].cc_drain_fn != NULL) { + if (cc->cc_exec_entity[direct].cc_waiting) { /* - * Unlock the CPU callout last, so that any use of - * structures belonging to the callout are complete: + * There is someone waiting for the + * callout to complete. + * If the callout was scheduled for + * migration just cancel it. */ + if (cc_cce_migrating(cc, direct)) { + cc_cce_cleanup(cc, direct); + + /* + * It should be assert here that the callout is not + * destroyed but that is not easy. + */ + c->c_flags &= ~CALLOUT_DFRMIGRATION; + } + cc->cc_exec_entity[direct].cc_waiting = false; CC_UNLOCK(cc); - /* call drain function unlocked */ - cc->cc_exec_entity[direct].cc_drain_fn( - cc->cc_exec_entity[direct].cc_drain_arg); + wakeup(&cc->cc_exec_entity[direct].cc_waiting); CC_LOCK(cc); - } else if (c_flags & CALLOUT_LOCAL_ALLOC) { - /* return callout back to freelist */ - callout_cc_del(c, cc); - } else if (cc->cc_exec_entity[direct].cc_restart) { - /* [re-]schedule callout, if any */ - cc = callout_cc_add_locked(c, cc, - &cc->cc_exec_entity[direct].cc_restart_args, false); + } else if (cc_cce_migrating(cc, direct)) { + KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0, + ("Migrating legacy callout %p", c)); +#ifdef SMP + /* + * If the callout was scheduled for + * migration just perform it now. + */ + new_cpu = cc->cc_exec_entity[direct].ce_migration_cpu; + new_time = cc->cc_exec_entity[direct].ce_migration_time; + new_prec = cc->cc_exec_entity[direct].ce_migration_prec; + new_func = cc->cc_exec_entity[direct].ce_migration_func; + new_arg = cc->cc_exec_entity[direct].ce_migration_arg; + cc_cce_cleanup(cc, direct); + + /* + * It should be assert here that the callout is not destroyed + * but that is not easy. + * + * As first thing, handle deferred callout stops. + */ + if ((c->c_flags & CALLOUT_DFRMIGRATION) == 0) { + CTR3(KTR_CALLOUT, + "deferred cancelled %p func %p arg %p", + c, new_func, new_arg); + callout_cc_del(c, cc); + return; + } + c->c_flags &= ~CALLOUT_DFRMIGRATION; + + new_cc = callout_cpu_switch(c, cc, new_cpu); + flags = (direct) ? C_DIRECT_EXEC : 0; + callout_cc_add(c, new_cc, new_time, new_prec, new_func, + new_arg, new_cpu, flags); + CC_UNLOCK(new_cc); + CC_LOCK(cc); +#else + panic("migration should not happen"); +#endif } + /* + * If the current callout is locally allocated (from + * timeout(9)) then put it on the freelist. + * + * Note: we need to check the cached copy of c_flags because + * if it was not local, then it's not safe to deref the + * callout pointer. + */ + KASSERT((c_flags & CALLOUT_LOCAL_ALLOC) == 0 || + c->c_flags == CALLOUT_LOCAL_ALLOC, + ("corrupted callout")); + if (c_flags & CALLOUT_LOCAL_ALLOC) + callout_cc_del(c, cc); } /* @@ -933,11 +899,10 @@ timeout(timeout_t *ftn, void *arg, int to_ticks) /* XXX Attempt to malloc first */ panic("timeout table full"); SLIST_REMOVE_HEAD(&cc->cc_callfree, c_links.sle); + callout_reset(new, to_ticks, ftn, arg); handle.callout = new; CC_UNLOCK(cc); - callout_reset(new, to_ticks, ftn, arg); - return (handle); } @@ -945,7 +910,6 @@ void untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) { struct callout_cpu *cc; - bool match; /* * Check for a handle that was initialized @@ -956,11 +920,9 @@ untimeout(timeout_t *ftn, void *arg, struct callout_handle handle) return; cc = callout_lock(handle.callout); - match = (handle.callout->c_func == ftn && handle.callout->c_arg == arg); - CC_UNLOCK(cc); - - if (match) + if (handle.callout->c_func == ftn && handle.callout->c_arg == arg) callout_stop(handle.callout); + CC_UNLOCK(cc); } void @@ -969,119 +931,6 @@ callout_handle_init(struct callout_handle *handle) handle->callout = NULL; } -static int -callout_restart_async(struct callout *c, struct callout_args *coa, - callout_func_t *drain_fn, void *drain_arg) -{ - struct callout_cpu *cc; - int cancelled; - int direct; - - cc = callout_lock(c); - - /* Figure out if the callout is direct or not */ - direct = ((c->c_flags & CALLOUT_DIRECT) != 0); - - /* - * Check if the callback is currently scheduled for - * completion: - */ - if (cc->cc_exec_entity[direct].cc_curr == c) { - /* - * Try to prevent the callback from running by setting - * the "cc_cancel" variable to "true". Also check if - * the callout was previously subject to a deferred - * callout restart: - */ - if (cc->cc_exec_entity[direct].cc_cancel == false || - (c->c_flags & CALLOUT_DEFRESTART) != 0) { - cc->cc_exec_entity[direct].cc_cancel = true; - cancelled = 1; - } else { - cancelled = 0; - } - - /* - * Prevent callback restart if "callout_drain_xxx()" - * is being called or we are stopping the callout or - * the callback was preallocated by us: - */ - if (cc->cc_exec_entity[direct].cc_drain_fn != NULL || - coa == NULL || (c->c_flags & CALLOUT_LOCAL_ALLOC) != 0) { - CTR4(KTR_CALLOUT, "%s %p func %p arg %p", - cancelled ? "cancelled and draining" : "draining", - c, c->c_func, c->c_arg); - - /* clear old flags, if any */ - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | - CALLOUT_DEFRESTART | CALLOUT_PROCESSED); - - /* clear restart flag, if any */ - cc->cc_exec_entity[direct].cc_restart = false; - - /* set drain function, if any */ - if (drain_fn != NULL) { - cc->cc_exec_entity[direct].cc_drain_fn = drain_fn; - cc->cc_exec_entity[direct].cc_drain_arg = drain_arg; - cancelled |= 2; /* XXX define the value */ - } - } else { - CTR4(KTR_CALLOUT, "%s %p func %p arg %p", - cancelled ? "cancelled and restarting" : "restarting", - c, c->c_func, c->c_arg); - - /* get us back into the game */ - c->c_flags |= (CALLOUT_ACTIVE | CALLOUT_PENDING | - CALLOUT_DEFRESTART); - c->c_flags &= ~CALLOUT_PROCESSED; - - /* enable deferred restart */ - cc->cc_exec_entity[direct].cc_restart = true; - - /* store arguments for the deferred restart, if any */ - cc->cc_exec_entity[direct].cc_restart_args = *coa; - } - } else { - /* stop callout */ - if (c->c_flags & CALLOUT_PENDING) { - /* - * The callback has not yet been executed, and - * we simply just need to unlink it: - */ - if ((c->c_flags & CALLOUT_PROCESSED) == 0) { - if (cc->cc_exec_next_dir == c) - cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); - LIST_REMOVE(c, c_links.le); - } else { - TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); - } - cancelled = 1; - } else { - cancelled = 0; - } - - CTR4(KTR_CALLOUT, "%s %p func %p arg %p", - cancelled ? "rescheduled" : "scheduled", - c, c->c_func, c->c_arg); - - /* [re-]schedule callout, if any */ - if (coa != NULL) { - cc = callout_cc_add_locked(c, cc, coa, true); - } else { - /* clear old flags, if any */ - c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING | - CALLOUT_DEFRESTART | CALLOUT_PROCESSED); - - /* return callback to pre-allocated list, if any */ - if ((c->c_flags & CALLOUT_LOCAL_ALLOC) && cancelled != 0) { - callout_cc_del(c, cc); - } - } - } - CC_UNLOCK(cc); - return (cancelled); -} - /* * New interface; clients allocate their own callout structures. * @@ -1100,32 +949,25 @@ callout_restart_async(struct callout *c, struct callout_args *coa, */ int callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, - callout_func_t *ftn, void *arg, int cpu, int flags) + void (*ftn)(void *), void *arg, int cpu, int flags) { - struct callout_args coa; - - /* store arguments for callout add function */ - coa.func = ftn; - coa.arg = arg; - coa.precision = precision; - coa.flags = flags; - coa.cpu = cpu; - - /* compute the rest of the arguments needed */ - if (coa.flags & C_ABSOLUTE) { - coa.time = sbt; - } else { - sbintime_t pr; + sbintime_t to_sbt, pr; + struct callout_cpu *cc; + int cancelled, direct; - if ((coa.flags & C_HARDCLOCK) && (sbt < tick_sbt)) + cancelled = 0; + if (flags & C_ABSOLUTE) { + to_sbt = sbt; + } else { + if ((flags & C_HARDCLOCK) && (sbt < tick_sbt)) sbt = tick_sbt; - if ((coa.flags & C_HARDCLOCK) || + if ((flags & C_HARDCLOCK) || #ifdef NO_EVENTTIMERS sbt >= sbt_timethreshold) { - coa.time = getsbinuptime(); + to_sbt = getsbinuptime(); /* Add safety belt for the case of hz > 1000. */ - coa.time += tc_tick_sbt - tick_sbt; + to_sbt += tc_tick_sbt - tick_sbt; #else sbt >= sbt_tickthreshold) { /* @@ -1135,29 +977,101 @@ callout_reset_sbt_on(struct callout *c, sbintime_t sbt, sbintime_t precision, * active ones. */ #ifdef __LP64__ - coa.time = DPCPU_GET(hardclocktime); + to_sbt = DPCPU_GET(hardclocktime); #else spinlock_enter(); - coa.time = DPCPU_GET(hardclocktime); + to_sbt = DPCPU_GET(hardclocktime); spinlock_exit(); #endif #endif - if ((coa.flags & C_HARDCLOCK) == 0) - coa.time += tick_sbt; + if ((flags & C_HARDCLOCK) == 0) + to_sbt += tick_sbt; } else - coa.time = sbinuptime(); - if (SBT_MAX - coa.time < sbt) - coa.time = SBT_MAX; + to_sbt = sbinuptime(); + if (SBT_MAX - to_sbt < sbt) + to_sbt = SBT_MAX; else - coa.time += sbt; - pr = ((C_PRELGET(coa.flags) < 0) ? sbt >> tc_precexp : - sbt >> C_PRELGET(coa.flags)); - if (pr > coa.precision) - coa.precision = pr; + to_sbt += sbt; + pr = ((C_PRELGET(flags) < 0) ? sbt >> tc_precexp : + sbt >> C_PRELGET(flags)); + if (pr > precision) + precision = pr; + } + /* + * Don't allow migration of pre-allocated callouts lest they + * become unbalanced. + */ + if (c->c_flags & CALLOUT_LOCAL_ALLOC) + cpu = c->c_cpu; + direct = (c->c_flags & CALLOUT_DIRECT) != 0; + KASSERT(!direct || c->c_lock == NULL, + ("%s: direct callout %p has lock", __func__, c)); + cc = callout_lock(c); + if (cc->cc_exec_entity[direct].cc_curr == c) { + /* + * We're being asked to reschedule a callout which is + * currently in progress. If there is a lock then we + * can cancel the callout if it has not really started. + */ + if (c->c_lock != NULL && !cc->cc_exec_entity[direct].cc_cancel) + cancelled = cc->cc_exec_entity[direct].cc_cancel = true; + if (cc->cc_exec_entity[direct].cc_waiting) { + /* + * Someone has called callout_drain to kill this + * callout. Don't reschedule. + */ + CTR4(KTR_CALLOUT, "%s %p func %p arg %p", + cancelled ? "cancelled" : "failed to cancel", + c, c->c_func, c->c_arg); + CC_UNLOCK(cc); + return (cancelled); + } + } + if (c->c_flags & CALLOUT_PENDING) { + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + if (cc->cc_exec_next_dir == c) + cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); + LIST_REMOVE(c, c_links.le); + } else + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + cancelled = 1; + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); + } + +#ifdef SMP + /* + * If the callout must migrate try to perform it immediately. + * If the callout is currently running, just defer the migration + * to a more appropriate moment. + */ + if (c->c_cpu != cpu) { + if (cc->cc_exec_entity[direct].cc_curr == c) { + cc->cc_exec_entity[direct].ce_migration_cpu = cpu; + cc->cc_exec_entity[direct].ce_migration_time + = to_sbt; + cc->cc_exec_entity[direct].ce_migration_prec + = precision; + cc->cc_exec_entity[direct].ce_migration_func = ftn; + cc->cc_exec_entity[direct].ce_migration_arg = arg; + c->c_flags |= CALLOUT_DFRMIGRATION; + CTR6(KTR_CALLOUT, + "migration of %p func %p arg %p in %d.%08x to %u deferred", + c, c->c_func, c->c_arg, (int)(to_sbt >> 32), + (u_int)(to_sbt & 0xffffffff), cpu); + CC_UNLOCK(cc); + return (cancelled); + } + cc = callout_cpu_switch(c, cc, cpu); } +#endif - /* get callback started, if any */ - return (callout_restart_async(c, &coa, NULL, NULL)); + callout_cc_add(c, cc, to_sbt, precision, ftn, arg, cpu, flags); + CTR6(KTR_CALLOUT, "%sscheduled %p func %p arg %p in %d.%08x", + cancelled ? "re" : "", c, c->c_func, c->c_arg, (int)(to_sbt >> 32), + (u_int)(to_sbt & 0xffffffff)); + CC_UNLOCK(cc); + + return (cancelled); } /* @@ -1176,105 +1090,204 @@ callout_schedule(struct callout *c, int to_ticks) } int -callout_stop(struct callout *c) +_callout_stop_safe(struct callout *c, int safe) { - /* get callback stopped, if any */ - return (callout_restart_async(c, NULL, NULL, NULL)); -} - -static void -callout_drain_function(void *arg) -{ - wakeup(arg); -} - -int -callout_drain_async(struct callout *c, callout_func_t *fn, void *arg) -{ - /* get callback stopped, if any */ - return (callout_restart_async(c, NULL, fn, arg) & 2); -} - -int -callout_drain(struct callout *c) -{ - int cancelled; - - WITNESS_WARN(WARN_GIANTOK | WARN_SLEEPOK, NULL, - "Draining callout"); - - callout_lock_client(c->c_flags, c->c_lock); - - /* at this point the "c->c_cpu" field is not changing */ + struct callout_cpu *cc, *old_cc; + struct lock_class *class; + int direct, sq_locked, use_lock; - cancelled = callout_drain_async(c, &callout_drain_function, c); - - if (cancelled != 0) { - struct callout_cpu *cc; - int direct; + /* + * Some old subsystems don't hold Giant while running a callout_stop(), + * so just discard this check for the moment. + */ + if (!safe && c->c_lock != NULL) { + if (c->c_lock == &Giant.lock_object) + use_lock = mtx_owned(&Giant); + else { + use_lock = 1; + class = LOCK_CLASS(c->c_lock); + class->lc_assert(c->c_lock, LA_XLOCKED); + } + } else + use_lock = 0; + direct = (c->c_flags & CALLOUT_DIRECT) != 0; + sq_locked = 0; + old_cc = NULL; +again: + cc = callout_lock(c); - CTR3(KTR_CALLOUT, "need to drain %p func %p arg %p", - c, c->c_func, c->c_arg); + /* + * If the callout was migrating while the callout cpu lock was + * dropped, just drop the sleepqueue lock and check the states + * again. + */ + if (sq_locked != 0 && cc != old_cc) { +#ifdef SMP + CC_UNLOCK(cc); + sleepq_release(&old_cc->cc_exec_entity[direct].cc_waiting); + sq_locked = 0; + old_cc = NULL; + goto again; +#else + panic("migration should not happen"); +#endif + } - cc = callout_lock(c); - direct = ((c->c_flags & CALLOUT_DIRECT) != 0); + /* + * If the callout isn't pending, it's not on the queue, so + * don't attempt to remove it from the queue. We can try to + * stop it by other means however. + */ + if (!(c->c_flags & CALLOUT_PENDING)) { + c->c_flags &= ~CALLOUT_ACTIVE; /* - * We've gotten our callout CPU lock, it is safe to - * drop the initial lock: + * If it wasn't on the queue and it isn't the current + * callout, then we can't stop it, so just bail. */ - callout_unlock_client(c->c_flags, c->c_lock); - - /* Wait for drain to complete */ + if (cc->cc_exec_entity[direct].cc_curr != c) { + CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", + c, c->c_func, c->c_arg); + CC_UNLOCK(cc); + if (sq_locked) + sleepq_release( + &cc->cc_exec_entity[direct].cc_waiting); + return (0); + } - while (cc->cc_exec_entity[direct].cc_curr == c) - msleep_spin(c, (struct mtx *)&cc->cc_lock, "codrain", 0); + if (safe) { + /* + * The current callout is running (or just + * about to run) and blocking is allowed, so + * just wait for the current invocation to + * finish. + */ + while (cc->cc_exec_entity[direct].cc_curr == c) { + /* + * Use direct calls to sleepqueue interface + * instead of cv/msleep in order to avoid + * a LOR between cc_lock and sleepqueue + * chain spinlocks. This piece of code + * emulates a msleep_spin() call actually. + * + * If we already have the sleepqueue chain + * locked, then we can safely block. If we + * don't already have it locked, however, + * we have to drop the cc_lock to lock + * it. This opens several races, so we + * restart at the beginning once we have + * both locks. If nothing has changed, then + * we will end up back here with sq_locked + * set. + */ + if (!sq_locked) { + CC_UNLOCK(cc); + sleepq_lock( + &cc->cc_exec_entity[direct].cc_waiting); + sq_locked = 1; + old_cc = cc; + goto again; + } + /* + * Migration could be cancelled here, but + * as long as it is still not sure when it + * will be packed up, just let softclock() + * take care of it. + */ + cc->cc_exec_entity[direct].cc_waiting = true; + DROP_GIANT(); + CC_UNLOCK(cc); + sleepq_add( + &cc->cc_exec_entity[direct].cc_waiting, + &cc->cc_lock.lock_object, "codrain", + SLEEPQ_SLEEP, 0); + sleepq_wait( + &cc->cc_exec_entity[direct].cc_waiting, + 0); + sq_locked = 0; + old_cc = NULL; + + /* Reacquire locks previously released. */ + PICKUP_GIANT(); + CC_LOCK(cc); + } + } else if (use_lock && + !cc->cc_exec_entity[direct].cc_cancel) { + /* + * The current callout is waiting for its + * lock which we hold. Cancel the callout + * and return. After our caller drops the + * lock, the callout will be skipped in + * softclock(). + */ + cc->cc_exec_entity[direct].cc_cancel = true; + CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", + c, c->c_func, c->c_arg); + KASSERT(!cc_cce_migrating(cc, direct), + ("callout wrongly scheduled for migration")); + CC_UNLOCK(cc); + KASSERT(!sq_locked, ("sleepqueue chain locked")); + return (1); + } else if ((c->c_flags & CALLOUT_DFRMIGRATION) != 0) { + c->c_flags &= ~CALLOUT_DFRMIGRATION; + CTR3(KTR_CALLOUT, "postponing stop %p func %p arg %p", + c, c->c_func, c->c_arg); + CC_UNLOCK(cc); + return (1); + } + CTR3(KTR_CALLOUT, "failed to stop %p func %p arg %p", + c, c->c_func, c->c_arg); CC_UNLOCK(cc); - } else { - callout_unlock_client(c->c_flags, c->c_lock); + KASSERT(!sq_locked, ("sleepqueue chain still locked")); + return (0); } + if (sq_locked) + sleepq_release(&cc->cc_exec_entity[direct].cc_waiting); + + c->c_flags &= ~(CALLOUT_ACTIVE | CALLOUT_PENDING); CTR3(KTR_CALLOUT, "cancelled %p func %p arg %p", c, c->c_func, c->c_arg); + if ((c->c_flags & CALLOUT_PROCESSED) == 0) { + if (cc->cc_exec_next_dir == c) + cc->cc_exec_next_dir = LIST_NEXT(c, c_links.le); + LIST_REMOVE(c, c_links.le); + } else + TAILQ_REMOVE(&cc->cc_expireq, c, c_links.tqe); + callout_cc_del(c, cc); - return (cancelled & 1); + CC_UNLOCK(cc); + return (1); } void callout_init(struct callout *c, int mpsafe) { + bzero(c, sizeof *c); if (mpsafe) { - _callout_init_lock(c, NULL, CALLOUT_RETURNUNLOCKED); + c->c_lock = NULL; + c->c_flags = CALLOUT_RETURNUNLOCKED; } else { - _callout_init_lock(c, &Giant.lock_object, 0); + c->c_lock = &Giant.lock_object; + c->c_flags = 0; } + c->c_cpu = timeout_cpu; } void _callout_init_lock(struct callout *c, struct lock_object *lock, int flags) { bzero(c, sizeof *c); - KASSERT((flags & ~CALLOUT_RETURNUNLOCKED) == 0, - ("callout_init_lock: bad flags 0x%08x", flags)); - flags &= CALLOUT_RETURNUNLOCKED; - if (lock != NULL) { - struct lock_class *class = LOCK_CLASS(lock); - if (class == &lock_class_mtx_sleep) - flags |= CALLOUT_SET_LC(CALLOUT_LC_MUTEX); - else if (class == &lock_class_mtx_spin) - flags |= CALLOUT_SET_LC(CALLOUT_LC_SPIN); - else if (class == &lock_class_rm) - flags |= CALLOUT_SET_LC(CALLOUT_LC_RM); - else if (class == &lock_class_rw) - flags |= CALLOUT_SET_LC(CALLOUT_LC_RW); - else - panic("callout_init_lock: Unsupported lock class '%s'\n", class->lc_name); - } else { - flags |= CALLOUT_SET_LC(CALLOUT_LC_UNUSED_0); - } c->c_lock = lock; - c->c_flags = flags; + KASSERT((flags & ~(CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK)) == 0, + ("callout_init_lock: bad flags %d", flags)); + KASSERT(lock != NULL || (flags & CALLOUT_RETURNUNLOCKED) == 0, + ("callout_init_lock: CALLOUT_RETURNUNLOCKED with no lock")); + KASSERT(lock == NULL || !(LOCK_CLASS(lock)->lc_flags & + (LC_SPINLOCK | LC_SLEEPABLE)), ("%s: invalid lock class", + __func__)); + c->c_flags = flags & (CALLOUT_RETURNUNLOCKED | CALLOUT_SHAREDLOCK); c->c_cpu = timeout_cpu; } |