diff options
Diffstat (limited to 'sys/kern/kern_fail.c')
-rw-r--r-- | sys/kern/kern_fail.c | 782 |
1 files changed, 649 insertions, 133 deletions
diff --git a/sys/kern/kern_fail.c b/sys/kern/kern_fail.c index 3737aa3..ec466dd 100644 --- a/sys/kern/kern_fail.c +++ b/sys/kern/kern_fail.c @@ -52,17 +52,25 @@ #include <sys/cdefs.h> __FBSDID("$FreeBSD$"); +#include "opt_stack.h" + #include <sys/ctype.h> #include <sys/errno.h> #include <sys/fail.h> #include <sys/kernel.h> #include <sys/libkern.h> +#include <sys/limits.h> #include <sys/lock.h> #include <sys/malloc.h> #include <sys/mutex.h> #include <sys/proc.h> #include <sys/sbuf.h> +#include <sys/sleepqueue.h> +#include <sys/sx.h> +#include <sys/sysctl.h> +#include <sys/types.h> +#include <machine/atomic.h> #include <machine/stdarg.h> #ifdef ILOG_DEFINE_FOR_FILE @@ -72,11 +80,45 @@ ILOG_DEFINE_FOR_FILE(L_ISI_FAIL_POINT, L_ILOG, fail_point); static MALLOC_DEFINE(M_FAIL_POINT, "Fail Points", "fail points system"); #define fp_free(ptr) free(ptr, M_FAIL_POINT) #define fp_malloc(size, flags) malloc((size), M_FAIL_POINT, (flags)) +#define fs_free(ptr) fp_free(ptr) +#define fs_malloc() fp_malloc(sizeof(struct fail_point_setting), \ + M_WAITOK | M_ZERO) + + /** + * These define the wchans that are used for sleeping, pausing respectively. + * They are chosen arbitrarily but need to be distinct to the failpoint and + * the sleep/pause distinction. + */ +#define FP_SLEEP_CHANNEL(fp) (void*)(fp) +#define FP_PAUSE_CHANNEL(fp) __DEVOLATILE(void*, &fp->fp_setting) + +/** + * Don't allow more than this many entries in a fail point set by sysctl. + * The 99.99...% case is to have 1 entry. I can't imagine having this many + * entries, so it should not limit us. Saves on re-mallocs while holding + * a non-sleepable lock. + */ +#define FP_MAX_ENTRY_COUNT 20 + +/* Used to drain sbufs to the sysctl output */ +int fail_sysctl_drain_func(void *, const char *, int); + +/* Head of tailq of struct fail_point_entry */ +TAILQ_HEAD(fail_point_entry_queue, fail_point_entry); + +/** + * fp entries garbage list; outstanding entries are cleaned up in the + * garbage collector + */ +STAILQ_HEAD(fail_point_setting_garbage, fail_point_setting); +static struct fail_point_setting_garbage fp_setting_garbage = + STAILQ_HEAD_INITIALIZER(fp_setting_garbage); +static struct mtx mtx_garbage_list; +MTX_SYSINIT(mtx_garbage_list, &mtx_garbage_list, "fail point garbage mtx", + MTX_SPIN); -static struct mtx g_fp_mtx; -MTX_SYSINIT(g_fp_mtx, &g_fp_mtx, "fail point mtx", MTX_DEF); -#define FP_LOCK() mtx_lock(&g_fp_mtx) -#define FP_UNLOCK() mtx_unlock(&g_fp_mtx) +static struct sx sx_fp_set; +SX_SYSINIT(sx_fp_set, &sx_fp_set, "fail point set sx"); /** * Failpoint types. @@ -90,7 +132,11 @@ enum fail_point_t { FAIL_POINT_BREAK, /**< break into the debugger */ FAIL_POINT_PRINT, /**< print a message */ FAIL_POINT_SLEEP, /**< sleep for some msecs */ - FAIL_POINT_NUMTYPES + FAIL_POINT_PAUSE, /**< sleep until failpoint is set to off */ + FAIL_POINT_YIELD, /**< yield the cpu */ + FAIL_POINT_DELAY, /**< busy wait the cpu */ + FAIL_POINT_NUMTYPES, + FAIL_POINT_INVALID = -1 }; static struct { @@ -104,53 +150,307 @@ static struct { [FAIL_POINT_BREAK] = FP_TYPE_NM_LEN("break"), [FAIL_POINT_PRINT] = FP_TYPE_NM_LEN("print"), [FAIL_POINT_SLEEP] = FP_TYPE_NM_LEN("sleep"), + [FAIL_POINT_PAUSE] = FP_TYPE_NM_LEN("pause"), + [FAIL_POINT_YIELD] = FP_TYPE_NM_LEN("yield"), + [FAIL_POINT_DELAY] = FP_TYPE_NM_LEN("delay"), }; +#define FE_COUNT_UNTRACKED (INT_MIN) + /** * Internal structure tracking a single term of a complete failpoint. * @ingroup failpoint_private */ struct fail_point_entry { - enum fail_point_t fe_type; /**< type of entry */ + volatile bool fe_stale; + enum fail_point_t fe_type; /**< type of entry */ int fe_arg; /**< argument to type (e.g. return value) */ int fe_prob; /**< likelihood of firing in millionths */ - int fe_count; /**< number of times to fire, 0 means always */ + int fe_count; /**< number of times to fire, -1 means infinite */ pid_t fe_pid; /**< only fail for this process */ - TAILQ_ENTRY(fail_point_entry) fe_entries; /**< next entry in fail point */ + struct fail_point *fe_parent; /**< backpointer to fp */ + TAILQ_ENTRY(fail_point_entry) fe_entries; /**< next entry ptr */ }; +struct fail_point_setting { + STAILQ_ENTRY(fail_point_setting) fs_garbage_link; + struct fail_point_entry_queue fp_entry_queue; + struct fail_point * fs_parent; + struct mtx feq_mtx; /* Gives fail_point_pause something to do. */ +}; + +/** + * Defines stating the equivalent of probablilty one (100%) + */ +enum { + PROB_MAX = 1000000, /* probability between zero and this number */ + PROB_DIGITS = 6 /* number of zero's in above number */ +}; + +/* Get a ref on an fp's fp_setting */ +static inline struct fail_point_setting *fail_point_setting_get_ref( + struct fail_point *fp); +/* Release a ref on an fp_setting */ +static inline void fail_point_setting_release_ref(struct fail_point *fp); +/* Allocate and initialize a struct fail_point_setting */ +static struct fail_point_setting *fail_point_setting_new(struct + fail_point *); +/* Free a struct fail_point_setting */ +static void fail_point_setting_destroy(struct fail_point_setting *fp_setting); +/* Allocate and initialize a struct fail_point_entry */ +static struct fail_point_entry *fail_point_entry_new(struct + fail_point_setting *); +/* Free a struct fail_point_entry */ +static void fail_point_entry_destroy(struct fail_point_entry *fp_entry); +/* Append fp setting to garbage list */ +static inline void fail_point_setting_garbage_append( + struct fail_point_setting *fp_setting); +/* Swap fp's setting with fp_setting_new */ +static inline struct fail_point_setting * + fail_point_swap_settings(struct fail_point *fp, + struct fail_point_setting *fp_setting_new); +/* Free up any zero-ref setting in the garbage queue */ +static void fail_point_garbage_collect(void); +/* If this fail point's setting are empty, then swap it out to NULL. */ +static inline void fail_point_eval_swap_out(struct fail_point *fp, + struct fail_point_setting *fp_setting); + +bool +fail_point_is_off(struct fail_point *fp) +{ + bool return_val; + struct fail_point_setting *fp_setting; + struct fail_point_entry *ent; + + return_val = true; + + fp_setting = fail_point_setting_get_ref(fp); + if (fp_setting != NULL) { + TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue, + fe_entries) { + if (!ent->fe_stale) { + return_val = false; + break; + } + } + } + fail_point_setting_release_ref(fp); + + return (return_val); +} + +/* Allocate and initialize a struct fail_point_setting */ +static struct fail_point_setting * +fail_point_setting_new(struct fail_point *fp) +{ + struct fail_point_setting *fs_new; + + fs_new = fs_malloc(); + fs_new->fs_parent = fp; + TAILQ_INIT(&fs_new->fp_entry_queue); + mtx_init(&fs_new->feq_mtx, "fail point entries", NULL, MTX_SPIN); + + fail_point_setting_garbage_append(fs_new); + + return (fs_new); +} + +/* Free a struct fail_point_setting */ +static void +fail_point_setting_destroy(struct fail_point_setting *fp_setting) +{ + struct fail_point_entry *ent; + + while (!TAILQ_EMPTY(&fp_setting->fp_entry_queue)) { + ent = TAILQ_FIRST(&fp_setting->fp_entry_queue); + TAILQ_REMOVE(&fp_setting->fp_entry_queue, ent, fe_entries); + fail_point_entry_destroy(ent); + } + + fs_free(fp_setting); +} + +/* Allocate and initialize a struct fail_point_entry */ +static struct fail_point_entry * +fail_point_entry_new(struct fail_point_setting *fp_setting) +{ + struct fail_point_entry *fp_entry; + + fp_entry = fp_malloc(sizeof(struct fail_point_entry), + M_WAITOK | M_ZERO); + fp_entry->fe_parent = fp_setting->fs_parent; + fp_entry->fe_prob = PROB_MAX; + fp_entry->fe_pid = NO_PID; + fp_entry->fe_count = FE_COUNT_UNTRACKED; + TAILQ_INSERT_TAIL(&fp_setting->fp_entry_queue, fp_entry, + fe_entries); + + return (fp_entry); +} + +/* Free a struct fail_point_entry */ +static void +fail_point_entry_destroy(struct fail_point_entry *fp_entry) +{ + + fp_free(fp_entry); +} + +/* Get a ref on an fp's fp_setting */ +static inline struct fail_point_setting * +fail_point_setting_get_ref(struct fail_point *fp) +{ + struct fail_point_setting *fp_setting; + + /* Invariant: if we have a ref, our pointer to fp_setting is safe */ + atomic_add_acq_32(&fp->fp_ref_cnt, 1); + fp_setting = fp->fp_setting; + + return (fp_setting); +} + +/* Release a ref on an fp_setting */ +static inline void +fail_point_setting_release_ref(struct fail_point *fp) +{ + + KASSERT(&fp->fp_ref_cnt > 0, ("Attempting to deref w/no refs")); + atomic_subtract_rel_32(&fp->fp_ref_cnt, 1); +} + +/* Append fp entries to fp garbage list */ +static inline void +fail_point_setting_garbage_append(struct fail_point_setting *fp_setting) +{ + + mtx_lock_spin(&mtx_garbage_list); + STAILQ_INSERT_TAIL(&fp_setting_garbage, fp_setting, + fs_garbage_link); + mtx_unlock_spin(&mtx_garbage_list); +} + +/* Swap fp's entries with fp_setting_new */ +static struct fail_point_setting * +fail_point_swap_settings(struct fail_point *fp, + struct fail_point_setting *fp_setting_new) +{ + struct fail_point_setting *fp_setting_old; + + fp_setting_old = fp->fp_setting; + fp->fp_setting = fp_setting_new; + + return (fp_setting_old); +} + +static inline void +fail_point_eval_swap_out(struct fail_point *fp, + struct fail_point_setting *fp_setting) +{ + + /* We may have already been swapped out and replaced; ignore. */ + if (fp->fp_setting == fp_setting) + fail_point_swap_settings(fp, NULL); +} + +/* Free up any zero-ref entries in the garbage queue */ +static void +fail_point_garbage_collect() +{ + struct fail_point_setting *fs_current, *fs_next; + struct fail_point_setting_garbage fp_ents_free_list; + + /** + * We will transfer the entries to free to fp_ents_free_list while holding + * the spin mutex, then free it after we drop the lock. This avoids + * triggering witness due to sleepable mutexes in the memory + * allocator. + */ + STAILQ_INIT(&fp_ents_free_list); + + mtx_lock_spin(&mtx_garbage_list); + STAILQ_FOREACH_SAFE(fs_current, &fp_setting_garbage, fs_garbage_link, + fs_next) { + if (fs_current->fs_parent->fp_setting != fs_current && + fs_current->fs_parent->fp_ref_cnt == 0) { + STAILQ_REMOVE(&fp_setting_garbage, fs_current, + fail_point_setting, fs_garbage_link); + STAILQ_INSERT_HEAD(&fp_ents_free_list, fs_current, + fs_garbage_link); + } + } + mtx_unlock_spin(&mtx_garbage_list); + + STAILQ_FOREACH_SAFE(fs_current, &fp_ents_free_list, fs_garbage_link, + fs_next) + fail_point_setting_destroy(fs_current); +} + +/* Drain out all refs from this fail point */ +static inline void +fail_point_drain(struct fail_point *fp, int expected_ref) +{ + struct fail_point_setting *entries; + + entries = fail_point_swap_settings(fp, NULL); + /** + * We have unpaused all threads; so we will wait no longer + * than the time taken for the longest remaining sleep, or + * the length of time of a long-running code block. + */ + while (fp->fp_ref_cnt > expected_ref) { + wakeup(FP_PAUSE_CHANNEL(fp)); + tsleep(&fp, PWAIT, "fail_point_drain", hz / 100); + } + fail_point_swap_settings(fp, entries); +} + static inline void -fail_point_sleep(struct fail_point *fp, struct fail_point_entry *ent, - int msecs, enum fail_point_return_code *pret) +fail_point_pause(struct fail_point *fp, enum fail_point_return_code *pret, + struct mtx *mtx_sleep) { - /* convert from millisecs to ticks, rounding up */ - int timo = ((msecs * hz) + 999) / 1000; + + if (fp->fp_pre_sleep_fn) + fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg); + + msleep_spin(FP_PAUSE_CHANNEL(fp), mtx_sleep, "failpt", 0); + + if (fp->fp_post_sleep_fn) + fp->fp_post_sleep_fn(fp->fp_post_sleep_arg); +} + +static inline void +fail_point_sleep(struct fail_point *fp, int msecs, + enum fail_point_return_code *pret) +{ + int timo; + + /* Convert from millisecs to ticks, rounding up */ + timo = howmany(msecs * hz, 1000); if (timo > 0) { - if (fp->fp_sleep_fn == NULL) { - msleep(fp, &g_fp_mtx, PWAIT, "failpt", timo); + if (!(fp->fp_flags & FAIL_POINT_USE_TIMEOUT_PATH)) { + if (fp->fp_pre_sleep_fn) + fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg); + + tsleep(FP_SLEEP_CHANNEL(fp), PWAIT, "failpt", timo); + + if (fp->fp_post_sleep_fn) + fp->fp_post_sleep_fn(fp->fp_post_sleep_arg); } else { - timeout(fp->fp_sleep_fn, fp->fp_sleep_arg, timo); + if (fp->fp_pre_sleep_fn) + fp->fp_pre_sleep_fn(fp->fp_pre_sleep_arg); + + timeout(fp->fp_post_sleep_fn, fp->fp_post_sleep_arg, + timo); *pret = FAIL_POINT_RC_QUEUED; } } } - -/** - * Defines stating the equivalent of probablilty one (100%) - */ -enum { - PROB_MAX = 1000000, /* probability between zero and this number */ - PROB_DIGITS = 6, /* number of zero's in above number */ -}; - -static char *parse_fail_point(struct fail_point_entries *, char *); -static char *parse_term(struct fail_point_entries *, char *); +static char *parse_fail_point(struct fail_point_setting *, char *); +static char *parse_term(struct fail_point_setting *, char *); static char *parse_number(int *out_units, int *out_decimal, char *); static char *parse_type(struct fail_point_entry *, char *); -static void free_entry(struct fail_point_entries *, struct fail_point_entry *); -static void clear_entries(struct fail_point_entries *); /** * Initialize a fail_point. The name is formed in a printf-like fashion @@ -167,7 +467,7 @@ fail_point_init(struct fail_point *fp, const char *fmt, ...) char *name; int n; - TAILQ_INIT(&fp->fp_entries); + fp->fp_setting = NULL; fp->fp_flags = 0; /* Figure out the size of the name. */ @@ -185,25 +485,33 @@ fail_point_init(struct fail_point *fp, const char *fmt, ...) fp->fp_name = name; fp->fp_location = ""; fp->fp_flags |= FAIL_POINT_DYNAMIC_NAME; - fp->fp_sleep_fn = NULL; - fp->fp_sleep_arg = NULL; + fp->fp_pre_sleep_fn = NULL; + fp->fp_pre_sleep_arg = NULL; + fp->fp_post_sleep_fn = NULL; + fp->fp_post_sleep_arg = NULL; } /** - * Free the resources held by a fail_point. - * + * Free the resources held by a fail_point, and wake any paused threads. + * Thou shalt not allow threads to hit this fail point after you enter this + * function, nor shall you call this multiple times for a given fp. * @ingroup failpoint */ void fail_point_destroy(struct fail_point *fp) { + fail_point_drain(fp, 0); + if ((fp->fp_flags & FAIL_POINT_DYNAMIC_NAME) != 0) { fp_free(__DECONST(void *, fp->fp_name)); fp->fp_name = NULL; } fp->fp_flags = 0; - clear_entries(&fp->fp_entries); + + sx_xlock(&sx_fp_set); + fail_point_garbage_collect(); + sx_xunlock(&sx_fp_set); } /** @@ -216,21 +524,51 @@ fail_point_destroy(struct fail_point *fp) enum fail_point_return_code fail_point_eval_nontrivial(struct fail_point *fp, int *return_value) { - enum fail_point_return_code ret = FAIL_POINT_RC_CONTINUE; - struct fail_point_entry *ent, *next; + bool execute = false; + struct fail_point_entry *ent; + struct fail_point_setting *fp_setting; + enum fail_point_return_code ret; + int cont; + int count; int msecs; + int usecs; + + ret = FAIL_POINT_RC_CONTINUE; + cont = 0; /* don't continue by default */ + + fp_setting = fail_point_setting_get_ref(fp); + if (fp_setting == NULL) + goto abort; - FP_LOCK(); + TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue, fe_entries) { - TAILQ_FOREACH_SAFE(ent, &fp->fp_entries, fe_entries, next) { - int cont = 0; /* don't continue by default */ + if (ent->fe_stale) + continue; if (ent->fe_prob < PROB_MAX && ent->fe_prob < random() % PROB_MAX) continue; + if (ent->fe_pid != NO_PID && ent->fe_pid != curproc->p_pid) continue; + if (ent->fe_count != FE_COUNT_UNTRACKED) { + count = ent->fe_count; + while (count > 0) { + if (atomic_cmpset_32(&ent->fe_count, count, count - 1)) { + count--; + execute = true; + break; + } + count = ent->fe_count; + } + if (execute == false) + /* We lost the race; consider the entry stale and bail now */ + continue; + if (count == 0) + ent->fe_stale = true; + } + switch (ent->fe_type) { case FAIL_POINT_PANIC: panic("fail point %s panicking", fp->fp_name); @@ -244,7 +582,7 @@ fail_point_eval_nontrivial(struct fail_point *fp, int *return_value) case FAIL_POINT_BREAK: printf("fail point %s breaking to debugger\n", - fp->fp_name); + fp->fp_name); breakpoint(); break; @@ -254,51 +592,95 @@ fail_point_eval_nontrivial(struct fail_point *fp, int *return_value) break; case FAIL_POINT_SLEEP: - /* - * Free the entry now if necessary, since - * we're about to drop the mutex and sleep. - */ msecs = ent->fe_arg; - if (ent->fe_count > 0 && --ent->fe_count == 0) { - free_entry(&fp->fp_entries, ent); - ent = NULL; - } - if (msecs) - fail_point_sleep(fp, ent, msecs, &ret); + fail_point_sleep(fp, msecs, &ret); + break; + + case FAIL_POINT_PAUSE: + /** + * Pausing is inherently strange with multiple + * entries given our design. That is because some + * entries could be unreachable, for instance in cases like: + * pause->return. We can never reach the return entry. + * The sysctl layer actually truncates all entries after + * a pause for this reason. + */ + mtx_lock_spin(&fp_setting->feq_mtx); + fail_point_pause(fp, &ret, &fp_setting->feq_mtx); + mtx_unlock_spin(&fp_setting->feq_mtx); + break; + + case FAIL_POINT_YIELD: + kern_yield(-1); + break; + + case FAIL_POINT_DELAY: + usecs = ent->fe_arg; + DELAY(usecs); break; default: break; } - if (ent != NULL && ent->fe_count > 0 && --ent->fe_count == 0) - free_entry(&fp->fp_entries, ent); if (cont == 0) break; } - /* Get rid of "off"s at the end. */ - while ((ent = TAILQ_LAST(&fp->fp_entries, fail_point_entries)) && - ent->fe_type == FAIL_POINT_OFF) - free_entry(&fp->fp_entries, ent); + if (fail_point_is_off(fp)) + fail_point_eval_swap_out(fp, fp_setting); - FP_UNLOCK(); +abort: + fail_point_setting_release_ref(fp); return (ret); + } /** * Translate internal fail_point structure into human-readable text. */ static void -fail_point_get(struct fail_point *fp, struct sbuf *sb) +fail_point_get(struct fail_point *fp, struct sbuf *sb, + bool verbose) { struct fail_point_entry *ent; + struct fail_point_setting *fp_setting; + struct fail_point_entry *fp_entry_cpy; + int cnt_sleeping; + int idx; + int printed_entry_count; + + cnt_sleeping = 0; + idx = 0; + printed_entry_count = 0; + + fp_entry_cpy = fp_malloc(sizeof(struct fail_point_entry) * + (FP_MAX_ENTRY_COUNT + 1), M_WAITOK); + + fp_setting = fail_point_setting_get_ref(fp); - FP_LOCK(); + if (fp_setting != NULL) { + TAILQ_FOREACH(ent, &fp_setting->fp_entry_queue, fe_entries) { + if (ent->fe_stale) + continue; - TAILQ_FOREACH(ent, &fp->fp_entries, fe_entries) { + KASSERT(printed_entry_count < FP_MAX_ENTRY_COUNT, + ("FP entry list larger than allowed")); + + fp_entry_cpy[printed_entry_count] = *ent; + ++printed_entry_count; + } + } + fail_point_setting_release_ref(fp); + + /* This is our equivalent of a NULL terminator */ + fp_entry_cpy[printed_entry_count].fe_type = FAIL_POINT_INVALID; + + while (idx < printed_entry_count) { + ent = &fp_entry_cpy[idx]; + ++idx; if (ent->fe_prob < PROB_MAX) { int decimal = ent->fe_prob % (PROB_MAX / 100); int units = ent->fe_prob / (PROB_MAX / 100); @@ -313,7 +695,7 @@ fail_point_get(struct fail_point *fp, struct sbuf *sb) } sbuf_printf(sb, "%%"); } - if (ent->fe_count > 0) + if (ent->fe_count >= 0) sbuf_printf(sb, "%d*", ent->fe_count); sbuf_printf(sb, "%s", fail_type_strings[ent->fe_type].name); if (ent->fe_arg) @@ -323,10 +705,33 @@ fail_point_get(struct fail_point *fp, struct sbuf *sb) if (TAILQ_NEXT(ent, fe_entries)) sbuf_printf(sb, "->"); } - if (TAILQ_EMPTY(&fp->fp_entries)) + if (!printed_entry_count) sbuf_printf(sb, "off"); - FP_UNLOCK(); + fp_free(fp_entry_cpy); + if (verbose) { +#ifdef STACK + /* Print number of sleeping threads. queue=0 is the argument + * used by msleep when sending our threads to sleep. */ + sbuf_printf(sb, "\nsleeping_thread_stacks = {\n"); + sleepq_sbuf_print_stacks(sb, FP_SLEEP_CHANNEL(fp), 0, + &cnt_sleeping); + + sbuf_printf(sb, "},\n"); +#endif + sbuf_printf(sb, "sleeping_thread_count = %d,\n", + cnt_sleeping); + +#ifdef STACK + sbuf_printf(sb, "paused_thread_stacks = {\n"); + sleepq_sbuf_print_stacks(sb, FP_PAUSE_CHANNEL(fp), 0, + &cnt_sleeping); + + sbuf_printf(sb, "},\n"); +#endif + sbuf_printf(sb, "paused_thread_count = %d\n", + cnt_sleeping); + } } /** @@ -336,38 +741,91 @@ fail_point_get(struct fail_point *fp, struct sbuf *sb) static int fail_point_set(struct fail_point *fp, char *buf) { - int error = 0; struct fail_point_entry *ent, *ent_next; - struct fail_point_entries new_entries; + struct fail_point_setting *entries; + bool should_wake_paused; + bool should_truncate; + int error; + + error = 0; + should_wake_paused = false; + should_truncate = false; /* Parse new entries. */ - TAILQ_INIT(&new_entries); - if (!parse_fail_point(&new_entries, buf)) { - clear_entries(&new_entries); + /** + * ref protects our new malloc'd stuff from being garbage collected + * before we link it. + */ + fail_point_setting_get_ref(fp); + entries = fail_point_setting_new(fp); + if (parse_fail_point(entries, buf) == NULL) { + STAILQ_REMOVE(&fp_setting_garbage, entries, + fail_point_setting, fs_garbage_link); + fail_point_setting_destroy(entries); error = EINVAL; goto end; } - FP_LOCK(); - - /* Move new entries in. */ - TAILQ_SWAP(&fp->fp_entries, &new_entries, fail_point_entry, fe_entries); - clear_entries(&new_entries); + /** + * Transfer the entries we are going to keep to a new list. + * Get rid of useless zero probability entries, and entries with hit + * count 0. + * If 'off' is present, and it has no hit count set, then all entries + * after it are discarded since they are unreachable. + */ + TAILQ_FOREACH_SAFE(ent, &entries->fp_entry_queue, fe_entries, ent_next) { + if (ent->fe_prob == 0 || ent->fe_count == 0) { + printf("Discarding entry which cannot execute %s\n", + fail_type_strings[ent->fe_type].name); + TAILQ_REMOVE(&entries->fp_entry_queue, ent, + fe_entries); + fp_free(ent); + continue; + } else if (should_truncate) { + printf("Discarding unreachable entry %s\n", + fail_type_strings[ent->fe_type].name); + TAILQ_REMOVE(&entries->fp_entry_queue, ent, + fe_entries); + fp_free(ent); + continue; + } - /* Get rid of useless zero probability entries. */ - TAILQ_FOREACH_SAFE(ent, &fp->fp_entries, fe_entries, ent_next) { - if (ent->fe_prob == 0) - free_entry(&fp->fp_entries, ent); + if (ent->fe_type == FAIL_POINT_OFF) { + should_wake_paused = true; + if (ent->fe_count == FE_COUNT_UNTRACKED) { + should_truncate = true; + TAILQ_REMOVE(&entries->fp_entry_queue, ent, + fe_entries); + fp_free(ent); + } + } else if (ent->fe_type == FAIL_POINT_PAUSE) { + should_truncate = true; + } else if (ent->fe_type == FAIL_POINT_SLEEP && (fp->fp_flags & + FAIL_POINT_NONSLEEPABLE)) { + /** + * If this fail point is annotated as being in a + * non-sleepable ctx, convert sleep to delay and + * convert the msec argument to usecs. + */ + printf("Sleep call request on fail point in " + "non-sleepable context; using delay instead " + "of sleep\n"); + ent->fe_type = FAIL_POINT_DELAY; + ent->fe_arg *= 1000; + } } - /* Get rid of "off"s at the end. */ - while ((ent = TAILQ_LAST(&fp->fp_entries, fail_point_entries)) && - ent->fe_type == FAIL_POINT_OFF) - free_entry(&fp->fp_entries, ent); - - FP_UNLOCK(); + if (TAILQ_EMPTY(&entries->fp_entry_queue)) { + entries = fail_point_swap_settings(fp, NULL); + if (entries != NULL) + wakeup(FP_PAUSE_CHANNEL(fp)); + } else { + if (should_wake_paused) + wakeup(FP_PAUSE_CHANNEL(fp)); + fail_point_swap_settings(fp, entries); + } - end: +end: #ifdef IWARNING if (error) IWARNING("Failed to set %s %s to %s", @@ -377,6 +835,7 @@ fail_point_set(struct fail_point *fp, char *buf) fp->fp_name, fp->fp_location, buf); #endif /* IWARNING */ + fail_point_setting_release_ref(fp); return (error); } @@ -385,25 +844,33 @@ fail_point_set(struct fail_point *fp, char *buf) /** * Handle kernel failpoint set/get. */ + int fail_point_sysctl(SYSCTL_HANDLER_ARGS) { - struct fail_point *fp = arg1; - char *buf = NULL; + struct fail_point *fp; + char *buf; + struct sbuf *sb_check; struct sbuf sb; int error; - /* Retrieving */ - sbuf_new(&sb, NULL, 128, SBUF_AUTOEXTEND | SBUF_INCLUDENUL); - fail_point_get(fp, &sb); - sbuf_trim(&sb); - error = sbuf_finish(&sb); - if (error == 0) - error = SYSCTL_OUT(req, sbuf_data(&sb), sbuf_len(&sb)); - sbuf_delete(&sb); + error = 0; + fp = arg1; + buf = NULL; + + sb_check = sbuf_new(&sb, NULL, 1024, SBUF_AUTOEXTEND); + if (sb_check != &sb) + return (ENOMEM); + + sbuf_set_drain(&sb, (sbuf_drain_func *)fail_sysctl_drain_func, req); /* Setting */ - if (!error && req->newptr) { + /** + * Lock protects any new entries from being garbage collected before we + * can link them to the fail point. + */ + sx_xlock(&sx_fp_set); + if (req->newptr) { if (req->newlen > MAX_FAIL_POINT_BUF) { error = EINVAL; goto out; @@ -417,31 +884,95 @@ fail_point_sysctl(SYSCTL_HANDLER_ARGS) buf[req->newlen] = '\0'; error = fail_point_set(fp, buf); - } + } + + fail_point_garbage_collect(); + sx_xunlock(&sx_fp_set); + + /* Retrieving. */ + fail_point_get(fp, &sb, false); out: - fp_free(buf); + sbuf_finish(&sb); + sbuf_delete(&sb); + + if (buf) + fp_free(buf); + return (error); } +int +fail_point_sysctl_status(SYSCTL_HANDLER_ARGS) +{ + struct fail_point *fp; + struct sbuf sb, *sb_check; + + fp = arg1; + + sb_check = sbuf_new(&sb, NULL, 1024, SBUF_AUTOEXTEND); + if (sb_check != &sb) + return (ENOMEM); + + sbuf_set_drain(&sb, (sbuf_drain_func *)fail_sysctl_drain_func, req); + + /* Retrieving. */ + fail_point_get(fp, &sb, true); + + sbuf_finish(&sb); + sbuf_delete(&sb); + + /** + * Lock protects any new entries from being garbage collected before we + * can link them to the fail point. + */ + sx_xlock(&sx_fp_set); + fail_point_garbage_collect(); + sx_xunlock(&sx_fp_set); + + return (0); +} + +int +fail_sysctl_drain_func(void *sysctl_args, const char *buf, int len) +{ + struct sysctl_req *sa; + int error; + + sa = sysctl_args; + + error = SYSCTL_OUT(sa, buf, len); + + if (error == ENOMEM) + return (-1); + else + return (len); +} + + /** * Internal helper function to translate a human-readable failpoint string * into a internally-parsable fail_point structure. */ static char * -parse_fail_point(struct fail_point_entries *ents, char *p) +parse_fail_point(struct fail_point_setting *ents, char *p) { /* <fail_point> :: * <term> ( "->" <term> )* */ + uint8_t term_count; + + term_count = 1; + p = parse_term(ents, p); if (p == NULL) return (NULL); + while (*p != '\0') { - if (p[0] != '-' || p[1] != '>') - return (NULL); - p = parse_term(ents, p + 2); - if (p == NULL) + term_count++; + if (p[0] != '-' || p[1] != '>' || + (p = parse_term(ents, p+2)) == NULL || + term_count > FP_MAX_ENTRY_COUNT) return (NULL); } return (p); @@ -451,14 +982,11 @@ parse_fail_point(struct fail_point_entries *ents, char *p) * Internal helper function to parse an individual term from a failpoint. */ static char * -parse_term(struct fail_point_entries *ents, char *p) +parse_term(struct fail_point_setting *ents, char *p) { struct fail_point_entry *ent; - ent = fp_malloc(sizeof *ent, M_WAITOK | M_ZERO); - ent->fe_prob = PROB_MAX; - ent->fe_pid = NO_PID; - TAILQ_INSERT_TAIL(ents, ent, fe_entries); + ent = fail_point_entry_new(ents); /* * <term> :: @@ -483,7 +1011,7 @@ parse_term(struct fail_point_entries *ents, char *p) if (ent->fe_prob > PROB_MAX) ent->fe_prob = PROB_MAX; } else if (*p == '*') { - if (!units || decimal) + if (!units || units < 0 || decimal) return (NULL); ent->fe_count = units; } else @@ -500,7 +1028,7 @@ parse_term(struct fail_point_entries *ents, char *p) /* [ "(" <integer> ")" ] */ if (*p != '(') - return p; + return (p); p++; if (!isdigit(*p) && *p != '-') return (NULL); @@ -509,7 +1037,7 @@ parse_term(struct fail_point_entries *ents, char *p) return (NULL); /* [ "[pid " <integer> "]" ] */ -#define PID_STRING "[pid " +#define PID_STRING "[pid " if (strncmp(p, PID_STRING, sizeof(PID_STRING) - 1) != 0) return (p); p += sizeof(PID_STRING) - 1; @@ -530,7 +1058,7 @@ parse_number(int *out_units, int *out_decimal, char *p) { char *old_p; - /* + /** * <number> :: * <integer> [ "." <integer> ] | * "." <integer> @@ -584,29 +1112,17 @@ parse_type(struct fail_point_entry *ent, char *beg) return (NULL); } -/** - * Internal helper function to free an individual failpoint term. - */ -static void -free_entry(struct fail_point_entries *ents, struct fail_point_entry *ent) -{ - TAILQ_REMOVE(ents, ent, fe_entries); - fp_free(ent); -} +/* The fail point sysctl tree. */ +SYSCTL_NODE(_debug, OID_AUTO, fail_point, CTLFLAG_RW, 0, "fail points"); -/** - * Internal helper function to clear out all failpoint terms for a single - * failpoint. - */ -static void -clear_entries(struct fail_point_entries *ents) +/* Debugging/testing stuff for fail point */ +static int +sysctl_test_fail_point(SYSCTL_HANDLER_ARGS) { - struct fail_point_entry *ent, *ent_next; - TAILQ_FOREACH_SAFE(ent, ents, fe_entries, ent_next) - fp_free(ent); - TAILQ_INIT(ents); + KFAIL_POINT_RETURN(DEBUG_FP, test_fail_point); + return (0); } - -/* The fail point sysctl tree. */ -SYSCTL_NODE(_debug, OID_AUTO, fail_point, CTLFLAG_RW, 0, "fail points"); +SYSCTL_OID(_debug_fail_point, OID_AUTO, test_trigger_fail_point, + CTLTYPE_STRING | CTLFLAG_RD, NULL, 0, sysctl_test_fail_point, "A", + "Trigger test fail points"); |