diff options
author | sjg <sjg@FreeBSD.org> | 2013-09-05 20:18:59 +0000 |
---|---|---|
committer | sjg <sjg@FreeBSD.org> | 2013-09-05 20:18:59 +0000 |
commit | 62bb1062226d3ce6a2350808256a25508978352d (patch) | |
tree | 22b131dceb13c3df96da594fbaadb693504797c7 /usr.sbin/watchdogd | |
parent | 72ab90509b3a51ab361bf710338f2ef44a4e360d (diff) | |
parent | 04932445481c2cb89ff69a83b961bdef3d64757e (diff) | |
download | FreeBSD-src-62bb1062226d3ce6a2350808256a25508978352d.zip FreeBSD-src-62bb1062226d3ce6a2350808256a25508978352d.tar.gz |
Merge from head
Diffstat (limited to 'usr.sbin/watchdogd')
-rw-r--r-- | usr.sbin/watchdogd/watchdogd.8 | 94 | ||||
-rw-r--r-- | usr.sbin/watchdogd/watchdogd.c | 209 |
2 files changed, 279 insertions, 24 deletions
diff --git a/usr.sbin/watchdogd/watchdogd.8 b/usr.sbin/watchdogd/watchdogd.8 index b8a5505..6176a20 100644 --- a/usr.sbin/watchdogd/watchdogd.8 +++ b/usr.sbin/watchdogd/watchdogd.8 @@ -27,7 +27,7 @@ .\" .\" $FreeBSD$ .\" -.Dd March 5, 2013 +.Dd July 27, 2013 .Dt WATCHDOGD 8 .Os .Sh NAME @@ -115,7 +115,7 @@ will terminate. The .Nm utility recognizes the following runtime options: -.Bl -tag -width ".Fl -softtimeout-action Ar action " +.Bl -tag -width 30m .It Fl I Ar file Write the process ID of the .Nm @@ -208,6 +208,96 @@ device for .Bl -tag -width ".Pa /var/run/watchdogd.pid" -compact .It Pa /var/run/watchdogd.pid .El +.Sh EXAMPLES +.Ss Debugging watchdogd and/or your watchdog script. +This is a useful recipe for debugging +.Nm +and your watchdog script. +.Pp +(Note that ^C works oddly because +.Nm +calls +.Xr system 3 +so the +first ^C will terminate the "sleep" command.) +.Pp +Explanation of options used: +.Bl -enum -offset indent -compact +.It +Set Debug on (--debug) +.It +Set the watchdog to trip at 30 seconds. (-t 30) +.It +Use of a softtimeout: +.Bl -enum -offset indent -compact -nested +.It +Use a softtimeout (do not arm the hardware watchdog). +(--softtimeout) +.It +Set the softtimeout action to do both kernel +.Xr printf 9 +and +.Xr log 9 +when it trips. +(--softtimeout-action log,printf) +.El +.It +Use of a pre-timeout: +.Bl -enum -offset indent -compact -nested +.It +Set a pre-timeout of 15 seconds (this will later trigger a panic/dump). +(--pretimeout 15) +.It +Set the action to also kernel +.Xr printf 9 +and +.Xr log 9 +when it trips. +(--pretimeout-action log,printf) +.El +.It +Use of a script: +.Bl -enum -offset indent -compact -nested +.It +Run "sleep 60" as a shell command that acts as the watchdog (-e 'sleep 60') +.It +Warn us when the script takes longer than 1 second to run (-w) +.El +.El +.Bd -literal +watchdogd --debug -t 30 \\ + --softtimeout --softtimeout-action log,printf \\ + --pretimeout 15 --pretimeout-action log,printf \\ + -e 'sleep 60' -w +.Ed +.Ss Production use of example +.Bl -enum -offset indent -compact +.It +Set hard timeout to 120 seconds (-t 120) +.It +Set a panic to happen at 60 seconds (to trigger a +.Xr crash 8 +for dump analysis): +.Bl -enum -offset indent -compact -nested +.It +Use of pre-timeout (--pretimeout 60) +.It +Specify pre-timeout action (--pretimeout-action log,printf,panic ) +.El +.It +Use of a script: +.Bl -enum -offset indent -compact -nested +.It +Run your script (-e '/path/to/your/script 60') +.It +Log if your script takes a longer than 15 seconds to run time. (-w -T 15) +.El +.El +.Bd -literal +watchdogd -t 120 \\ + --pretimeout 60 --pretimeout-action log,printf,panic \\ + -e '/path/to/your/script 60' -w -T 15 +.Ed .Sh SEE ALSO .Xr watchdog 4 , .Xr watchdog 8 , diff --git a/usr.sbin/watchdogd/watchdogd.c b/usr.sbin/watchdogd/watchdogd.c index 5416751..5fd16f5 100644 --- a/usr.sbin/watchdogd/watchdogd.c +++ b/usr.sbin/watchdogd/watchdogd.c @@ -39,6 +39,7 @@ __FBSDID("$FreeBSD$"); #include <sys/rtprio.h> #include <sys/stat.h> #include <sys/time.h> +#include <sys/sysctl.h> #include <sys/watchdog.h> #include <err.h> @@ -49,6 +50,7 @@ __FBSDID("$FreeBSD$"); #include <paths.h> #include <signal.h> #include <stdio.h> +#include <stdint.h> #include <stdlib.h> #include <string.h> #include <strings.h> @@ -58,19 +60,25 @@ __FBSDID("$FreeBSD$"); #include <getopt.h> +static long fetchtimeout(int opt, + const char *longopt, const char *myoptarg, int zero_ok); static void parseargs(int, char *[]); +static int seconds_to_pow2ns(int); static void sighandler(int); static void watchdog_loop(void); static int watchdog_init(void); static int watchdog_onoff(int onoff); static int watchdog_patpat(u_int timeout); static void usage(void); +static int tstotv(struct timeval *tv, struct timespec *ts); +static int tvtohz(struct timeval *tv); static int debugging = 0; static int end_program = 0; static const char *pidfile = _PATH_VARRUN "watchdogd.pid"; static u_int timeout = WD_TO_128SEC; static u_int pretimeout = 0; +static u_int timeout_sec; static u_int passive = 0; static int is_daemon = 0; static int is_dry_run = 0; /* do not arm the watchdog, only @@ -183,6 +191,59 @@ main(int argc, char *argv[]) } } +static void +pow2ns_to_ts(int pow2ns, struct timespec *ts) +{ + uint64_t ns; + + ns = 1ULL << pow2ns; + ts->tv_sec = ns / 1000000000ULL; + ts->tv_nsec = ns % 1000000000ULL; +} + +/* + * Convert a timeout in seconds to N where 2^N nanoseconds is close to + * "seconds". + * + * The kernel expects the timeouts for watchdogs in "2^N nanosecond format". + */ +static u_int +parse_timeout_to_pow2ns(char opt, const char *longopt, const char *myoptarg) +{ + double a; + u_int rv; + struct timespec ts; + struct timeval tv; + int ticks; + char shortopt[] = "- "; + + if (!longopt) + shortopt[1] = opt; + + a = fetchtimeout(opt, longopt, myoptarg, 1); + + if (a == 0) + rv = WD_TO_NEVER; + else + rv = seconds_to_pow2ns(a); + pow2ns_to_ts(rv, &ts); + tstotv(&tv, &ts); + ticks = tvtohz(&tv); + if (debugging) { + printf("Timeout for %s%s " + "is 2^%d nanoseconds " + "(in: %s sec -> out: %jd sec %ld ns -> %d ticks)\n", + longopt ? "-" : "", longopt ? longopt : shortopt, + rv, + myoptarg, (intmax_t)ts.tv_sec, ts.tv_nsec, ticks); + } + if (ticks <= 0) { + errx(1, "Timeout for %s%s is too small, please choose a higher timeout.", longopt ? "-" : "", longopt ? longopt : shortopt); + } + + return (rv); +} + /* * Catch signals and begin shutdown process. */ @@ -427,7 +488,7 @@ usage(void) } static long -fetchtimeout(int opt, const char *longopt, const char *myoptarg) +fetchtimeout(int opt, const char *longopt, const char *myoptarg, int zero_ok) { const char *errstr; char *p; @@ -439,7 +500,7 @@ fetchtimeout(int opt, const char *longopt, const char *myoptarg) rv = strtol(myoptarg, &p, 0); if ((p != NULL && *p != '\0') || errno != 0) errstr = "is not a number"; - if (rv <= 0) + if (rv < 0 || (!zero_ok && rv == 0)) errstr = "must be greater than zero"; if (errstr) { if (longopt) @@ -513,6 +574,110 @@ timeout_act_str2int(const char *lopt, const char *acts) return rv; } +int +tstotv(struct timeval *tv, struct timespec *ts) +{ + + tv->tv_sec = ts->tv_sec; + tv->tv_usec = ts->tv_nsec / 1000; + return 0; +} + +/* + * Convert a timeval to a number of ticks. + * Mostly copied from the kernel. + */ +int +tvtohz(struct timeval *tv) +{ + register unsigned long ticks; + register long sec, usec; + int hz; + size_t hzsize; + int error; + int tick; + + hzsize = sizeof(hz); + + error = sysctlbyname("kern.hz", &hz, &hzsize, NULL, 0); + if (error) + err(1, "sysctlbyname kern.hz"); + + tick = 1000000 / hz; + + /* + * If the number of usecs in the whole seconds part of the time + * difference fits in a long, then the total number of usecs will + * fit in an unsigned long. Compute the total and convert it to + * ticks, rounding up and adding 1 to allow for the current tick + * to expire. Rounding also depends on unsigned long arithmetic + * to avoid overflow. + * + * Otherwise, if the number of ticks in the whole seconds part of + * the time difference fits in a long, then convert the parts to + * ticks separately and add, using similar rounding methods and + * overflow avoidance. This method would work in the previous + * case but it is slightly slower and assumes that hz is integral. + * + * Otherwise, round the time difference down to the maximum + * representable value. + * + * If ints have 32 bits, then the maximum value for any timeout in + * 10ms ticks is 248 days. + */ + sec = tv->tv_sec; + usec = tv->tv_usec; + if (usec < 0) { + sec--; + usec += 1000000; + } + if (sec < 0) { +#ifdef DIAGNOSTIC + if (usec > 0) { + sec++; + usec -= 1000000; + } + printf("tvotohz: negative time difference %ld sec %ld usec\n", + sec, usec); +#endif + ticks = 1; + } else if (sec <= LONG_MAX / 1000000) + ticks = (sec * 1000000 + (unsigned long)usec + (tick - 1)) + / tick + 1; + else if (sec <= LONG_MAX / hz) + ticks = sec * hz + + ((unsigned long)usec + (tick - 1)) / tick + 1; + else + ticks = LONG_MAX; + if (ticks > INT_MAX) + ticks = INT_MAX; + return ((int)ticks); +} + +static int +seconds_to_pow2ns(int seconds) +{ + uint64_t power; + uint64_t ns; + uint64_t shifted; + + if (seconds <= 0) + errx(1, "seconds %d < 0", seconds); + ns = ((uint64_t)seconds) * 1000000000ULL; + power = flsll(ns); + shifted = 1ULL << power; + if (shifted <= ns) { + power++; + } + if (debugging) { + printf("shifted %lld\n", (long long)shifted); + printf("seconds_to_pow2ns: seconds: %d, ns %lld, power %d\n", + seconds, (long long)ns, (int)power); + } + return (power); +} + + /* * Handle the few command line arguments supported. */ @@ -521,9 +686,7 @@ parseargs(int argc, char *argv[]) { int longindex; int c; - char *p; const char *lopt; - double a; /* * if we end with a 'd' aka 'watchdogd' then we are the daemon program, @@ -559,30 +722,21 @@ parseargs(int argc, char *argv[]) break; #endif case 's': - nap = fetchtimeout(c, NULL, optarg); + nap = fetchtimeout(c, NULL, optarg, 0); break; case 'S': do_syslog = 0; break; case 't': - p = NULL; - errno = 0; - a = strtod(optarg, &p); - if ((p != NULL && *p != '\0') || errno != 0) - errx(EX_USAGE, "-t argument is not a number"); - if (a < 0) - errx(EX_USAGE, "-t argument must be positive"); - - if (a == 0) - timeout = WD_TO_NEVER; - else - timeout = flsll(a * 1e9); - if (debugging) - printf("Timeout is 2^%d nanoseconds\n", - timeout); + timeout_sec = atoi(optarg); + timeout = parse_timeout_to_pow2ns(c, NULL, optarg); + if (debugging) + printf("Timeout is 2^%d nanoseconds\n", + timeout); break; case 'T': - carp_thresh_seconds = fetchtimeout(c, "NULL", optarg); + carp_thresh_seconds = + fetchtimeout(c, "NULL", optarg, 0); break; case 'w': do_timedog = 1; @@ -590,7 +744,7 @@ parseargs(int argc, char *argv[]) case 0: lopt = longopts[longindex].name; if (!strcmp(lopt, "pretimeout")) { - pretimeout = fetchtimeout(0, lopt, optarg); + pretimeout = fetchtimeout(0, lopt, optarg, 0); } else if (!strcmp(lopt, "pretimeout-action")) { pretimeout_act = timeout_act_str2int(lopt, optarg); @@ -618,4 +772,15 @@ parseargs(int argc, char *argv[]) errx(EX_USAGE, "extra arguments."); if (is_daemon && timeout < WD_TO_1SEC) errx(EX_USAGE, "-t argument is less than one second."); + if (pretimeout_set) { + struct timespec ts; + + pow2ns_to_ts(timeout, &ts); + if (pretimeout >= (uintmax_t)ts.tv_sec) { + errx(EX_USAGE, + "pretimeout (%d) >= timeout (%d -> %ld)\n" + "see manual section TIMEOUT RESOLUTION", + pretimeout, timeout_sec, (long)ts.tv_sec); + } + } } |