summaryrefslogtreecommitdiffstats
path: root/sbin
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2011-03-21 15:29:20 +0000
committerpjd <pjd@FreeBSD.org>2011-03-21 15:29:20 +0000
commit8270eb5240b7602d56905f048c82db0de72b981e (patch)
treec49bb2fb01ab4cee1175bcaa88558ffa453feaab /sbin
parent7bae0e70986bbe0482de73a4b62d4ccb49c2950c (diff)
downloadFreeBSD-src-8270eb5240b7602d56905f048c82db0de72b981e.zip
FreeBSD-src-8270eb5240b7602d56905f048c82db0de72b981e.tar.gz
Before handling any events on descriptors check signals so we can update
our info about worker processes if any of them was terminated in the meantime. This fixes the problem with 'hastctl status' running from a hook called on split-brain: 1. Secondary calls a hook and terminates. 2. Hook asks for resource status via 'hastctl status'. 3. The main hastd handles the status request by sending it to the secondary worker, which is already dead, but because signals weren't checked yet it doesn't know that and we get EPIPE. MFC after: 1 week
Diffstat (limited to 'sbin')
-rw-r--r--sbin/hastd/hastd.c71
1 files changed, 43 insertions, 28 deletions
diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c
index c09c92e..3f09cc7 100644
--- a/sbin/hastd/hastd.c
+++ b/sbin/hastd/hastd.c
@@ -884,19 +884,12 @@ out:
}
static void
-main_loop(void)
+check_signals(void)
{
- struct hast_resource *res;
- struct timeval seltimeout;
struct timespec sigtimeout;
- int fd, maxfd, ret, signo;
- time_t lastcheck, now;
sigset_t mask;
- fd_set rfds;
+ int signo;
- lastcheck = time(NULL);
- seltimeout.tv_sec = REPORT_INTERVAL;
- seltimeout.tv_usec = 0;
sigtimeout.tv_sec = 0;
sigtimeout.tv_nsec = 0;
@@ -906,29 +899,45 @@ main_loop(void)
PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
+ while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) {
+ switch (signo) {
+ case SIGINT:
+ case SIGTERM:
+ sigexit_received = true;
+ terminate_workers();
+ proto_close(cfg->hc_controlconn);
+ exit(EX_OK);
+ break;
+ case SIGCHLD:
+ child_exit();
+ break;
+ case SIGHUP:
+ hastd_reload();
+ break;
+ default:
+ PJDLOG_ABORT("Unexpected signal (%d).", signo);
+ }
+ }
+}
+
+static void
+main_loop(void)
+{
+ struct hast_resource *res;
+ struct timeval seltimeout;
+ int fd, maxfd, ret;
+ time_t lastcheck, now;
+ fd_set rfds;
+
+ lastcheck = time(NULL);
+ seltimeout.tv_sec = REPORT_INTERVAL;
+ seltimeout.tv_usec = 0;
+
pjdlog_info("Started successfully, running protocol version %d.",
HAST_PROTO_VERSION);
for (;;) {
- while ((signo = sigtimedwait(&mask, NULL, &sigtimeout)) != -1) {
- switch (signo) {
- case SIGINT:
- case SIGTERM:
- sigexit_received = true;
- terminate_workers();
- proto_close(cfg->hc_controlconn);
- exit(EX_OK);
- break;
- case SIGCHLD:
- child_exit();
- break;
- case SIGHUP:
- hastd_reload();
- break;
- default:
- PJDLOG_ABORT("Unexpected signal (%d).", signo);
- }
- }
+ check_signals();
/* Setup descriptors for select(2). */
FD_ZERO(&rfds);
@@ -976,6 +985,12 @@ main_loop(void)
pjdlog_exit(EX_OSERR, "select() failed");
}
+ /*
+ * Check for signals before we do anything to update our
+ * info about terminated workers in the meantime.
+ */
+ check_signals();
+
if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds))
control_handle(cfg);
if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds))
OpenPOWER on IntegriCloud