summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2010-08-05 19:16:31 +0000
committerpjd <pjd@FreeBSD.org>2010-08-05 19:16:31 +0000
commit7a18b662f13ec766be7a2d5d636b3f18d33b7c27 (patch)
tree0f8ede3b1b9ab2a4ab2dcbf5a6d8403abaff6620
parent0a7f48a833394993f0f1ded5d0e1e2509c59da0a (diff)
downloadFreeBSD-src-7a18b662f13ec766be7a2d5d636b3f18d33b7c27.zip
FreeBSD-src-7a18b662f13ec766be7a2d5d636b3f18d33b7c27.tar.gz
Implement configuration reload on SIGHUP. This includes:
- Load added resources. - Stop and forget removed resources. - Update modified resources in the least intrusive way, i.e. don't touch /dev/hast/<name> unless the path to the local component or the provider name was modified. Obtained from: Wheel Systems Sp. z o.o. http://www.wheelsystems.com MFC after: 1 month
-rw-r--r--sbin/hastd/hastd.c208
-rw-r--r--sbin/hastd/hastd.h3
-rw-r--r--sbin/hastd/primary.c123
3 files changed, 322 insertions, 12 deletions
diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c
index 6d5ef04..31fc3bf 100644
--- a/sbin/hastd/hastd.c
+++ b/sbin/hastd/hastd.c
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2009-2010 The FreeBSD Foundation
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
@@ -57,13 +58,13 @@ __FBSDID("$FreeBSD$");
#include "subr.h"
/* Path to configuration file. */
-static const char *cfgpath = HAST_CONFIG;
+const char *cfgpath = HAST_CONFIG;
/* Hastd configuration. */
static struct hastd_config *cfg;
/* Was SIGCHLD signal received? */
static bool sigchld_received = false;
/* Was SIGHUP signal received? */
-static bool sighup_received = false;
+bool sighup_received = false;
/* Was SIGINT or SIGTERM signal received? */
bool sigexit_received = false;
/* PID file handle. */
@@ -169,12 +170,203 @@ child_exit(void)
}
}
+/*
+ * Tell whether replacing the settings in res0 with those in res1 calls for
+ * a full restart of the resource.  Both must describe the same resource
+ * (equal hr_name).
+ *
+ * A different provider name or local component path always means restart.
+ * When res0's role is INIT or SECONDARY, a different remote address,
+ * replication mode or timeout means restart as well; for any other role
+ * those three fields are not considered here.
+ */
+static bool
+resource_needs_restart(const struct hast_resource *res0,
+    const struct hast_resource *res1)
+{
+
+	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
+
+	/* Changes to the local side always force a restart. */
+	if (strcmp(res0->hr_provname, res1->hr_provname) != 0 ||
+	    strcmp(res0->hr_localpath, res1->hr_localpath) != 0)
+		return (true);
+
+	/* Remote-side settings matter only for INIT and SECONDARY roles. */
+	if (res0->hr_role != HAST_ROLE_INIT &&
+	    res0->hr_role != HAST_ROLE_SECONDARY)
+		return (false);
+
+	return (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0 ||
+	    res0->hr_replication != res1->hr_replication ||
+	    res0->hr_timeout != res1->hr_timeout);
+}
+
+/*
+ * Tell whether replacing the settings in res0 with those in res1 can be
+ * handled by a reload rather than a restart.  The caller must already have
+ * established that name, provider name and local path are equal.
+ *
+ * Only a resource whose role is PRIMARY is ever reloaded, and only when its
+ * remote address, replication mode or timeout differs.
+ */
+static bool
+resource_needs_reload(const struct hast_resource *res0,
+    const struct hast_resource *res1)
+{
+
+	assert(strcmp(res0->hr_name, res1->hr_name) == 0);
+	assert(strcmp(res0->hr_provname, res1->hr_provname) == 0);
+	assert(strcmp(res0->hr_localpath, res1->hr_localpath) == 0);
+
+	return (res0->hr_role == HAST_ROLE_PRIMARY &&
+	    (strcmp(res0->hr_remoteaddr, res1->hr_remoteaddr) != 0 ||
+	    res0->hr_replication != res1->hr_replication ||
+	    res0->hr_timeout != res1->hr_timeout));
+}
+
+/*
+ * Re-read the configuration file (cfgpath) and apply the differences to the
+ * running daemon:
+ * - control/listen sockets are re-created when their addresses changed,
+ * - resources missing from the new file are switched to INIT role and freed,
+ * - resources new in the file are appended to the active list,
+ * - resources present in both are restarted, reloaded or left alone.
+ * If parsing fails or a new socket cannot be opened, the running
+ * configuration is left as it was and a warning is logged.
+ */
static void
hastd_reload(void)
{
+ struct hastd_config *newcfg;
+ struct hast_resource *nres, *cres, *tres;
+ uint8_t role;
+
+ pjdlog_info("Reloading configuration...");
+
+ newcfg = yy_config_parse(cfgpath, false);
+ if (newcfg == NULL)
+ goto failed;
+
+ /*
+ * Check if control address has changed.
+ */
+ if (strcmp(cfg->hc_controladdr, newcfg->hc_controladdr) != 0) {
+ if (proto_server(newcfg->hc_controladdr,
+ &newcfg->hc_controlconn) < 0) {
+ pjdlog_errno(LOG_ERR,
+ "Unable to listen on control address %s",
+ newcfg->hc_controladdr);
+ goto failed;
+ }
+ }
+ /*
+ * Check if listen address has changed.
+ */
+ if (strcmp(cfg->hc_listenaddr, newcfg->hc_listenaddr) != 0) {
+ if (proto_server(newcfg->hc_listenaddr,
+ &newcfg->hc_listenconn) < 0) {
+ pjdlog_errno(LOG_ERR, "Unable to listen on address %s",
+ newcfg->hc_listenaddr);
+ goto failed;
+ }
+ }
+ /*
+ * Only when both control and listen sockets are successfully
+ * initialized switch them to new configuration.
+ * A non-NULL connection in newcfg is taken to mean "opened above";
+ * NOTE(review): this relies on yy_config_parse() leaving
+ * hc_controlconn/hc_listenconn NULL - confirm.
+ */
+ if (newcfg->hc_controlconn != NULL) {
+ pjdlog_info("Control socket changed from %s to %s.",
+ cfg->hc_controladdr, newcfg->hc_controladdr);
+ proto_close(cfg->hc_controlconn);
+ cfg->hc_controlconn = newcfg->hc_controlconn;
+ /* Ownership moved to cfg; keep newcfg from referencing it. */
+ newcfg->hc_controlconn = NULL;
+ strlcpy(cfg->hc_controladdr, newcfg->hc_controladdr,
+ sizeof(cfg->hc_controladdr));
+ }
+ if (newcfg->hc_listenconn != NULL) {
+ pjdlog_info("Listen socket changed from %s to %s.",
+ cfg->hc_listenaddr, newcfg->hc_listenaddr);
+ proto_close(cfg->hc_listenconn);
+ cfg->hc_listenconn = newcfg->hc_listenconn;
+ /* Ownership moved to cfg; keep newcfg from referencing it. */
+ newcfg->hc_listenconn = NULL;
+ strlcpy(cfg->hc_listenaddr, newcfg->hc_listenaddr,
+ sizeof(cfg->hc_listenaddr));
+ }
- /* TODO */
- pjdlog_warning("Configuration reload is not implemented.");
+ /*
+ * Stop and remove resources that were removed from the configuration.
+ * The _SAFE iterator is needed because the current element may be
+ * unlinked and freed inside the loop.
+ */
+ TAILQ_FOREACH_SAFE(cres, &cfg->hc_resources, hr_next, tres) {
+ TAILQ_FOREACH(nres, &newcfg->hc_resources, hr_next) {
+ if (strcmp(cres->hr_name, nres->hr_name) == 0)
+ break;
+ }
+ /* nres is NULL when the inner loop ran to the end: no match. */
+ if (nres == NULL) {
+ control_set_role(cres, HAST_ROLE_INIT);
+ TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
+ pjdlog_info("Resource %s removed.", cres->hr_name);
+ free(cres);
+ }
+ }
+ /*
+ * Move new resources to the current configuration.
+ * Moved entries leave newcfg's list, so the pass below only sees
+ * resources that existed before the reload.
+ */
+ TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
+ TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
+ if (strcmp(cres->hr_name, nres->hr_name) == 0)
+ break;
+ }
+ if (cres == NULL) {
+ TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
+ TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
+ pjdlog_info("Resource %s added.", nres->hr_name);
+ }
+ }
+ /*
+ * Deal with modified resources.
+ * Depending on what has changed exactly we might want to perform
+ * different actions.
+ *
+ * We do full resource restart in the following situations:
+ * Resource role is INIT or SECONDARY.
+ * Resource role is PRIMARY and path to local component or provider
+ * name has changed.
+ * In case of PRIMARY, the worker process will be killed and restarted,
+ * which also means removing /dev/hast/<name> provider and
+ * recreating it.
+ *
+ * We do just reload (send SIGHUP to worker process) if we act as
+ * PRIMARY, and only remote address, replication mode and/or timeout
+ * have changed. For those, there is no need to restart worker process.
+ * If PRIMARY receives SIGHUP, it will reconnect if remote address or
+ * replication mode has changed or simply set new timeout if only
+ * timeout has changed.
+ */
+ TAILQ_FOREACH_SAFE(nres, &newcfg->hc_resources, hr_next, tres) {
+ TAILQ_FOREACH(cres, &cfg->hc_resources, hr_next) {
+ if (strcmp(cres->hr_name, nres->hr_name) == 0)
+ break;
+ }
+ /* Everything still in newcfg must have a counterpart in cfg. */
+ assert(cres != NULL);
+ if (resource_needs_restart(cres, nres)) {
+ pjdlog_info("Resource %s configuration was modified, restarting it.",
+ cres->hr_name);
+ /* Remember the role so the replacement can be put back into it. */
+ role = cres->hr_role;
+ control_set_role(cres, HAST_ROLE_INIT);
+ TAILQ_REMOVE(&cfg->hc_resources, cres, hr_next);
+ free(cres);
+ TAILQ_REMOVE(&newcfg->hc_resources, nres, hr_next);
+ TAILQ_INSERT_TAIL(&cfg->hc_resources, nres, hr_next);
+ control_set_role(nres, role);
+ } else if (resource_needs_reload(cres, nres)) {
+ pjdlog_info("Resource %s configuration was modified, reloading it.",
+ cres->hr_name);
+ /* Update our copy in place, then tell the worker to re-read. */
+ strlcpy(cres->hr_remoteaddr, nres->hr_remoteaddr,
+ sizeof(cres->hr_remoteaddr));
+ cres->hr_replication = nres->hr_replication;
+ cres->hr_timeout = nres->hr_timeout;
+ if (cres->hr_workerpid != 0) {
+ if (kill(cres->hr_workerpid, SIGHUP) < 0) {
+ pjdlog_errno(LOG_WARNING,
+ "Unable to send SIGHUP to worker process %u",
+ (unsigned int)cres->hr_workerpid);
+ }
+ }
+ }
+ }
+
+ /* Whatever is still owned by newcfg is no longer needed. */
+ yy_config_free(newcfg);
+ pjdlog_info("Configuration reloaded successfully.");
+ return;
+failed:
+ /* Release any sockets opened for the new configuration, then the config. */
+ if (newcfg != NULL) {
+ if (newcfg->hc_controlconn != NULL)
+ proto_close(newcfg->hc_controlconn);
+ if (newcfg->hc_listenconn != NULL)
+ proto_close(newcfg->hc_listenconn);
+ yy_config_free(newcfg);
+ }
+ pjdlog_warning("Configuration not reloaded.");
}
static void
@@ -402,10 +594,6 @@ main_loop(void)
fd_set rfds, wfds;
int cfd, lfd, maxfd, ret;
- cfd = proto_descriptor(cfg->hc_controlconn);
- lfd = proto_descriptor(cfg->hc_listenconn);
- maxfd = cfd > lfd ? cfd : lfd;
-
for (;;) {
if (sigchld_received) {
sigchld_received = false;
@@ -416,6 +604,10 @@ main_loop(void)
hastd_reload();
}
+ cfd = proto_descriptor(cfg->hc_controlconn);
+ lfd = proto_descriptor(cfg->hc_listenconn);
+ maxfd = cfd > lfd ? cfd : lfd;
+
/* Setup descriptors for select(2). */
FD_ZERO(&rfds);
FD_SET(cfd, &rfds);
diff --git a/sbin/hastd/hastd.h b/sbin/hastd/hastd.h
index 199de8c..12b384d 100644
--- a/sbin/hastd/hastd.h
+++ b/sbin/hastd/hastd.h
@@ -39,7 +39,8 @@
#include "hast.h"
-extern bool sigexit_received;
+extern const char *cfgpath;
+extern bool sigexit_received, sighup_received;
extern struct pidfh *pfh;
void hastd_primary(struct hast_resource *res);
diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c
index faaa136..a5e5d6e 100644
--- a/sbin/hastd/primary.c
+++ b/sbin/hastd/primary.c
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2009 The FreeBSD Foundation
+ * Copyright (c) 2010 Pawel Jakub Dawidek <pjd@FreeBSD.org>
* All rights reserved.
*
* This software was developed by Pawel Jakub Dawidek under sponsorship from
@@ -65,6 +66,9 @@ __FBSDID("$FreeBSD$");
#include "subr.h"
#include "synch.h"
+/* There is only one remote component for now. */
+#define ISREMOTE(no) ((no) == 1)
+
struct hio {
/*
* Number of components we are still waiting for.
@@ -424,6 +428,7 @@ init_environment(struct hast_resource *res __unused)
*/
signal(SIGINT, sighandler);
signal(SIGTERM, sighandler);
+ signal(SIGHUP, sighandler);
}
static void
@@ -1713,6 +1718,9 @@ sighandler(int sig)
case SIGTERM:
sigexit_received = true;
break;
+ case SIGHUP:
+ sighup_received = true;
+ break;
default:
assert(!"invalid condition");
}
@@ -1726,6 +1734,114 @@ sighandler(int sig)
mtx_unlock(&hio_guard_lock);
}
+/*
+ * Re-read the configuration file (cfgpath) inside a worker process and
+ * apply the settings of our resource (gres) that can change without
+ * restarting the worker: remote address, replication mode and timeout.
+ *
+ * - Nothing changed: nothing is touched.
+ * - Only the timeout changed: the new timeout is set on the existing
+ *   remote connections, without reconnecting.
+ * - Remote address and/or replication mode changed: remote components are
+ *   closed with remote_close(); reconnecting is left to the guard thread.
+ *
+ * If the resource vanished from the file, or its provider name or local
+ * path changed, the signal was sent to the wrong process; a hint is logged
+ * and nothing is changed.
+ *
+ * The parsed configuration is released on every path.
+ */
+static void
+config_reload(void)
+{
+	struct hastd_config *newcfg;
+	struct hast_resource *res;
+	unsigned int ii, ncomps;
+	int modified;
+
+	pjdlog_info("Reloading configuration...");
+
+	ncomps = HAST_NCOMPONENTS;
+
+	newcfg = yy_config_parse(cfgpath, false);
+	if (newcfg == NULL)
+		goto failed;
+
+	/* Find our own resource in the freshly parsed configuration. */
+	TAILQ_FOREACH(res, &newcfg->hc_resources, hr_next) {
+		if (strcmp(res->hr_name, gres->hr_name) == 0)
+			break;
+	}
+	/*
+	 * If resource was removed from the configuration file, resource
+	 * name, provider name or path to local component was modified we
+	 * shouldn't be here. This means that someone modified configuration
+	 * file and sent SIGHUP to us instead of main hastd process.
+	 * Log advice and ignore the signal.
+	 */
+	if (res == NULL || strcmp(gres->hr_name, res->hr_name) != 0 ||
+	    strcmp(gres->hr_provname, res->hr_provname) != 0 ||
+	    strcmp(gres->hr_localpath, res->hr_localpath) != 0) {
+		pjdlog_warning("To reload configuration send SIGHUP to the main hastd process (pid %u).",
+		    (unsigned int)getppid());
+		goto failed;
+	}
+
+#define	MODIFIED_REMOTEADDR	0x1
+#define	MODIFIED_REPLICATION	0x2
+#define	MODIFIED_TIMEOUT	0x4
+	modified = 0;
+	if (strcmp(gres->hr_remoteaddr, res->hr_remoteaddr) != 0) {
+		/*
+		 * Don't copy res->hr_remoteaddr to gres just yet.
+		 * We want remote_close() to log disconnect from the old
+		 * addresses, not from the new ones.
+		 */
+		modified |= MODIFIED_REMOTEADDR;
+	}
+	if (gres->hr_replication != res->hr_replication) {
+		gres->hr_replication = res->hr_replication;
+		modified |= MODIFIED_REPLICATION;
+	}
+	if (gres->hr_timeout != res->hr_timeout) {
+		gres->hr_timeout = res->hr_timeout;
+		modified |= MODIFIED_TIMEOUT;
+	}
+	/*
+	 * If only timeout was modified we only need to change it without
+	 * reconnecting.
+	 */
+	if (modified == MODIFIED_TIMEOUT) {
+		for (ii = 0; ii < ncomps; ii++) {
+			if (!ISREMOTE(ii))
+				continue;
+			rw_rlock(&hio_remote_lock[ii]);
+			if (!ISCONNECTED(gres, ii)) {
+				rw_unlock(&hio_remote_lock[ii]);
+				continue;
+			}
+			rw_unlock(&hio_remote_lock[ii]);
+			if (proto_timeout(gres->hr_remotein,
+			    gres->hr_timeout) < 0) {
+				pjdlog_errno(LOG_WARNING,
+				    "Unable to set connection timeout");
+			}
+			if (proto_timeout(gres->hr_remoteout,
+			    gres->hr_timeout) < 0) {
+				pjdlog_errno(LOG_WARNING,
+				    "Unable to set connection timeout");
+			}
+		}
+	} else if (modified != 0) {
+		/*
+		 * Remote address and/or replication mode changed: drop the
+		 * remote connections.  When nothing changed at all we must
+		 * not get here, otherwise a no-op SIGHUP would needlessly
+		 * disconnect a healthy remote.
+		 */
+		for (ii = 0; ii < ncomps; ii++) {
+			if (!ISREMOTE(ii))
+				continue;
+			remote_close(gres, ii);
+		}
+		if (modified & MODIFIED_REMOTEADDR) {
+			strlcpy(gres->hr_remoteaddr, res->hr_remoteaddr,
+			    sizeof(gres->hr_remoteaddr));
+		}
+	}
+#undef	MODIFIED_REMOTEADDR
+#undef	MODIFIED_REPLICATION
+#undef	MODIFIED_TIMEOUT
+
+	/*
+	 * Everything we need has been copied into gres; release the parsed
+	 * configuration so repeated reloads don't leak it.  res points into
+	 * newcfg and must not be used past this point.
+	 */
+	yy_config_free(newcfg);
+	pjdlog_info("Configuration reloaded successfully.");
+	return;
+failed:
+	if (newcfg != NULL) {
+		if (newcfg->hc_controlconn != NULL)
+			proto_close(newcfg->hc_controlconn);
+		if (newcfg->hc_listenconn != NULL)
+			proto_close(newcfg->hc_listenconn);
+		yy_config_free(newcfg);
+	}
+	pjdlog_warning("Configuration not reloaded.");
+}
+
/*
* Thread guards remote connections and reconnects when needed, handles
* signals, etc.
@@ -1739,14 +1855,16 @@ guard_thread(void *arg)
int timeout;
ncomps = HAST_NCOMPONENTS;
- /* The is only one remote component for now. */
-#define ISREMOTE(no) ((no) == 1)
for (;;) {
if (sigexit_received) {
primary_exitx(EX_OK,
"Termination signal received, exiting.");
}
+ if (sighup_received) {
+ sighup_received = false;
+ config_reload();
+ }
/*
* If all the connection will be fine, we will sleep until
* someone wakes us up.
@@ -1810,7 +1928,6 @@ guard_thread(void *arg)
(void)cv_timedwait(&hio_guard_cond, &hio_guard_lock, timeout);
mtx_unlock(&hio_guard_lock);
}
-#undef ISREMOTE
/* NOTREACHED */
return (NULL);
}
OpenPOWER on IntegriCloud