summaryrefslogtreecommitdiffstats
path: root/sbin
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2010-08-30 23:26:10 +0000
committerpjd <pjd@FreeBSD.org>2010-08-30 23:26:10 +0000
commit9b4ae63e7877da8b244dc4d6076cc57bc2f0ef20 (patch)
tree245f35c3193bdea89cf47c51960dd52a809d5f8e /sbin
parent8a7b72b9d3f5a419b8558fe232480453f6c4c070 (diff)
downloadFreeBSD-src-9b4ae63e7877da8b244dc4d6076cc57bc2f0ef20.zip
FreeBSD-src-9b4ae63e7877da8b244dc4d6076cc57bc2f0ef20.tar.gz
Because it is very hard to make fork(2) from threaded process safe (we are
limited to async-signal safe functions in the child process), move all hooks execution to the main (non-threaded) process. Do it by maintaining connection (socketpair) between child and parent and sending events from the child to parent, so it can execute the hook. This is step in right direction for others reasons too. For example there is one less problem to drop privs in worker processes. MFC after: 2 weeks Obtained from: Wheel Systems Sp. z o.o. http://www.wheelsystems.com
Diffstat (limited to 'sbin')
-rw-r--r--sbin/hastd/Makefile2
-rw-r--r--sbin/hastd/hast.h2
-rw-r--r--sbin/hastd/hastd.c44
-rw-r--r--sbin/hastd/primary.c39
-rw-r--r--sbin/hastd/secondary.c21
5 files changed, 74 insertions, 34 deletions
diff --git a/sbin/hastd/Makefile b/sbin/hastd/Makefile
index bfb62f4..6e3147d 100644
--- a/sbin/hastd/Makefile
+++ b/sbin/hastd/Makefile
@@ -5,7 +5,7 @@
PROG= hastd
SRCS= activemap.c
SRCS+= control.c
-SRCS+= ebuf.c
+SRCS+= ebuf.c event.c
SRCS+= hast_proto.c hastd.c hooks.c
SRCS+= metadata.c
SRCS+= nv.c
diff --git a/sbin/hastd/hast.h b/sbin/hastd/hast.h
index 7137917..a42865f 100644
--- a/sbin/hastd/hast.h
+++ b/sbin/hastd/hast.h
@@ -181,6 +181,8 @@ struct hast_resource {
pid_t hr_workerpid;
/* Control connection between parent and child. */
struct proto_conn *hr_ctrl;
+ /* Events from child to parent. */
+ struct proto_conn *hr_event;
/* Activemap structure. */
struct activemap *hr_amp;
diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c
index ccc25cf..6ddcbd2 100644
--- a/sbin/hastd/hastd.c
+++ b/sbin/hastd/hastd.c
@@ -52,6 +52,7 @@ __FBSDID("$FreeBSD$");
#include <pjdlog.h>
#include "control.h"
+#include "event.h"
#include "hast.h"
#include "hast_proto.h"
#include "hastd.h"
@@ -158,6 +159,11 @@ child_exit(void)
role2str(res->hr_role));
child_exit_log(pid, status);
proto_close(res->hr_ctrl);
+ res->hr_ctrl = NULL;
+ if (res->hr_event != NULL) {
+ proto_close(res->hr_event);
+ res->hr_event = NULL;
+ }
res->hr_workerpid = 0;
if (res->hr_role == HAST_ROLE_PRIMARY) {
/*
@@ -624,9 +630,10 @@ close:
static void
main_loop(void)
{
- fd_set rfds;
- int cfd, lfd, maxfd, ret;
+ struct hast_resource *res;
struct timeval timeout;
+ int fd, maxfd, ret;
+ fd_set rfds;
timeout.tv_sec = REPORT_INTERVAL;
timeout.tv_usec = 0;
@@ -646,14 +653,20 @@ main_loop(void)
hastd_reload();
}
- cfd = proto_descriptor(cfg->hc_controlconn);
- lfd = proto_descriptor(cfg->hc_listenconn);
- maxfd = cfd > lfd ? cfd : lfd;
-
/* Setup descriptors for select(2). */
FD_ZERO(&rfds);
- FD_SET(cfd, &rfds);
- FD_SET(lfd, &rfds);
+ maxfd = fd = proto_descriptor(cfg->hc_controlconn);
+ FD_SET(fd, &rfds);
+ fd = proto_descriptor(cfg->hc_listenconn);
+ FD_SET(fd, &rfds);
+ maxfd = fd > maxfd ? fd : maxfd;
+ TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
+ if (res->hr_event == NULL)
+ continue;
+ fd = proto_descriptor(res->hr_event);
+ FD_SET(fd, &rfds);
+ maxfd = fd > maxfd ? fd : maxfd;
+ }
ret = select(maxfd + 1, &rfds, NULL, NULL, &timeout);
if (ret == 0)
@@ -665,10 +678,21 @@ main_loop(void)
pjdlog_exit(EX_OSERR, "select() failed");
}
- if (FD_ISSET(cfd, &rfds))
+ if (FD_ISSET(proto_descriptor(cfg->hc_controlconn), &rfds))
control_handle(cfg);
- if (FD_ISSET(lfd, &rfds))
+ if (FD_ISSET(proto_descriptor(cfg->hc_listenconn), &rfds))
listen_accept();
+ TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
+ if (res->hr_event == NULL)
+ continue;
+ if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) {
+ if (event_recv(res) == 0)
+ continue;
+ /* The worker process exited? */
+ proto_close(res->hr_event);
+ res->hr_event = NULL;
+ }
+ }
}
}
diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c
index e018e89..a8a2d41 100644
--- a/sbin/hastd/primary.c
+++ b/sbin/hastd/primary.c
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <rangelock.h>
#include "control.h"
+#include "event.h"
#include "hast.h"
#include "hast_proto.h"
#include "hastd.h"
@@ -226,12 +227,6 @@ static void *sync_thread(void *arg);
static void *guard_thread(void *arg);
static void
-dummy_sighandler(int sig __unused)
-{
- /* Nothing to do. */
-}
-
-static void
cleanup(struct hast_resource *res)
{
int rerrno;
@@ -429,8 +424,6 @@ init_environment(struct hast_resource *res __unused)
/*
* Turn on signals handling.
*/
- /* Because SIGCHLD is ignored by default, setup dummy handler for it. */
- PJDLOG_VERIFY(signal(SIGCHLD, dummy_sighandler) != SIG_ERR);
PJDLOG_VERIFY(sigfillset(&mask) == 0);
PJDLOG_VERIFY(sigprocmask(SIG_SETMASK, &mask, NULL) == 0);
}
@@ -672,11 +665,11 @@ init_remote(struct hast_resource *res, struct proto_conn **inp,
res->hr_remotein = in;
res->hr_remoteout = out;
}
- hook_exec(res->hr_exec, "connect", res->hr_name, NULL);
+ event_send(res, EVENT_CONNECT);
return (true);
close:
if (errmsg != NULL && strcmp(errmsg, "Split-brain condition!") == 0)
- hook_exec(res->hr_exec, "split-brain", res->hr_name, NULL);
+ event_send(res, EVENT_SPLITBRAIN);
proto_close(out);
if (in != NULL)
proto_close(in);
@@ -774,6 +767,14 @@ hastd_primary(struct hast_resource *res)
pjdlog_exit(EX_OSERR,
"Unable to create control sockets between parent and child");
}
+ /*
+ * Create communication channel between child and parent.
+ */
+ if (proto_client("socketpair://", &res->hr_event) < 0) {
+ KEEP_ERRNO((void)pidfile_remove(pfh));
+ pjdlog_exit(EX_OSERR,
+ "Unable to create event sockets between child and parent");
+ }
pid = fork();
if (pid < 0) {
@@ -783,6 +784,8 @@ hastd_primary(struct hast_resource *res)
if (pid > 0) {
/* This is parent. */
+ /* Declare that we are receiver. */
+ proto_recv(res->hr_event, NULL, 0);
res->hr_workerpid = pid;
return;
}
@@ -797,7 +800,9 @@ hastd_primary(struct hast_resource *res)
signal(SIGHUP, SIG_DFL);
signal(SIGCHLD, SIG_DFL);
- hook_init();
+ /* Declare that we are sender. */
+ proto_send(res->hr_event, NULL, 0);
+
init_local(res);
if (real_remote(res) && init_remote(res, NULL, NULL))
sync_start();
@@ -896,7 +901,7 @@ remote_close(struct hast_resource *res, int ncomp)
*/
sync_stop();
- hook_exec(res->hr_exec, "disconnect", res->hr_name, NULL);
+ event_send(res, EVENT_DISCONNECT);
}
/*
@@ -1512,7 +1517,7 @@ sync_thread(void *arg __unused)
pjdlog_info("Synchronization interrupted. "
"%jd bytes synchronized so far.",
(intmax_t)synced);
- hook_exec(res->hr_exec, "syncintr", res->hr_name, NULL);
+ event_send(res, EVENT_SYNCINTR);
}
while (!sync_inprogress) {
dorewind = true;
@@ -1545,8 +1550,7 @@ sync_thread(void *arg __unused)
pjdlog_info("Synchronization started. %ju bytes to go.",
(uintmax_t)(res->hr_extentsize *
activemap_ndirty(res->hr_amp)));
- hook_exec(res->hr_exec, "syncstart",
- res->hr_name, NULL);
+ event_send(res, EVENT_SYNCSTART);
}
}
if (offset < 0) {
@@ -1563,8 +1567,7 @@ sync_thread(void *arg __unused)
pjdlog_info("Synchronization complete. "
"%jd bytes synchronized.",
(intmax_t)synced);
- hook_exec(res->hr_exec, "syncdone",
- res->hr_name, NULL);
+ event_send(res, EVENT_SYNCDONE);
}
mtx_lock(&metadata_lock);
res->hr_syncsrc = HAST_SYNCSRC_UNDEF;
@@ -1950,7 +1953,6 @@ guard_thread(void *arg)
PJDLOG_VERIFY(sigaddset(&mask, SIGHUP) == 0);
PJDLOG_VERIFY(sigaddset(&mask, SIGINT) == 0);
PJDLOG_VERIFY(sigaddset(&mask, SIGTERM) == 0);
- PJDLOG_VERIFY(sigaddset(&mask, SIGCHLD) == 0);
timeout.tv_nsec = 0;
signo = -1;
@@ -1969,7 +1971,6 @@ guard_thread(void *arg)
default:
break;
}
- hook_check(signo == SIGCHLD);
pjdlog_debug(2, "remote_guard: Checking connections.");
now = time(NULL);
diff --git a/sbin/hastd/secondary.c b/sbin/hastd/secondary.c
index d1575e8..14016f7 100644
--- a/sbin/hastd/secondary.c
+++ b/sbin/hastd/secondary.c
@@ -54,6 +54,7 @@ __FBSDID("$FreeBSD$");
#include <pjdlog.h>
#include "control.h"
+#include "event.h"
#include "hast.h"
#include "hast_proto.h"
#include "hastd.h"
@@ -325,7 +326,7 @@ init_remote(struct hast_resource *res, struct nv *nvin)
if (res->hr_secondary_localcnt > res->hr_primary_remotecnt &&
res->hr_primary_localcnt > res->hr_secondary_remotecnt) {
/* Exit on split-brain. */
- hook_exec(res->hr_exec, "split-brain", res->hr_name, NULL);
+ event_send(res, EVENT_SPLITBRAIN);
exit(EX_CONFIG);
}
}
@@ -345,6 +346,14 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin)
pjdlog_exit(EX_OSERR,
"Unable to create control sockets between parent and child");
}
+ /*
+ * Create communication channel between child and parent.
+ */
+ if (proto_client("socketpair://", &res->hr_event) < 0) {
+ KEEP_ERRNO((void)pidfile_remove(pfh));
+ pjdlog_exit(EX_OSERR,
+ "Unable to create event sockets between child and parent");
+ }
pid = fork();
if (pid < 0) {
@@ -358,6 +367,8 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin)
res->hr_remotein = NULL;
proto_close(res->hr_remoteout);
res->hr_remoteout = NULL;
+ /* Declare that we are receiver. */
+ proto_recv(res->hr_event, NULL, 0);
res->hr_workerpid = pid;
return;
}
@@ -372,17 +383,19 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin)
signal(SIGHUP, SIG_DFL);
signal(SIGCHLD, SIG_DFL);
+ /* Declare that we are sender. */
+ proto_send(res->hr_event, NULL, 0);
+
/* Error in setting timeout is not critical, but why should it fail? */
if (proto_timeout(res->hr_remotein, 0) < 0)
pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
if (proto_timeout(res->hr_remoteout, res->hr_timeout) < 0)
pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
- hook_init();
init_local(res);
init_remote(res, nvin);
init_environment();
- hook_exec(res->hr_exec, "connect", res->hr_name, NULL);
+ event_send(res, EVENT_CONNECT);
error = pthread_create(&td, NULL, recv_thread, res);
assert(error == 0);
@@ -515,7 +528,7 @@ secondary_exit(int exitcode, const char *fmt, ...)
va_start(ap, fmt);
pjdlogv_errno(LOG_ERR, fmt, ap);
va_end(ap);
- hook_exec(gres->hr_exec, "disconnect", gres->hr_name, NULL);
+ event_send(gres, EVENT_DISCONNECT);
exit(exitcode);
}
OpenPOWER on IntegriCloud