summaryrefslogtreecommitdiffstats
path: root/sbin
diff options
context:
space:
mode:
Diffstat (limited to 'sbin')
-rw-r--r--sbin/hastd/control.c4
-rw-r--r--sbin/hastd/hast.h4
-rw-r--r--sbin/hastd/hastd.c64
-rw-r--r--sbin/hastd/primary.c90
-rw-r--r--sbin/hastd/secondary.c12
5 files changed, 143 insertions, 31 deletions
diff --git a/sbin/hastd/control.c b/sbin/hastd/control.c
index 4faf05d..5e18691 100644
--- a/sbin/hastd/control.c
+++ b/sbin/hastd/control.c
@@ -62,6 +62,10 @@ child_cleanup(struct hast_resource *res)
proto_close(res->hr_event);
res->hr_event = NULL;
}
+ if (res->hr_conn != NULL) {
+ proto_close(res->hr_conn);
+ res->hr_conn = NULL;
+ }
res->hr_workerpid = 0;
}
diff --git a/sbin/hastd/hast.h b/sbin/hastd/hast.h
index dbbe0da..35a43ef 100644
--- a/sbin/hastd/hast.h
+++ b/sbin/hastd/hast.h
@@ -182,10 +182,12 @@ struct hast_resource {
int hr_previous_role;
/* PID of child worker process. 0 - no child. */
pid_t hr_workerpid;
- /* Control connection between parent and child. */
+ /* Control commands from parent to child. */
struct proto_conn *hr_ctrl;
/* Events from child to parent. */
struct proto_conn *hr_event;
+ /* Connection requests from child to parent. */
+ struct proto_conn *hr_conn;
/* Activemap structure. */
struct activemap *hr_amp;
diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c
index 5bced2c..cbac4f4 100644
--- a/sbin/hastd/hastd.c
+++ b/sbin/hastd/hastd.c
@@ -214,6 +214,19 @@ descriptors_assert(const struct hast_resource *res, int pjdlogmode)
fd, dtype2str(mode), dtype2str(S_IFSOCK));
break;
}
+ } else if (fd == proto_descriptor(res->hr_conn)) {
+ if (!isopen) {
+ snprintf(msg, sizeof(msg),
+ "Descriptor %d (conn) is closed, but should be open.",
+ fd);
+ break;
+ }
+ if (!S_ISSOCK(mode)) {
+ snprintf(msg, sizeof(msg),
+ "Descriptor %d (conn) is %s, but should be %s.",
+ fd, dtype2str(mode), dtype2str(S_IFSOCK));
+ break;
+ }
} else if (res->hr_role == HAST_ROLE_SECONDARY &&
fd == proto_descriptor(res->hr_remotein)) {
if (!isopen) {
@@ -802,6 +815,41 @@ close:
}
static void
+connection_migrate(struct hast_resource *res)
+{
+ struct proto_conn *conn;
+ int16_t val = 0;
+
+ if (proto_recv(res->hr_conn, &val, sizeof(val)) < 0) {
+ pjdlog_errno(LOG_WARNING,
+ "Unable to receive connection command");
+ return;
+ }
+ if (proto_client(res->hr_remoteaddr, &conn) < 0) {
+ val = errno;
+ pjdlog_errno(LOG_WARNING,
+ "Unable to create outgoing connection to %s",
+ res->hr_remoteaddr);
+ goto out;
+ }
+ if (proto_connect(conn, -1) < 0) {
+ val = errno;
+ pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
+ res->hr_remoteaddr);
+ proto_close(conn);
+ goto out;
+ }
+ val = 0;
+out:
+ if (proto_send(res->hr_conn, &val, sizeof(val)) < 0) {
+ pjdlog_errno(LOG_WARNING,
+ "Unable to send reply to connection request");
+ }
+ if (val == 0 && proto_connection_send(res->hr_conn, conn) < 0)
+ pjdlog_errno(LOG_WARNING, "Unable to send connection");
+}
+
+static void
main_loop(void)
{
struct hast_resource *res;
@@ -858,10 +906,18 @@ main_loop(void)
TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
if (res->hr_event == NULL)
continue;
+ PJDLOG_ASSERT(res->hr_conn != NULL);
fd = proto_descriptor(res->hr_event);
PJDLOG_ASSERT(fd >= 0);
FD_SET(fd, &rfds);
maxfd = fd > maxfd ? fd : maxfd;
+ if (res->hr_role == HAST_ROLE_PRIMARY) {
+ /* Only primary workers asks for connections. */
+ fd = proto_descriptor(res->hr_conn);
+ PJDLOG_ASSERT(fd >= 0);
+ FD_SET(fd, &rfds);
+ maxfd = fd > maxfd ? fd : maxfd;
+ }
}
PJDLOG_ASSERT(maxfd + 1 <= (int)FD_SETSIZE);
@@ -882,12 +938,20 @@ main_loop(void)
TAILQ_FOREACH(res, &cfg->hc_resources, hr_next) {
if (res->hr_event == NULL)
continue;
+ PJDLOG_ASSERT(res->hr_conn != NULL);
if (FD_ISSET(proto_descriptor(res->hr_event), &rfds)) {
if (event_recv(res) == 0)
continue;
/* The worker process exited? */
proto_close(res->hr_event);
res->hr_event = NULL;
+ proto_close(res->hr_conn);
+ res->hr_conn = NULL;
+ continue;
+ }
+ if (res->hr_role == HAST_ROLE_PRIMARY &&
+ FD_ISSET(proto_descriptor(res->hr_conn), &rfds)) {
+ connection_migrate(res);
}
}
}
diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c
index 7081096..408edbc 100644
--- a/sbin/hastd/primary.c
+++ b/sbin/hastd/primary.c
@@ -488,6 +488,46 @@ init_local(struct hast_resource *res)
exit(EX_NOINPUT);
}
+static int
+primary_connect(struct hast_resource *res, struct proto_conn **connp)
+{
+ struct proto_conn *conn;
+ int16_t val;
+
+ val = 1;
+ if (proto_send(res->hr_conn, &val, sizeof(val)) < 0) {
+ primary_exit(EX_TEMPFAIL,
+ "Unable to send connection request to parent");
+ }
+ if (proto_recv(res->hr_conn, &val, sizeof(val)) < 0) {
+ primary_exit(EX_TEMPFAIL,
+ "Unable to receive reply to connection request from parent");
+ }
+ if (val != 0) {
+ errno = val;
+ pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
+ res->hr_remoteaddr);
+ return (-1);
+ }
+ if (proto_connection_recv(res->hr_conn, true, &conn) < 0) {
+ primary_exit(EX_TEMPFAIL,
+ "Unable to receive connection from parent");
+ }
+ if (proto_connect_wait(conn, HAST_TIMEOUT) < 0) {
+ pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
+ res->hr_remoteaddr);
+ proto_close(conn);
+ return (-1);
+ }
+ /* Error in setting timeout is not critical, but why should it fail? */
+ if (proto_timeout(conn, res->hr_timeout) < 0)
+ pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
+
+ *connp = conn;
+
+ return (0);
+}
+
static bool
init_remote(struct hast_resource *res, struct proto_conn **inp,
struct proto_conn **outp)
@@ -508,21 +548,9 @@ init_remote(struct hast_resource *res, struct proto_conn **inp,
in = out = NULL;
errmsg = NULL;
- /* Prepare outgoing connection with remote node. */
- if (proto_client(res->hr_remoteaddr, &out) < 0) {
- primary_exit(EX_TEMPFAIL,
- "Unable to create outgoing connection to %s",
- res->hr_remoteaddr);
- }
- /* Try to connect, but accept failure. */
- if (proto_connect(out, HAST_TIMEOUT) < 0) {
- pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
- res->hr_remoteaddr);
- goto close;
- }
- /* Error in setting timeout is not critical, but why should it fail? */
- if (proto_timeout(out, res->hr_timeout) < 0)
- pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
+ if (primary_connect(res, &out) == -1)
+ return (false);
+
/*
* First handshake step.
* Setup outgoing connection with remote node.
@@ -576,20 +604,9 @@ init_remote(struct hast_resource *res, struct proto_conn **inp,
* Second handshake step.
* Setup incoming connection with remote node.
*/
- if (proto_client(res->hr_remoteaddr, &in) < 0) {
- primary_exit(EX_TEMPFAIL,
- "Unable to create incoming connection to %s",
- res->hr_remoteaddr);
- }
- /* Try to connect, but accept failure. */
- if (proto_connect(in, HAST_TIMEOUT) < 0) {
- pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
- res->hr_remoteaddr);
+ if (primary_connect(res, &in) == -1)
goto close;
- }
- /* Error in setting timeout is not critical, but why should it fail? */
- if (proto_timeout(in, res->hr_timeout) < 0)
- pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
+
nvout = nv_alloc();
nv_add_string(nvout, res->hr_name, "resource");
nv_add_uint8_array(nvout, res->hr_token, sizeof(res->hr_token),
@@ -792,7 +809,8 @@ hastd_primary(struct hast_resource *res)
int error, mode;
/*
- * Create communication channel between parent and child.
+ * Create communication channel for sending control commands from
+ * parent to child.
*/
if (proto_client("socketpair://", &res->hr_ctrl) < 0) {
/* TODO: There's no need for this to be fatal error. */
@@ -801,7 +819,7 @@ hastd_primary(struct hast_resource *res)
"Unable to create control sockets between parent and child");
}
/*
- * Create communication channel between child and parent.
+ * Create communication channel for sending events from child to parent.
*/
if (proto_client("socketpair://", &res->hr_event) < 0) {
/* TODO: There's no need for this to be fatal error. */
@@ -809,6 +827,16 @@ hastd_primary(struct hast_resource *res)
pjdlog_exit(EX_OSERR,
"Unable to create event sockets between child and parent");
}
+ /*
+ * Create communication channel for sending connection requests from
+ * child to parent.
+ */
+ if (proto_client("socketpair://", &res->hr_conn) < 0) {
+ /* TODO: There's no need for this to be fatal error. */
+ KEEP_ERRNO((void)pidfile_remove(pfh));
+ pjdlog_exit(EX_OSERR,
+ "Unable to create connection sockets between child and parent");
+ }
pid = fork();
if (pid < 0) {
@@ -821,6 +849,7 @@ hastd_primary(struct hast_resource *res)
/* This is parent. */
/* Declare that we are receiver. */
proto_recv(res->hr_event, NULL, 0);
+ proto_recv(res->hr_conn, NULL, 0);
/* Declare that we are sender. */
proto_send(res->hr_ctrl, NULL, 0);
res->hr_workerpid = pid;
@@ -832,6 +861,7 @@ hastd_primary(struct hast_resource *res)
/* Declare that we are sender. */
proto_send(res->hr_event, NULL, 0);
+ proto_send(res->hr_conn, NULL, 0);
/* Declare that we are receiver. */
proto_recv(res->hr_ctrl, NULL, 0);
descriptors_cleanup(res);
diff --git a/sbin/hastd/secondary.c b/sbin/hastd/secondary.c
index 12a68de..9c859e0 100644
--- a/sbin/hastd/secondary.c
+++ b/sbin/hastd/secondary.c
@@ -364,6 +364,16 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin)
pjdlog_exit(EX_OSERR,
"Unable to create event sockets between child and parent");
}
+ /*
+ * Create communication channel for sending connection requests from
+ * parent to child.
+ */
+ if (proto_client("socketpair://", &res->hr_conn) < 0) {
+ /* TODO: There's no need for this to be fatal error. */
+ KEEP_ERRNO((void)pidfile_remove(pfh));
+ pjdlog_exit(EX_OSERR,
+ "Unable to create connection sockets between parent and child");
+ }
pid = fork();
if (pid < 0) {
@@ -381,6 +391,7 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin)
proto_recv(res->hr_event, NULL, 0);
/* Declare that we are sender. */
proto_send(res->hr_ctrl, NULL, 0);
+ proto_send(res->hr_conn, NULL, 0);
res->hr_workerpid = pid;
return;
}
@@ -392,6 +403,7 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin)
proto_send(res->hr_event, NULL, 0);
/* Declare that we are receiver. */
proto_recv(res->hr_ctrl, NULL, 0);
+ proto_recv(res->hr_conn, NULL, 0);
descriptors_cleanup(res);
descriptors_assert(res, mode);
OpenPOWER on IntegriCloud