summary | refs | log | tree | commit | diff | stats
path: root/sbin
diff options
context:
space:
mode:
author: pjd <pjd@FreeBSD.org> 2010-04-18 21:18:32 +0000
committer: pjd <pjd@FreeBSD.org> 2010-04-18 21:18:32 +0000
commit: 6d8becbd48ab0aeae3f3d088bf7eb96ac52d92bb (patch)
tree: 15c507440dc569d74e589e1b9e984d52ccbc9016 /sbin
parent: 61b0f125e6f04a1060f00bd9cf7f541fd5d11474 (diff)
download: FreeBSD-src-6d8becbd48ab0aeae3f3d088bf7eb96ac52d92bb.zip
download: FreeBSD-src-6d8becbd48ab0aeae3f3d088bf7eb96ac52d92bb.tar.gz
MFC r204177,r205738,r206669,r206696,r206697:
r204177: Changing proto_socketpair.c compilation and linking order revealed a problem - we should simply ignore proto_server() if the address doesn't start with socketpair://, and not abort.

r205738: Don't hold the connection lock when doing reconnects, as it makes I/Os wait for connection timeouts.
Reported by: Kevin Day <toasty@dragondata.com>

r206669: Increase ggate queue size to the maximum value. HAST was not able to stand heavy random load.
Reported by: Hiroyuki Yamagami

r206696: Fix control socket leak when the worker process exits.
Submitted by: Mikolaj Golub <to.my.trociny@gmail.com>

r206697: Fix log size calculation which caused message truncation.
Submitted by: Mikolaj Golub <to.my.trociny@gmail.com>
Diffstat (limited to 'sbin')
-rw-r--r--  sbin/hastd/hastd.c             1
-rw-r--r--  sbin/hastd/pjdlog.c            2
-rw-r--r--  sbin/hastd/primary.c           78
-rw-r--r--  sbin/hastd/proto_socketpair.c  5
4 files changed, 57 insertions, 29 deletions
diff --git a/sbin/hastd/hastd.c b/sbin/hastd/hastd.c
index 19f0893..957885d 100644
--- a/sbin/hastd/hastd.c
+++ b/sbin/hastd/hastd.c
@@ -137,6 +137,7 @@ child_exit(void)
pjdlog_error("Worker process failed (pid=%u, status=%d).",
(unsigned int)pid, WEXITSTATUS(status));
}
+ proto_close(res->hr_ctrl);
res->hr_workerpid = 0;
if (res->hr_role == HAST_ROLE_PRIMARY) {
sleep(1);
diff --git a/sbin/hastd/pjdlog.c b/sbin/hastd/pjdlog.c
index 38c5539..9f8b3f4 100644
--- a/sbin/hastd/pjdlog.c
+++ b/sbin/hastd/pjdlog.c
@@ -228,7 +228,7 @@ pjdlogv_common(int loglevel, int debuglevel, int error, const char *fmt,
len = snprintf(log, sizeof(log), "%s", pjdlog_prefix);
if ((size_t)len < sizeof(log))
- len = vsnprintf(log + len, sizeof(log) - len, fmt, ap);
+ len += vsnprintf(log + len, sizeof(log) - len, fmt, ap);
if (error != -1 && (size_t)len < sizeof(log)) {
(void)snprintf(log + len, sizeof(log) - len, ": %s.",
strerror(error));
diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c
index ed6e91c..0915154 100644
--- a/sbin/hastd/primary.c
+++ b/sbin/hastd/primary.c
@@ -460,9 +460,11 @@ init_local(struct hast_resource *res)
exit(EX_NOINPUT);
}
-static void
-init_remote(struct hast_resource *res)
+static bool
+init_remote(struct hast_resource *res, struct proto_conn **inp,
+ struct proto_conn **outp)
{
+ struct proto_conn *in, *out;
struct nv *nvout, *nvin;
const unsigned char *token;
unsigned char *map;
@@ -472,13 +474,17 @@ init_remote(struct hast_resource *res)
uint32_t mapsize;
size_t size;
+ assert((inp == NULL && outp == NULL) || (inp != NULL && outp != NULL));
+
+ in = out = NULL;
+
/* Prepare outgoing connection with remote node. */
- if (proto_client(res->hr_remoteaddr, &res->hr_remoteout) < 0) {
+ if (proto_client(res->hr_remoteaddr, &out) < 0) {
primary_exit(EX_OSERR, "Unable to create connection to %s",
res->hr_remoteaddr);
}
/* Try to connect, but accept failure. */
- if (proto_connect(res->hr_remoteout) < 0) {
+ if (proto_connect(out) < 0) {
pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
res->hr_remoteaddr);
goto close;
@@ -496,7 +502,7 @@ init_remote(struct hast_resource *res)
nv_free(nvout);
goto close;
}
- if (hast_proto_send(res, res->hr_remoteout, nvout, NULL, 0) < 0) {
+ if (hast_proto_send(res, out, nvout, NULL, 0) < 0) {
pjdlog_errno(LOG_WARNING,
"Unable to send handshake header to %s",
res->hr_remoteaddr);
@@ -504,7 +510,7 @@ init_remote(struct hast_resource *res)
goto close;
}
nv_free(nvout);
- if (hast_proto_recv_hdr(res->hr_remoteout, &nvin) < 0) {
+ if (hast_proto_recv_hdr(out, &nvin) < 0) {
pjdlog_errno(LOG_WARNING,
"Unable to receive handshake header from %s",
res->hr_remoteaddr);
@@ -536,12 +542,12 @@ init_remote(struct hast_resource *res)
* Second handshake step.
* Setup incoming connection with remote node.
*/
- if (proto_client(res->hr_remoteaddr, &res->hr_remotein) < 0) {
+ if (proto_client(res->hr_remoteaddr, &in) < 0) {
pjdlog_errno(LOG_WARNING, "Unable to create connection to %s",
res->hr_remoteaddr);
}
/* Try to connect, but accept failure. */
- if (proto_connect(res->hr_remotein) < 0) {
+ if (proto_connect(in) < 0) {
pjdlog_errno(LOG_WARNING, "Unable to connect to %s",
res->hr_remoteaddr);
goto close;
@@ -560,7 +566,7 @@ init_remote(struct hast_resource *res)
nv_free(nvout);
goto close;
}
- if (hast_proto_send(res, res->hr_remotein, nvout, NULL, 0) < 0) {
+ if (hast_proto_send(res, in, nvout, NULL, 0) < 0) {
pjdlog_errno(LOG_WARNING,
"Unable to send handshake header to %s",
res->hr_remoteaddr);
@@ -568,7 +574,7 @@ init_remote(struct hast_resource *res)
goto close;
}
nv_free(nvout);
- if (hast_proto_recv_hdr(res->hr_remoteout, &nvin) < 0) {
+ if (hast_proto_recv_hdr(out, &nvin) < 0) {
pjdlog_errno(LOG_WARNING,
"Unable to receive handshake header from %s",
res->hr_remoteaddr);
@@ -611,7 +617,7 @@ init_remote(struct hast_resource *res)
* Remote node have some dirty extents on its own, lets
* download its activemap.
*/
- if (hast_proto_recv_data(res, res->hr_remoteout, nvin, map,
+ if (hast_proto_recv_data(res, out, nvin, map,
mapsize) < 0) {
pjdlog_errno(LOG_ERR,
"Unable to receive remote activemap");
@@ -631,18 +637,29 @@ init_remote(struct hast_resource *res)
(void)hast_activemap_flush(res);
}
pjdlog_info("Connected to %s.", res->hr_remoteaddr);
+ if (inp != NULL && outp != NULL) {
+ *inp = in;
+ *outp = out;
+ } else {
+ res->hr_remotein = in;
+ res->hr_remoteout = out;
+ }
+ return (true);
+close:
+ proto_close(out);
+ if (in != NULL)
+ proto_close(in);
+ return (false);
+}
+
+static void
+sync_start(void)
+{
+
mtx_lock(&sync_lock);
sync_inprogress = true;
mtx_unlock(&sync_lock);
cv_signal(&sync_cond);
- return;
-close:
- proto_close(res->hr_remoteout);
- res->hr_remoteout = NULL;
- if (res->hr_remotein != NULL) {
- proto_close(res->hr_remotein);
- res->hr_remotein = NULL;
- }
}
static void
@@ -665,7 +682,7 @@ init_ggate(struct hast_resource *res)
ggiocreate.gctl_mediasize = res->hr_datasize;
ggiocreate.gctl_sectorsize = res->hr_local_sectorsize;
ggiocreate.gctl_flags = 0;
- ggiocreate.gctl_maxcount = 128;
+ ggiocreate.gctl_maxcount = G_GATE_MAX_QUEUE_SIZE;
ggiocreate.gctl_timeout = 0;
ggiocreate.gctl_unit = G_GATE_NAME_GIVEN;
snprintf(ggiocreate.gctl_name, sizeof(ggiocreate.gctl_name), "hast/%s",
@@ -735,7 +752,8 @@ hastd_primary(struct hast_resource *res)
setproctitle("%s (primary)", res->hr_name);
init_local(res);
- init_remote(res);
+ if (init_remote(res, NULL, NULL))
+ sync_start();
init_ggate(res);
init_environment(res);
error = pthread_create(&td, NULL, ggate_recv_thread, res);
@@ -1695,6 +1713,7 @@ static void *
guard_thread(void *arg)
{
struct hast_resource *res = arg;
+ struct proto_conn *in, *out;
unsigned int ii, ncomps;
int timeout;
@@ -1738,26 +1757,31 @@ guard_thread(void *arg)
* connected.
*/
rw_unlock(&hio_remote_lock[ii]);
- rw_wlock(&hio_remote_lock[ii]);
- assert(res->hr_remotein == NULL);
- assert(res->hr_remoteout == NULL);
pjdlog_debug(2,
"remote_guard: Reconnecting to %s.",
res->hr_remoteaddr);
- init_remote(res);
- if (ISCONNECTED(res, ii)) {
+ in = out = NULL;
+ if (init_remote(res, &in, &out)) {
+ rw_wlock(&hio_remote_lock[ii]);
+ assert(res->hr_remotein == NULL);
+ assert(res->hr_remoteout == NULL);
+ assert(in != NULL && out != NULL);
+ res->hr_remotein = in;
+ res->hr_remoteout = out;
+ rw_unlock(&hio_remote_lock[ii]);
pjdlog_info("Successfully reconnected to %s.",
res->hr_remoteaddr);
+ sync_start();
} else {
/* Both connections should be NULL. */
assert(res->hr_remotein == NULL);
assert(res->hr_remoteout == NULL);
+ assert(in == NULL && out == NULL);
pjdlog_debug(2,
"remote_guard: Reconnect to %s failed.",
res->hr_remoteaddr);
timeout = RECONNECT_SLEEP;
}
- rw_unlock(&hio_remote_lock[ii]);
}
}
(void)cv_timedwait(&hio_guard_cond, &hio_guard_lock, timeout);
diff --git a/sbin/hastd/proto_socketpair.c b/sbin/hastd/proto_socketpair.c
index 0e2cfa2..08d0c66 100644
--- a/sbin/hastd/proto_socketpair.c
+++ b/sbin/hastd/proto_socketpair.c
@@ -91,9 +91,12 @@ sp_connect(void *ctx __unused)
}
static int
-sp_server(const char *addr __unused, void **ctxp __unused)
+sp_server(const char *addr, void **ctxp __unused)
{
+ if (strcmp(addr, "socketpair://") != 0)
+ return (-1);
+
assert(!"proto_server() not supported on socketpairs");
abort();
}
OpenPOWER on IntegriCloud