summaryrefslogtreecommitdiffstats
path: root/sbin
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2010-11-02 22:13:08 +0000
committerpjd <pjd@FreeBSD.org>2010-11-02 22:13:08 +0000
commitc148a74821d2b0cff6651f00ab1dcc590bc73a6a (patch)
tree667b2e8d33242e14957176a8e61082215c269563 /sbin
parentdb8115405e9cedda133d4e8a82ec30f3a745c356 (diff)
downloadFreeBSD-src-c148a74821d2b0cff6651f00ab1dcc590bc73a6a.zip
FreeBSD-src-c148a74821d2b0cff6651f00ab1dcc590bc73a6a.tar.gz
Send packets to remote node only via the send thread to avoid possible
races - in this case a keepalive packet was send from wrong thread which lead to connection dropping, because of corrupted packet. Fix it by sending keepalive packets directly from the send thread. As a bonus we now send keepalive packets only when connection is idle. Submitted by: Mikolaj Golub <to.my.trociny@gmail.com> MFC after: 3 days
Diffstat (limited to 'sbin')
-rw-r--r--sbin/hastd/primary.c97
1 files changed, 57 insertions, 40 deletions
diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c
index 4c2f44f..baa0dac 100644
--- a/sbin/hastd/primary.c
+++ b/sbin/hastd/primary.c
@@ -180,14 +180,21 @@ static pthread_mutex_t metadata_lock;
if (_wakeup) \
cv_signal(&hio_##name##_list_cond); \
} while (0)
-#define QUEUE_TAKE1(hio, name, ncomp) do { \
+#define QUEUE_TAKE1(hio, name, ncomp, timeout) do { \
+ bool _last; \
+ \
mtx_lock(&hio_##name##_list_lock[(ncomp)]); \
- while (((hio) = TAILQ_FIRST(&hio_##name##_list[(ncomp)])) == NULL) { \
- cv_wait(&hio_##name##_list_cond[(ncomp)], \
- &hio_##name##_list_lock[(ncomp)]); \
+ _last = false; \
+ while (((hio) = TAILQ_FIRST(&hio_##name##_list[(ncomp)])) == NULL && !_last) { \
+ cv_timedwait(&hio_##name##_list_cond[(ncomp)], \
+ &hio_##name##_list_lock[(ncomp)], (timeout)); \
+ if ((timeout) != 0) \
+ _last = true; \
+ } \
+ if (hio != NULL) { \
+ TAILQ_REMOVE(&hio_##name##_list[(ncomp)], (hio), \
+ hio_next[(ncomp)]); \
} \
- TAILQ_REMOVE(&hio_##name##_list[(ncomp)], (hio), \
- hio_next[(ncomp)]); \
mtx_unlock(&hio_##name##_list_lock[(ncomp)]); \
} while (0)
#define QUEUE_TAKE2(hio, name) do { \
@@ -1112,7 +1119,7 @@ local_send_thread(void *arg)
for (;;) {
pjdlog_debug(2, "local_send: Taking request.");
- QUEUE_TAKE1(hio, send, ncomp);
+ QUEUE_TAKE1(hio, send, ncomp, 0);
pjdlog_debug(2, "local_send: (%p) Got request.", hio);
ggio = &hio->hio_ggio;
switch (ggio->gctl_cmd) {
@@ -1176,6 +1183,38 @@ local_send_thread(void *arg)
return (NULL);
}
+static void
+keepalive_send(struct hast_resource *res, unsigned int ncomp)
+{
+ struct nv *nv;
+
+ if (!ISCONNECTED(res, ncomp))
+ return;
+
+ assert(res->hr_remotein != NULL);
+ assert(res->hr_remoteout != NULL);
+
+ nv = nv_alloc();
+ nv_add_uint8(nv, HIO_KEEPALIVE, "cmd");
+ if (nv_error(nv) != 0) {
+ nv_free(nv);
+ pjdlog_debug(1,
+ "keepalive_send: Unable to prepare header to send.");
+ return;
+ }
+ if (hast_proto_send(res, res->hr_remoteout, nv, NULL, 0) < 0) {
+ pjdlog_common(LOG_DEBUG, 1, errno,
+ "keepalive_send: Unable to send request");
+ nv_free(nv);
+ rw_unlock(&hio_remote_lock[ncomp]);
+ remote_close(res, ncomp);
+ rw_rlock(&hio_remote_lock[ncomp]);
+ return;
+ }
+ nv_free(nv);
+ pjdlog_debug(2, "keepalive_send: Request sent.");
+}
+
/*
* Thread sends request to secondary node.
*/
@@ -1184,6 +1223,7 @@ remote_send_thread(void *arg)
{
struct hast_resource *res = arg;
struct g_gate_ctl_io *ggio;
+ time_t lastcheck, now;
struct hio *hio;
struct nv *nv;
unsigned int ncomp;
@@ -1194,10 +1234,19 @@ remote_send_thread(void *arg)
/* Remote component is 1 for now. */
ncomp = 1;
+ lastcheck = time(NULL);
for (;;) {
pjdlog_debug(2, "remote_send: Taking request.");
- QUEUE_TAKE1(hio, send, ncomp);
+ QUEUE_TAKE1(hio, send, ncomp, RETRY_SLEEP);
+ if (hio == NULL) {
+ now = time(NULL);
+ if (lastcheck + RETRY_SLEEP <= now) {
+ keepalive_send(res, ncomp);
+ lastcheck = now;
+ }
+ continue;
+ }
pjdlog_debug(2, "remote_send: (%p) Got request.", hio);
ggio = &hio->hio_ggio;
switch (ggio->gctl_cmd) {
@@ -1883,32 +1932,6 @@ failed:
}
static void
-keepalive_send(struct hast_resource *res, unsigned int ncomp)
-{
- struct nv *nv;
-
- nv = nv_alloc();
- nv_add_uint8(nv, HIO_KEEPALIVE, "cmd");
- if (nv_error(nv) != 0) {
- nv_free(nv);
- pjdlog_debug(1,
- "keepalive_send: Unable to prepare header to send.");
- return;
- }
- if (hast_proto_send(res, res->hr_remoteout, nv, NULL, 0) < 0) {
- pjdlog_common(LOG_DEBUG, 1, errno,
- "keepalive_send: Unable to send request");
- nv_free(nv);
- rw_unlock(&hio_remote_lock[ncomp]);
- remote_close(res, ncomp);
- rw_rlock(&hio_remote_lock[ncomp]);
- return;
- }
- nv_free(nv);
- pjdlog_debug(2, "keepalive_send: Request sent.");
-}
-
-static void
guard_one(struct hast_resource *res, unsigned int ncomp)
{
struct proto_conn *in, *out;
@@ -1926,12 +1949,6 @@ guard_one(struct hast_resource *res, unsigned int ncomp)
if (ISCONNECTED(res, ncomp)) {
assert(res->hr_remotein != NULL);
assert(res->hr_remoteout != NULL);
- keepalive_send(res, ncomp);
- }
-
- if (ISCONNECTED(res, ncomp)) {
- assert(res->hr_remotein != NULL);
- assert(res->hr_remoteout != NULL);
rw_unlock(&hio_remote_lock[ncomp]);
pjdlog_debug(2, "remote_guard: Connection to %s is ok.",
res->hr_remoteaddr);
OpenPOWER on IntegriCloud