summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2010-09-22 19:03:11 +0000
committerpjd <pjd@FreeBSD.org>2010-09-22 19:03:11 +0000
commit9433a082e8ce9d04d47ef10efdf954103b527d01 (patch)
treeb5341732e7574446c8cde8df47eae5f3f667961b
parent3657e3ff87f4a157905be2ffb58ae05887892b2c (diff)
downloadFreeBSD-src-9433a082e8ce9d04d47ef10efdf954103b527d01.zip
FreeBSD-src-9433a082e8ce9d04d47ef10efdf954103b527d01.tar.gz
Fix a possible deadlock where the worker process sends an event to the main process
while the main process sends a control message to the worker process, but the worker process hasn't started its control thread yet, because it is waiting for a reply from the main process. The fix is to start the control thread before sending any events.
Reported and fix suggested by: Mikolaj Golub <to.my.trociny@gmail.com>
MFC after: 3 days
-rw-r--r--sbin/hastd/primary.c16
-rw-r--r--sbin/hastd/secondary.c18
2 files changed, 26 insertions, 8 deletions
diff --git a/sbin/hastd/primary.c b/sbin/hastd/primary.c
index 0b2402a..5d6896e 100644
--- a/sbin/hastd/primary.c
+++ b/sbin/hastd/primary.c
@@ -807,10 +807,20 @@ hastd_primary(struct hast_resource *res)
proto_send(res->hr_event, NULL, 0);
init_local(res);
- if (real_remote(res) && init_remote(res, NULL, NULL))
- sync_start();
init_ggate(res);
init_environment(res);
+ /*
+ * Create the control thread before sending any event to the parent,
+ * as we can deadlock when parent sends control request to worker,
+ * but worker has no control thread started yet, so parent waits.
+ * In the meantime worker sends an event to the parent, but parent
+ * is unable to handle the event, because it waits for control
+ * request response.
+ */
+ error = pthread_create(&td, NULL, ctrl_thread, res);
+ assert(error == 0);
+ if (real_remote(res) && init_remote(res, NULL, NULL))
+ sync_start();
error = pthread_create(&td, NULL, ggate_recv_thread, res);
assert(error == 0);
error = pthread_create(&td, NULL, local_send_thread, res);
@@ -823,8 +833,6 @@ hastd_primary(struct hast_resource *res)
assert(error == 0);
error = pthread_create(&td, NULL, sync_thread, res);
assert(error == 0);
- error = pthread_create(&td, NULL, ctrl_thread, res);
- assert(error == 0);
(void)guard_thread(res);
}
diff --git a/sbin/hastd/secondary.c b/sbin/hastd/secondary.c
index 6f56239..9e1e537 100644
--- a/sbin/hastd/secondary.c
+++ b/sbin/hastd/secondary.c
@@ -393,17 +393,27 @@ hastd_secondary(struct hast_resource *res, struct nv *nvin)
pjdlog_errno(LOG_WARNING, "Unable to set connection timeout");
init_local(res);
- init_remote(res, nvin);
init_environment();
+
+ /*
+ * Create the control thread before sending any event to the parent,
+ * as we can deadlock when parent sends control request to worker,
+ * but worker has no control thread started yet, so parent waits.
+ * In the meantime worker sends an event to the parent, but parent
+ * is unable to handle the event, because it waits for control
+ * request response.
+ */
+ error = pthread_create(&td, NULL, ctrl_thread, res);
+ assert(error == 0);
+
+ init_remote(res, nvin);
event_send(res, EVENT_CONNECT);
error = pthread_create(&td, NULL, recv_thread, res);
assert(error == 0);
error = pthread_create(&td, NULL, disk_thread, res);
assert(error == 0);
- error = pthread_create(&td, NULL, send_thread, res);
- assert(error == 0);
- (void)ctrl_thread(res);
+ (void)send_thread(res);
}
static void
OpenPOWER on IntegriCloud