diff options
author | Philipp Reisner <philipp.reisner@linbit.com> | 2012-08-08 21:19:09 +0200 |
---|---|---|
committer | Philipp Reisner <philipp.reisner@linbit.com> | 2012-11-09 14:08:18 +0100 |
commit | b66623e33efbbf55717df7bfc49882371118b866 (patch) | |
tree | 3f345827c8fab3b4aa8f2b7cf7bd760c704483af | |
parent | 39a1aa7f49dc8eae5c8d3a4bf759eb7abeabe6c0 (diff) | |
download | op-kernel-dev-b66623e33efbbf55717df7bfc49882371118b866.zip op-kernel-dev-b66623e33efbbf55717df7bfc49882371118b866.tar.gz |
drbd: Avoid NetworkFailure state during disconnect
Disconnecting is a cluster wide state change. In case the peer node agrees
to the state transition, it sends back the fact on the meta-data connection
and closes both sockets.
In case the node that initiated the state transfer sees the closing
action on the data-socket, before the P_STATE_CHG_REPLY packet, it was
going into one of the network failure states.
At least with the fencing option set to something else than "dont-care",
the unclean shutdown of the connection causes a short IO freeze or
a fence operation.
Signed-off-by: Philipp Reisner <philipp.reisner@linbit.com>
Signed-off-by: Lars Ellenberg <lars.ellenberg@linbit.com>
-rw-r--r-- | drivers/block/drbd/drbd_int.h | 1 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_receiver.c | 30 | ||||
-rw-r--r-- | drivers/block/drbd/drbd_state.c | 3 |
3 files changed, 33 insertions, 1 deletions
diff --git a/drivers/block/drbd/drbd_int.h b/drivers/block/drbd/drbd_int.h index b83398d..37ae87e 100644 --- a/drivers/block/drbd/drbd_int.h +++ b/drivers/block/drbd/drbd_int.h @@ -816,6 +816,7 @@ enum { * so shrink_page_list() would not recurse into, * and potentially deadlock on, this drbd worker. */ + DISCONNECT_SENT, }; struct drbd_tconn { /* is a resource from the config file */ diff --git a/drivers/block/drbd/drbd_receiver.c b/drivers/block/drbd/drbd_receiver.c index 0eefbeb..1a8f698 100644 --- a/drivers/block/drbd/drbd_receiver.c +++ b/drivers/block/drbd/drbd_receiver.c @@ -522,7 +522,6 @@ static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) conn_err(tconn, "sock_recvmsg returned %d\n", rv); break; } else if (rv == 0) { - conn_info(tconn, "sock was shut down by peer\n"); break; } else { /* signal came in, or peer/link went down, @@ -535,9 +534,25 @@ static int drbd_recv(struct drbd_tconn *tconn, void *buf, size_t size) set_fs(oldfs); + if (rv == 0) { + if (test_bit(DISCONNECT_SENT, &tconn->flags)) { + long t; + rcu_read_lock(); + t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10; + rcu_read_unlock(); + + t = wait_event_timeout(tconn->ping_wait, tconn->cstate < C_WF_REPORT_PARAMS, t); + + if (t) + goto out; + } + conn_info(tconn, "sock was shut down by peer\n"); + } + if (rv != size) conn_request_state(tconn, NS(conn, C_BROKEN_PIPE), CS_HARD); +out: return rv; } @@ -894,6 +909,7 @@ static int conn_connect(struct drbd_tconn *tconn) .door_bell = COMPLETION_INITIALIZER_ONSTACK(ad.door_bell), }; + clear_bit(DISCONNECT_SENT, &tconn->flags); if (conn_request_state(tconn, NS(conn, C_WF_CONNECTION), CS_VERBOSE) < SS_SUCCESS) return -2; @@ -5316,6 +5332,18 @@ int drbd_asender(struct drbd_thread *thi) received += rv; buf += rv; } else if (rv == 0) { + if (test_bit(DISCONNECT_SENT, &tconn->flags)) { + long t; + rcu_read_lock(); + t = rcu_dereference(tconn->net_conf)->ping_timeo * HZ/10; + rcu_read_unlock(); + + t = 
wait_event_timeout(tconn->ping_wait, + tconn->cstate < C_WF_REPORT_PARAMS, + t); + if (t) + break; + } conn_err(tconn, "meta connection shut down by peer.\n"); goto reconnect; } else if (rv == -EAGAIN) { diff --git a/drivers/block/drbd/drbd_state.c b/drivers/block/drbd/drbd_state.c index c16349a..4fda4e2 100644 --- a/drivers/block/drbd/drbd_state.c +++ b/drivers/block/drbd/drbd_state.c @@ -1742,6 +1742,9 @@ conn_cl_wide(struct drbd_tconn *tconn, union drbd_state mask, union drbd_state v goto abort; } + if (val.conn == C_DISCONNECTING) + set_bit(DISCONNECT_SENT, &tconn->flags); + wait_event(tconn->ping_wait, (rv = _conn_rq_cond(tconn, mask, val))); clear_bit(CONN_WD_ST_CHG_REQ, &tconn->flags); |