diff options
author | Sage Weil <sage@newdream.net> | 2010-02-25 12:40:45 -0800 |
---|---|---|
committer | Sage Weil <sage@newdream.net> | 2010-02-25 12:40:45 -0800 |
commit | e80a52d14f868059e8ec790c9fae88cdb8a1df98 (patch) | |
tree | 8353f33245509d7c3bd7fb25f5c254df6db8db9b /fs | |
parent | 161fd65ac934608345aed35226fc889ea3b0b500 (diff) | |
download | op-kernel-dev-e80a52d14f868059e8ec790c9fae88cdb8a1df98.zip op-kernel-dev-e80a52d14f868059e8ec790c9fae88cdb8a1df98.tar.gz |
ceph: fix connection fault STANDBY check

Move any out_sent messages to out_queue _before_ checking whether
out_queue is empty and going to STANDBY; otherwise we may drop a
message that was sent but never acked.

Also clean up the code a bit (fewer gotos).

Signed-off-by: Sage Weil <sage@newdream.net>
Diffstat (limited to 'fs')
-rw-r--r-- | fs/ceph/messenger.c | 31 |
1 file changed, 13 insertions, 18 deletions
```diff
diff --git a/fs/ceph/messenger.c b/fs/ceph/messenger.c
index 9ea7b76..0ddc2c7 100644
--- a/fs/ceph/messenger.c
+++ b/fs/ceph/messenger.c
@@ -1853,32 +1853,27 @@ static void ceph_fault(struct ceph_connection *con)
 		con->in_msg = NULL;
 	}
 
+	/* Requeue anything that hasn't been acked */
+	list_splice_init(&con->out_sent, &con->out_queue);
 
 	/* If there are no messages in the queue, place the connection
 	 * in a STANDBY state (i.e., don't try to reconnect just yet). */
 	if (list_empty(&con->out_queue) && !con->out_keepalive_pending) {
 		dout("fault setting STANDBY\n");
 		set_bit(STANDBY, &con->state);
-		mutex_unlock(&con->mutex);
-		goto out;
+	} else {
+		/* retry after a delay. */
+		if (con->delay == 0)
+			con->delay = BASE_DELAY_INTERVAL;
+		else if (con->delay < MAX_DELAY_INTERVAL)
+			con->delay *= 2;
+		dout("fault queueing %p delay %lu\n", con, con->delay);
+		con->ops->get(con);
+		if (queue_delayed_work(ceph_msgr_wq, &con->work,
+				       round_jiffies_relative(con->delay)) == 0)
+			con->ops->put(con);
 	}
 
-	/* Requeue anything that hasn't been acked, and retry after a
-	 * delay. */
-	list_splice_init(&con->out_sent, &con->out_queue);
-
-	if (con->delay == 0)
-		con->delay = BASE_DELAY_INTERVAL;
-	else if (con->delay < MAX_DELAY_INTERVAL)
-		con->delay *= 2;
-
-	/* explicitly schedule work to try to reconnect again later. */
-	dout("fault queueing %p delay %lu\n", con, con->delay);
-	con->ops->get(con);
-	if (queue_delayed_work(ceph_msgr_wq, &con->work,
-			       round_jiffies_relative(con->delay)) == 0)
-		con->ops->put(con);
-
 out_unlock:
 	mutex_unlock(&con->mutex);
 out:
```