diff options
author | David Teigland <teigland@redhat.com> | 2007-09-27 15:53:38 -0500 |
---|---|---|
committer | Steven Whitehouse <swhiteho@redhat.com> | 2007-10-10 08:56:38 +0100 |
commit | c36258b5925e6cf6bf72904635100593573bfcff (patch) | |
tree | 565f1ce29a7f8a2cd1c25f2d36c932727adbdbc2 /fs/dlm/rcom.c | |
parent | b434eda6fda5bcdcc2dd918e5ffbf7184f2d4e17 (diff) | |
download | op-kernel-dev-c36258b5925e6cf6bf72904635100593573bfcff.zip op-kernel-dev-c36258b5925e6cf6bf72904635100593573bfcff.tar.gz |
[DLM] block dlm_recv in recovery transition
Introduce a per-lockspace rwsem that's held in read mode by dlm_recv
threads while working in the dlm. This allows dlm_recv activity to be
suspended when the lockspace transitions to, from and between recovery
cycles.
The specific bug prompting this change is one where an in-progress
recovery cycle is aborted by a new recovery cycle. While dlm_recv was
processing a recovery message, the recovery cycle was aborted and
dlm_recoverd began cleaning up. dlm_recv decremented recover_locks_count
on an rsb after dlm_recoverd had reset it to zero. This is fixed by
suspending dlm_recv (taking write lock on the rwsem) before aborting the
current recovery.
The transitions to/from normal and recovery modes are simplified by using
this new ability to block dlm_recv. Switching from normal to recovery
mode means dlm_recv goes from processing locking messages to saving them
for later; switching back reverses this. Races are avoided by blocking
dlm_recv while setting the flag that switches between modes.
Signed-off-by: David Teigland <teigland@redhat.com>
Signed-off-by: Steven Whitehouse <swhiteho@redhat.com>
Diffstat (limited to 'fs/dlm/rcom.c')
-rw-r--r-- | fs/dlm/rcom.c | 36 |
1 files changed, 8 insertions, 28 deletions
diff --git a/fs/dlm/rcom.c b/fs/dlm/rcom.c
index 188b91c..ae2fd97f 100644
--- a/fs/dlm/rcom.c
+++ b/fs/dlm/rcom.c
@@ -2,7 +2,7 @@
 *******************************************************************************
 **
 ** Copyright (C) Sistina Software, Inc. 1997-2003 All rights reserved.
-** Copyright (C) 2005 Red Hat, Inc. All rights reserved.
+** Copyright (C) 2005-2007 Red Hat, Inc. All rights reserved.
 **
 ** This copyrighted material is made available to anyone wishing to use,
 ** modify, copy, or redistribute it subject to the terms and conditions
@@ -386,7 +386,10 @@ static void receive_rcom_lock_reply(struct dlm_ls *ls, struct dlm_rcom *rc_in)
 	dlm_recover_process_copy(ls, rc_in);
 }
 
-static int send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
+/* If the lockspace doesn't exist then still send a status message
+   back; it's possible that it just doesn't have its global_id yet. */
+
+int dlm_send_ls_not_ready(int nodeid, struct dlm_rcom *rc_in)
 {
 	struct dlm_rcom *rc;
 	struct rcom_config *rf;
@@ -446,28 +449,11 @@ static int is_old_reply(struct dlm_ls *ls, struct dlm_rcom *rc)
 	return rv;
 }
 
-/* Called by dlm_recvd; corresponds to dlm_receive_message() but special
+/* Called by dlm_recv; corresponds to dlm_receive_message() but special
    recovery-only comms are sent through here. */
 
-void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
+void dlm_receive_rcom(struct dlm_ls *ls, struct dlm_rcom *rc, int nodeid)
 {
-	struct dlm_rcom *rc = (struct dlm_rcom *) hd;
-	struct dlm_ls *ls;
-
-	dlm_rcom_in(rc);
-
-	/* If the lockspace doesn't exist then still send a status message
-	   back; it's possible that it just doesn't have its global_id yet. */
-
-	ls = dlm_find_lockspace_global(hd->h_lockspace);
-	if (!ls) {
-		log_print("lockspace %x from %d type %x not found",
-			  hd->h_lockspace, nodeid, rc->rc_type);
-		if (rc->rc_type == DLM_RCOM_STATUS)
-			send_ls_not_ready(nodeid, rc);
-		return;
-	}
-
 	if (dlm_recovery_stopped(ls) && (rc->rc_type != DLM_RCOM_STATUS)) {
 		log_debug(ls, "ignoring recovery message %x from %d",
 			  rc->rc_type, nodeid);
@@ -477,12 +463,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
 	if (is_old_reply(ls, rc))
 		goto out;
 
-	if (nodeid != rc->rc_header.h_nodeid) {
-		log_error(ls, "bad rcom nodeid %d from %d",
-			  rc->rc_header.h_nodeid, nodeid);
-		goto out;
-	}
-
 	switch (rc->rc_type) {
 	case DLM_RCOM_STATUS:
 		receive_rcom_status(ls, rc);
@@ -520,6 +500,6 @@ void dlm_receive_rcom(struct dlm_header *hd, int nodeid)
 		DLM_ASSERT(0, printk("rc_type=%x\n", rc->rc_type););
 	}
  out:
-	dlm_put_lockspace(ls);
+	return;
 }
 