summary | refs | log | tree | commit | diff | stats
diff options
context:
space:
mode:
author: Sunil Mushran <sunil.mushran@oracle.com> 2011-07-24 10:30:54 -0700
committer: Sunil Mushran <sunil.mushran@oracle.com> 2011-07-24 10:30:54 -0700
commit: a2c0cc1579176bd0808ef7deea456767dfa80217 (patch)
tree: 4f797a5fda954ce8a4783e9149da455879ca3641
parent: ff0a522e7db79625aa27a433467eb94c5e255718 (diff)
download: op-kernel-dev-a2c0cc1579176bd0808ef7deea456767dfa80217.zip
          op-kernel-dev-a2c0cc1579176bd0808ef7deea456767dfa80217.tar.gz
ocfs2/dlm: dlmlock_remote() needs to account for remastery
In dlmlock_remote(), we wait for the resource to stop being active before
setting the inprogress flag. Active includes recovery, migration, etc.

The problem here is that if the resource was being recovered or migrated, the
new owner could very well be that node itself (and thus not a remote node).

This problem was observed in Oracle bug#12583620. The error messages observed
were as follows:

dlm_send_remote_lock_request:337 ERROR: Error -40 (ELOOP) when sending message 503 (key 0xd6d8c7) to node 2
dlmlock_remote:271 ERROR: dlm status = DLM_BADARGS
dlmlock:751 ERROR: dlm status = DLM_BADARGS

Signed-off-by: Sunil Mushran <sunil.mushran@oracle.com>
-rw-r--r-- fs/ocfs2/dlm/dlmlock.c 18
1 files changed, 8 insertions, 10 deletions
diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c
index 3ef2c1a..f32fcba 100644
--- a/fs/ocfs2/dlm/dlmlock.c
+++ b/fs/ocfs2/dlm/dlmlock.c
@@ -227,10 +227,16 @@ static enum dlm_status dlmlock_remote(struct dlm_ctxt *dlm,
lock->ml.type, res->lockname.len,
res->lockname.name, flags);
+ /*
+ * Wait if resource is getting recovered, remastered, etc.
+ * If the resource was remastered and new owner is self, then exit.
+ */
spin_lock(&res->spinlock);
-
- /* will exit this call with spinlock held */
__dlm_wait_on_lockres(res);
+ if (res->owner == dlm->node_num) {
+ spin_unlock(&res->spinlock);
+ return DLM_RECOVERING;
+ }
res->state |= DLM_LOCK_RES_IN_PROGRESS;
/* add lock to local (secondary) queue */
@@ -710,18 +716,10 @@ retry_lock:
if (status == DLM_RECOVERING || status == DLM_MIGRATING ||
status == DLM_FORWARD) {
- mlog(0, "retrying lock with migration/"
- "recovery/in progress\n");
msleep(100);
- /* no waiting for dlm_reco_thread */
if (recovery) {
if (status != DLM_RECOVERING)
goto retry_lock;
-
- mlog(0, "%s: got RECOVERING "
- "for $RECOVERY lock, master "
- "was %u\n", dlm->name,
- res->owner);
/* wait to see the node go down, then
* drop down and allow the lockres to
* get cleaned up. need to remaster. */
OpenPOWER on IntegriCloud