From 6d98c3ccb52f692f1a60339dde7c700686a5568b Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Fri, 16 Jul 2010 23:13:33 +0800 Subject: ocfs2/dlm: fix a dead lock When we have to take both dlm->master_lock and lockres->spinlock, take them in order lockres->spinlock and then dlm->master_lock. The patch fixes a violation of the rule. We can simply move taking dlm->master_lock to where we have dropped res->spinlock since when we access res->state and free mle memory we don't need master_lock's protection. Signed-off-by: Wengang Wang Cc: stable@kernel.org Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmmaster.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 94b97fc..6d098b8 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -3050,8 +3050,6 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, /* check for pre-existing lock */ spin_lock(&dlm->spinlock); res = __dlm_lookup_lockres(dlm, name, namelen, hash); - spin_lock(&dlm->master_lock); - if (res) { spin_lock(&res->spinlock); if (res->state & DLM_LOCK_RES_RECOVERING) { @@ -3069,14 +3067,15 @@ int dlm_migrate_request_handler(struct o2net_msg *msg, u32 len, void *data, spin_unlock(&res->spinlock); } + spin_lock(&dlm->master_lock); /* ignore status. only nonzero status would BUG. */ ret = dlm_add_migration_mle(dlm, res, mle, &oldmle, name, namelen, migrate->new_master, migrate->master); -unlock: spin_unlock(&dlm->master_lock); +unlock: spin_unlock(&dlm->spinlock); if (oldmle) { -- cgit v1.1 From 7beaf243787f85a2ef9213ccf13ab4a243283fde Mon Sep 17 00:00:00 2001 From: Srinivas Eeda Date: Mon, 19 Jul 2010 16:04:12 -0700 Subject: ocfs2 fix o2dlm dlm run purgelist (rev 3) This patch fixes two problems in dlm_run_purgelist 1. If a lockres is found to be in use, dlm_run_purgelist keeps trying to purge the same lockres instead of trying the next lockres. 2. When a lockres is found unused, dlm_run_purgelist releases lockres spinlock before setting DLM_LOCK_RES_DROPPING_REF and calls dlm_purge_lockres. spinlock is reacquired but in this window lockres can get reused. This leads to BUG. This patch modifies dlm_run_purgelist to skip lockres if it's in use and purge next lockres. It also sets DLM_LOCK_RES_DROPPING_REF before releasing the lockres spinlock protecting it from getting reused. Signed-off-by: Srinivas Eeda Acked-by: Sunil Mushran Cc: stable@kernel.org Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmthread.c | 80 ++++++++++++++++++++---------------------------- 1 file changed, 34 insertions(+), 46 deletions(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index d4f73ca..dd78ca3 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -152,45 +152,25 @@ void dlm_lockres_calc_usage(struct dlm_ctxt *dlm, spin_unlock(&dlm->spinlock); } -static int dlm_purge_lockres(struct dlm_ctxt *dlm, +static void dlm_purge_lockres(struct dlm_ctxt *dlm, struct dlm_lock_resource *res) { int master; int ret = 0; - spin_lock(&res->spinlock); - if (!__dlm_lockres_unused(res)) { - mlog(0, "%s:%.*s: tried to purge but not unused\n", - dlm->name, res->lockname.len, res->lockname.name); - __dlm_print_one_lock_resource(res); - spin_unlock(&res->spinlock); - BUG(); - } - - if (res->state & DLM_LOCK_RES_MIGRATING) { - mlog(0, "%s:%.*s: Delay dropref as this lockres is " - "being remastered\n", dlm->name, res->lockname.len, - res->lockname.name); - /* Re-add the lockres to the end of the purge list */ - if (!list_empty(&res->purge)) { - list_del_init(&res->purge); - list_add_tail(&res->purge, &dlm->purge_list); - } - spin_unlock(&res->spinlock); - return 0; - } + assert_spin_locked(&dlm->spinlock); + assert_spin_locked(&res->spinlock); master = (res->owner == dlm->node_num); - if (!master) - res->state |= DLM_LOCK_RES_DROPPING_REF; - spin_unlock(&res->spinlock); mlog(0, "purging lockres %.*s, master = %d\n", res->lockname.len, res->lockname.name, master); if (!master) { + res->state |= DLM_LOCK_RES_DROPPING_REF; /* drop spinlock... retake below */ + spin_unlock(&res->spinlock); spin_unlock(&dlm->spinlock); spin_lock(&res->spinlock); @@ -208,31 +188,35 @@ static int dlm_purge_lockres(struct dlm_ctxt *dlm, mlog(0, "%s:%.*s: dlm_deref_lockres returned %d\n", dlm->name, res->lockname.len, res->lockname.name, ret); spin_lock(&dlm->spinlock); + spin_lock(&res->spinlock); } - spin_lock(&res->spinlock); if (!list_empty(&res->purge)) { mlog(0, "removing lockres %.*s:%p from purgelist, " "master = %d\n", res->lockname.len, res->lockname.name, res, master); list_del_init(&res->purge); - spin_unlock(&res->spinlock); dlm_lockres_put(res); dlm->purge_count--; - } else - spin_unlock(&res->spinlock); + } + + if (!__dlm_lockres_unused(res)) { + mlog(ML_ERROR, "found lockres %s:%.*s: in use after deref\n", + dlm->name, res->lockname.len, res->lockname.name); + __dlm_print_one_lock_resource(res); + BUG(); + } __dlm_unhash_lockres(res); /* lockres is not in the hash now. drop the flag and wake up * any processes waiting in dlm_get_lock_resource. */ if (!master) { - spin_lock(&res->spinlock); res->state &= ~DLM_LOCK_RES_DROPPING_REF; spin_unlock(&res->spinlock); wake_up(&res->wq); - } - return 0; + } else + spin_unlock(&res->spinlock); } static void dlm_run_purge_list(struct dlm_ctxt *dlm, @@ -251,17 +235,7 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, lockres = list_entry(dlm->purge_list.next, struct dlm_lock_resource, purge); - /* Status of the lockres *might* change so double - * check. If the lockres is unused, holding the dlm - * spinlock will prevent people from getting and more - * refs on it -- there's no need to keep the lockres - * spinlock. */ spin_lock(&lockres->spinlock); - unused = __dlm_lockres_unused(lockres); - spin_unlock(&lockres->spinlock); - - if (!unused) - continue; purge_jiffies = lockres->last_used + msecs_to_jiffies(DLM_PURGE_INTERVAL_MS); @@ -273,15 +247,29 @@ static void dlm_run_purge_list(struct dlm_ctxt *dlm, * in tail order, we can stop at the first * unpurgable resource -- anyone added after * him will have a greater last_used value */ + spin_unlock(&lockres->spinlock); break; } + /* Status of the lockres *might* change so double + * check. If the lockres is unused, holding the dlm + * spinlock will prevent people from getting and more + * refs on it. */ + unused = __dlm_lockres_unused(lockres); + if (!unused || + (lockres->state & DLM_LOCK_RES_MIGRATING)) { + mlog(0, "lockres %s:%.*s: is in use or " + "being remastered, used %d, state %d\n", + dlm->name, lockres->lockname.len, + lockres->lockname.name, !unused, lockres->state); + list_move_tail(&dlm->purge_list, &lockres->purge); + spin_unlock(&lockres->spinlock); + continue; + } + dlm_lockres_get(lockres); - /* This may drop and reacquire the dlm spinlock if it - * has to do migration. */ - if (dlm_purge_lockres(dlm, lockres)) - BUG(); + dlm_purge_lockres(dlm, lockres); dlm_lockres_put(lockres); -- cgit v1.1 From a524812b7eaa7783d7811198921100f079034e61 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Fri, 30 Jul 2010 16:14:44 +0800 Subject: ocfs2/dlm: avoid incorrect bit set in refmap on recovery master In the following situation, there remains an incorrect bit in refmap on the recovery master. Finally the recovery master will fail at purging the lockres due to the incorrect bit in refmap. 1) node A has no interest on lockres A any longer, so it is purging it. 2) the owner of lockres A is node B, so node A is sending de-ref message to node B. 3) at this time, node B crashed. node C becomes the recovery master. it recovers lockres A(because the master is the dead node B). 4) node A migrated lockres A to node C with a refbit there. 5) node A failed to send de-ref message to node B because it crashed. The failure is ignored. no other action is done for lockres A any more. For mormal, re-send the deref message to it to recovery master can fix it. Well, ignoring the failure of deref to the original master and not recovering the lockres to recovery master has the same effect. And the later is simpler. Signed-off-by: Wengang Wang Acked-by: Srinivas Eeda Cc: stable@kernel.org Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmrecovery.c | 22 ++++++++++------------ fs/ocfs2/dlm/dlmthread.c | 34 +++++++++++++++++++++------------- 2 files changed, 31 insertions(+), 25 deletions(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index 9dfaac7..aaaffbc 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1997,6 +1997,8 @@ void dlm_move_lockres_to_recovery_list(struct dlm_ctxt *dlm, struct list_head *queue; struct dlm_lock *lock, *next; + assert_spin_locked(&dlm->spinlock); + assert_spin_locked(&res->spinlock); res->state |= DLM_LOCK_RES_RECOVERING; if (!list_empty(&res->recovering)) { mlog(0, @@ -2326,19 +2328,15 @@ static void dlm_do_local_recovery_cleanup(struct dlm_ctxt *dlm, u8 dead_node) /* zero the lvb if necessary */ dlm_revalidate_lvb(dlm, res, dead_node); if (res->owner == dead_node) { - if (res->state & DLM_LOCK_RES_DROPPING_REF) - mlog(0, "%s:%.*s: owned by " - "dead node %u, this node was " - "dropping its ref when it died. " - "continue, dropping the flag.\n", - dlm->name, res->lockname.len, - res->lockname.name, dead_node); - - /* the wake_up for this will happen when the - * RECOVERING flag is dropped later */ - res->state &= ~DLM_LOCK_RES_DROPPING_REF; + if (res->state & DLM_LOCK_RES_DROPPING_REF) { + mlog(ML_NOTICE, "Ignore %.*s for " + "recovery as it is being freed\n", + res->lockname.len, + res->lockname.name); + } else + dlm_move_lockres_to_recovery_list(dlm, + res); - dlm_move_lockres_to_recovery_list(dlm, res); } else if (res->owner == dlm->node_num) { dlm_free_dead_locks(dlm, res, dead_node); __dlm_lockres_calc_usage(dlm, res); diff --git a/fs/ocfs2/dlm/dlmthread.c b/fs/ocfs2/dlm/dlmthread.c index dd78ca3..2211acf 100644 --- a/fs/ocfs2/dlm/dlmthread.c +++ b/fs/ocfs2/dlm/dlmthread.c @@ -92,19 +92,27 @@ int __dlm_lockres_has_locks(struct dlm_lock_resource *res) * truly ready to be freed. */ int __dlm_lockres_unused(struct dlm_lock_resource *res) { - if (!__dlm_lockres_has_locks(res) && - (list_empty(&res->dirty) && !(res->state & DLM_LOCK_RES_DIRTY))) { - /* try not to scan the bitmap unless the first two - * conditions are already true */ - int bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); - if (bit >= O2NM_MAX_NODES) { - /* since the bit for dlm->node_num is not - * set, inflight_locks better be zero */ - BUG_ON(res->inflight_locks != 0); - return 1; - } - } - return 0; + int bit; + + if (__dlm_lockres_has_locks(res)) + return 0; + + if (!list_empty(&res->dirty) || res->state & DLM_LOCK_RES_DIRTY) + return 0; + + if (res->state & DLM_LOCK_RES_RECOVERING) + return 0; + + bit = find_next_bit(res->refmap, O2NM_MAX_NODES, 0); + if (bit < O2NM_MAX_NODES) + return 0; + + /* + * since the bit for dlm->node_num is not set, inflight_locks better + * be zero + */ + BUG_ON(res->inflight_locks != 0); + return 1; } -- cgit v1.1 From b11f1f1ab73fd358b1b734a9427744802202ba68 Mon Sep 17 00:00:00 2001 From: Wengang Wang Date: Fri, 30 Jul 2010 23:18:00 +0800 Subject: ocfs2/dlm: remove potential deadlock -V3 When we need to take both dlm_domain_lock and dlm->spinlock, we should take them in order of: dlm_domain_lock then dlm->spinlock. There is pathes disobey this order. That is calling dlm_lockres_put() with dlm->spinlock held in dlm_run_purge_list. dlm_lockres_put() calls dlm_put() at the ref and dlm_put() locks on dlm_domain_lock. Fix: Don't grab/put the dlm when the initialising/releasing lockres. That grab is not required because we don't call dlm_unregister_domain() based on refcount. Signed-off-by: Wengang Wang Cc: stable@kernel.org Signed-off-by: Joel Becker --- fs/ocfs2/dlm/dlmmaster.c | 4 ---- 1 file changed, 4 deletions(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index 6d098b8..ffb4c68 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -511,8 +511,6 @@ static void dlm_lockres_release(struct kref *kref) atomic_dec(&dlm->res_cur_count); - dlm_put(dlm); - if (!hlist_unhashed(&res->hash_node) || !list_empty(&res->granted) || !list_empty(&res->converting) || @@ -585,8 +583,6 @@ static void dlm_init_lockres(struct dlm_ctxt *dlm, res->migration_pending = 0; res->inflight_locks = 0; - /* put in dlm_lockres_release */ - dlm_grab(dlm); res->dlm = dlm; kref_init(&res->refs); -- cgit v1.1