From 1a5c4e2a0e339a01fdfbd519ba664d8efdc8d702 Mon Sep 17 00:00:00 2001 From: Fabian Frederick Date: Wed, 4 Jun 2014 16:06:06 -0700 Subject: ocfs2: remove NULL assignments on static Static values are automatically initialized to NULL. Signed-off-by: Fabian Frederick Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmdebug.c | 2 +- fs/ocfs2/dlm/dlmlock.c | 2 +- fs/ocfs2/dlm/dlmmaster.c | 6 +++--- 3 files changed, 5 insertions(+), 5 deletions(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmdebug.c b/fs/ocfs2/dlm/dlmdebug.c index e33cd7a..18f13c2 100644 --- a/fs/ocfs2/dlm/dlmdebug.c +++ b/fs/ocfs2/dlm/dlmdebug.c @@ -338,7 +338,7 @@ void dlm_print_one_mle(struct dlm_master_list_entry *mle) #ifdef CONFIG_DEBUG_FS -static struct dentry *dlm_debugfs_root = NULL; +static struct dentry *dlm_debugfs_root; #define DLM_DEBUGFS_DIR "o2dlm" #define DLM_DEBUGFS_DLM_STATE "dlm_state" diff --git a/fs/ocfs2/dlm/dlmlock.c b/fs/ocfs2/dlm/dlmlock.c index 5d32f75..66c2a49 100644 --- a/fs/ocfs2/dlm/dlmlock.c +++ b/fs/ocfs2/dlm/dlmlock.c @@ -52,7 +52,7 @@ #define MLOG_MASK_PREFIX ML_DLM #include "cluster/masklog.h" -static struct kmem_cache *dlm_lock_cache = NULL; +static struct kmem_cache *dlm_lock_cache; static DEFINE_SPINLOCK(dlm_cookie_lock); static u64 dlm_next_cookie = 1; diff --git a/fs/ocfs2/dlm/dlmmaster.c b/fs/ocfs2/dlm/dlmmaster.c index ee1f884..3087a21 100644 --- a/fs/ocfs2/dlm/dlmmaster.c +++ b/fs/ocfs2/dlm/dlmmaster.c @@ -82,9 +82,9 @@ static inline int dlm_mle_equal(struct dlm_ctxt *dlm, return 1; } -static struct kmem_cache *dlm_lockres_cache = NULL; -static struct kmem_cache *dlm_lockname_cache = NULL; -static struct kmem_cache *dlm_mle_cache = NULL; +static struct kmem_cache *dlm_lockres_cache; +static struct kmem_cache *dlm_lockname_cache; +static struct kmem_cache *dlm_mle_cache; static void dlm_mle_release(struct kref *kref); static void dlm_init_mle(struct dlm_master_list_entry *mle, -- cgit v1.1 From 6718cb5e0e75faa2b938dc1ee247fbd70136ccd7 Mon Sep 17 00:00:00 2001 From: Xue jiufei Date: Wed, 4 Jun 2014 16:06:10 -0700 Subject: ocfs2/dlm: fix possible convert=sion deadlock We found there is a conversion deadlock when the owner of lockres happened to crash before send DLM_PROXY_AST_MSG for a downconverting lock. The situation is as follows: Node1 Node2 Node3 the owner of lockresA lock_1 granted at EX mode and call ocfs2_cluster_unlock to decrease ex_holders. converting lock_3 from NL to EX send DLM_PROXY_AST_MSG to Node1, asking Node 1 to downconvert. receiving DLM_PROXY_AST_MSG, thread ocfs2dc send DLM_CONVERT_LOCK_MSG to Node2 to downconvert lock_1(EX->NL). lock_1 can be granted and put it into pending_asts list, return DLM_NORMAL. then something happened and Node2 crashed. received DLM_NORMAL, waiting for DLM_PROXY_AST_MSG. selected as the recovery master, receving migrate lock from Node1, queue lock_1 to the tail of converting list. After dlm recovery, converting list in the master of lockresA(Node3) will be: converting list head <-> lock_3(NL->EX) <->lock_1(EX<->NL). Requested mode of lock_3 is not compatible with the granted mode of lock_1, so it can not be granted. and lock_1 can not downconvert because covnerting queue is strictly FIFO. So a deadlock is created. We think function dlm_process_recovery_data() should queue_ast for lock_1 or alter the order of lock_1 and lock_3, so dlm_thread can process lock_1 first. And if there are multiple downconverting locks, they must convert form PR to NL, so no need to sort them. Signed-off-by: joyce.xue Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmrecovery.c | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmrecovery.c b/fs/ocfs2/dlm/dlmrecovery.c index fe29f79..5de0194 100644 --- a/fs/ocfs2/dlm/dlmrecovery.c +++ b/fs/ocfs2/dlm/dlmrecovery.c @@ -1986,7 +1986,15 @@ skip_lvb: } if (!bad) { dlm_lock_get(newlock); - list_add_tail(&newlock->list, queue); + if (mres->flags & DLM_MRES_RECOVERY && + ml->list == DLM_CONVERTING_LIST && + newlock->ml.type > + newlock->ml.convert_type) { + /* newlock is doing downconvert, add it to the + * head of converting list */ + list_add(&newlock->list, queue); + } else + list_add_tail(&newlock->list, queue); mlog(0, "%s:%.*s: added lock for node %u, " "setting refmap bit\n", dlm->name, res->lockname.len, res->lockname.name, ml->node); -- cgit v1.1 From 01c6222f876062355599e5a63560c514b6de25d2 Mon Sep 17 00:00:00 2001 From: Xue jiufei Date: Wed, 4 Jun 2014 16:06:14 -0700 Subject: ocfs2/dlm: disallow node joining when recovery is on going We found a race situation when dlm recovery and node joining occurs simultaneously if the network state is bad. N1 N4 start joining dlm and send query join to all live nodes set joining node to N1, return OK send query join to other live nodes and it may take a while call dlm_send_join_assert() to send assert join message when N2 is down, so keep trying to send message to N2 until find N2 is down send assert join message to N3, but connection is down with N3, so it may take a while become the recovery master for N2 and send begin reco message to other nodes in domain map but no N1 connection with N3 is rebuild, then send assert join to N4 call dlm_assert_joined_handler(), add N1 to domain_map dlm recovery done, send finalize message to nodes in domain map, including N1 receiving finalize message, trigger the BUG() because recovery master mismatch. Signed-off-by: joyce.xue Cc: Mark Fasheh Cc: Joel Becker Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmdomain.c | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index c973690..8d46907 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -959,6 +959,14 @@ static int dlm_assert_joined_handler(struct o2net_msg *msg, u32 len, void *data, * domain. Set him in the map and clean up our * leftover join state. */ BUG_ON(dlm->joining_node != assert->node_idx); + + if (dlm->reco.state & DLM_RECO_STATE_ACTIVE) { + mlog(0, "dlm recovery is ongoing, disallow join\n"); + spin_unlock(&dlm->spinlock); + spin_unlock(&dlm_domain_lock); + return -EAGAIN; + } + set_bit(assert->node_idx, dlm->domain_map); clear_bit(assert->node_idx, dlm->exit_domain_map); __dlm_set_joining_node(dlm, DLM_LOCK_RES_OWNER_UNKNOWN); @@ -1517,6 +1525,7 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm, unsigned int node) { int status; + int ret; struct dlm_assert_joined assert_msg; mlog(0, "Sending join assert to node %u\n", node); @@ -1528,11 +1537,13 @@ static int dlm_send_one_join_assert(struct dlm_ctxt *dlm, status = o2net_send_message(DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, &assert_msg, sizeof(assert_msg), node, - NULL); + &ret); if (status < 0) mlog(ML_ERROR, "Error %d when sending message %u (key 0x%x) to " "node %u\n", status, DLM_ASSERT_JOINED_MSG, DLM_MOD_KEY, node); + else + status = ret; return status; } -- cgit v1.1 From e72db989e1c01fde28aabf7fd29faeaa08538e24 Mon Sep 17 00:00:00 2001 From: Xue jiufei Date: Wed, 4 Jun 2014 16:06:24 -0700 Subject: ocfs2: remove some unused code dlm_recovery_ctxt.received is unused. ocfs2_should_refresh_lock_res() can only return 0 or 1, so the error handling code in ocfs2_super_lock() is unneeded. Signed-off-by: joyce.xue Cc: Joel Becker Cc: Mark Fasheh Signed-off-by: Andrew Morton Signed-off-by: Linus Torvalds --- fs/ocfs2/dlm/dlmcommon.h | 1 - fs/ocfs2/dlm/dlmdomain.c | 1 - 2 files changed, 2 deletions(-) (limited to 'fs/ocfs2/dlm') diff --git a/fs/ocfs2/dlm/dlmcommon.h b/fs/ocfs2/dlm/dlmcommon.h index e051776..a106b3f 100644 --- a/fs/ocfs2/dlm/dlmcommon.h +++ b/fs/ocfs2/dlm/dlmcommon.h @@ -108,7 +108,6 @@ static inline int dlm_is_recovery_lock(const char *lock_name, int name_len) struct dlm_recovery_ctxt { struct list_head resources; - struct list_head received; struct list_head node_data; u8 new_master; u8 dead_node; diff --git a/fs/ocfs2/dlm/dlmdomain.c b/fs/ocfs2/dlm/dlmdomain.c index 8d46907..39efc505 100644 --- a/fs/ocfs2/dlm/dlmdomain.c +++ b/fs/ocfs2/dlm/dlmdomain.c @@ -2034,7 +2034,6 @@ static struct dlm_ctxt *dlm_alloc_ctxt(const char *domain, INIT_LIST_HEAD(&dlm->list); INIT_LIST_HEAD(&dlm->dirty_list); INIT_LIST_HEAD(&dlm->reco.resources); - INIT_LIST_HEAD(&dlm->reco.received); INIT_LIST_HEAD(&dlm->reco.node_data); INIT_LIST_HEAD(&dlm->purge_list); INIT_LIST_HEAD(&dlm->dlm_domain_handlers); -- cgit v1.1