Diffstat (limited to 'drivers/staging/lustre/lustre/ldlm')
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/interval_tree.c  |  13
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_internal.h  |   5
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_lock.c      |  47
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c     | 178
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_pool.c      |   7
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_request.c   |  14
-rw-r--r--  drivers/staging/lustre/lustre/ldlm/ldlm_resource.c  |   2
7 files changed, 171 insertions(+), 95 deletions(-)
diff --git a/drivers/staging/lustre/lustre/ldlm/interval_tree.c b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
index e134ecd..e106902 100644
--- a/drivers/staging/lustre/lustre/ldlm/interval_tree.c
+++ b/drivers/staging/lustre/lustre/ldlm/interval_tree.c
@@ -101,11 +101,6 @@ static inline int node_equal(struct interval_node *n1, struct interval_node *n2)
return extent_equal(&n1->in_extent, &n2->in_extent);
}
-static inline __u64 max_u64(__u64 x, __u64 y)
-{
- return x > y ? x : y;
-}
-
static struct interval_node *interval_first(struct interval_node *node)
{
if (!node)
@@ -134,8 +129,8 @@ static void __rotate_change_maxhigh(struct interval_node *node,
rotate->in_max_high = node->in_max_high;
left_max = node->in_left ? node->in_left->in_max_high : 0;
right_max = node->in_right ? node->in_right->in_max_high : 0;
- node->in_max_high = max_u64(interval_high(node),
- max_u64(left_max, right_max));
+ node->in_max_high = max(interval_high(node),
+ max(left_max, right_max));
}
/* The left rotation "pivots" around the link from node to node->right, and
@@ -394,8 +389,8 @@ static void update_maxhigh(struct interval_node *node,
while (node) {
left_max = node->in_left ? node->in_left->in_max_high : 0;
right_max = node->in_right ? node->in_right->in_max_high : 0;
- node->in_max_high = max_u64(interval_high(node),
- max_u64(left_max, right_max));
+ node->in_max_high = max(interval_high(node),
+ max(left_max, right_max));
if (node->in_max_high >= old_maxhigh)
break;
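The interval_tree.c hunks above drop the open-coded max_u64() helper in favour of the kernel's generic max() macro from <linux/kernel.h>, which rejects mismatched operand types at compile time. A minimal sketch of the resulting pattern; sample_max_high() is a hypothetical stand-in for the real tree-update code:

#include <linux/kernel.h>	/* max() with compile-time type checking */
#include <linux/types.h>

/* Illustrative only: combines the three candidate values the way
 * update_maxhigh() now does; max() warns when its operands have
 * different types, which the removed max_u64() helper could not.
 */
static __u64 sample_max_high(__u64 node_high, __u64 left_max, __u64 right_max)
{
	return max(node_high, max(left_max, right_max));
}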
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
index 5c02501..5d24b48 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_internal.h
@@ -108,9 +108,7 @@ extern unsigned int ldlm_cancel_unused_locks_before_replay;
/* ldlm_resource.c */
int ldlm_resource_putref_locked(struct ldlm_resource *res);
-void ldlm_namespace_free_prior(struct ldlm_namespace *ns,
- struct obd_import *imp, int force);
-void ldlm_namespace_free_post(struct ldlm_namespace *ns);
+
/* ldlm_lock.c */
struct ldlm_cb_set_arg {
@@ -156,6 +154,7 @@ int ldlm_bl_to_thread_list(struct ldlm_namespace *ns,
struct ldlm_lock_desc *ld,
struct list_head *cancels, int count,
enum ldlm_cancel_flags cancel_flags);
+int ldlm_bl_thread_wakeup(void);
void ldlm_handle_bl_callback(struct ldlm_namespace *ns,
struct ldlm_lock_desc *ld, struct ldlm_lock *lock);
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
index 5a94265..ddb4642 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lock.c
@@ -435,7 +435,6 @@ static struct ldlm_lock *ldlm_lock_new(struct ldlm_resource *resource)
lock->l_exp_refs_nr = 0;
lock->l_exp_refs_target = NULL;
#endif
- INIT_LIST_HEAD(&lock->l_exp_list);
return lock;
}
@@ -771,19 +770,11 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)
ldlm_lock_decref_internal_nolock(lock, mode);
- if (ldlm_is_local(lock) &&
+ if ((ldlm_is_local(lock) || lock->l_req_mode == LCK_GROUP) &&
!lock->l_readers && !lock->l_writers) {
/* If this is a local lock on a server namespace and this was
* the last reference, cancel the lock.
- */
- CDEBUG(D_INFO, "forcing cancel of local lock\n");
- ldlm_set_cbpending(lock);
- }
-
- if (!lock->l_readers && !lock->l_writers &&
- (ldlm_is_cbpending(lock) || lock->l_req_mode == LCK_GROUP)) {
- /* If we received a blocked AST and this was the last reference,
- * run the callback.
+ *
* Group locks are special:
* They must not go in LRU, but they are not called back
* like non-group locks, instead they are manually released.
@@ -791,6 +782,13 @@ void ldlm_lock_decref_internal(struct ldlm_lock *lock, enum ldlm_mode mode)
* they are manually released, so we remove them when they have
* no more reader or writer references. - LU-6368
*/
+ ldlm_set_cbpending(lock);
+ }
+
+ if (!lock->l_readers && !lock->l_writers && ldlm_is_cbpending(lock)) {
+ /* If we received a blocked AST and this was the last reference,
+ * run the callback.
+ */
LDLM_DEBUG(lock, "final decref done on cbpending lock");
LDLM_LOCK_GET(lock); /* dropped by bl thread */
@@ -1882,6 +1880,19 @@ out:
return rc;
}
+static bool is_bl_done(struct ldlm_lock *lock)
+{
+ bool bl_done = true;
+
+ if (!ldlm_is_bl_done(lock)) {
+ lock_res_and_lock(lock);
+ bl_done = ldlm_is_bl_done(lock);
+ unlock_res_and_lock(lock);
+ }
+
+ return bl_done;
+}
+
/**
* Helper function to call blocking AST for LDLM lock \a lock in a
* "cancelling" mode.
@@ -1899,8 +1910,20 @@ void ldlm_cancel_callback(struct ldlm_lock *lock)
} else {
LDLM_DEBUG(lock, "no blocking ast");
}
+ /* only canceller can set bl_done bit */
+ ldlm_set_bl_done(lock);
+ wake_up_all(&lock->l_waitq);
+ } else if (!ldlm_is_bl_done(lock)) {
+ struct l_wait_info lwi = { 0 };
+
+ /*
+ * The lock is guaranteed to have been canceled once
+ * returning from this function.
+ */
+ unlock_res_and_lock(lock);
+ l_wait_event(lock->l_waitq, is_bl_done(lock), &lwi);
+ lock_res_and_lock(lock);
}
- ldlm_set_bl_done(lock);
}
/**
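The ldlm_cancel_callback() change above makes a concurrent canceller wait until the thread that actually ran the blocking AST has published the bl_done flag. A rough sketch of that publish-and-wait handshake, using generic kernel wait queues rather than the Lustre l_wait_event() wrapper; the demo_* names are hypothetical:

#include <linux/compiler.h>
#include <linux/spinlock.h>
#include <linux/wait.h>

struct demo_lock {
	spinlock_t		guard;
	bool			bl_done;	/* blocking AST has completed */
	wait_queue_head_t	waitq;
};

static void demo_lock_init(struct demo_lock *lk)
{
	spin_lock_init(&lk->guard);
	lk->bl_done = false;
	init_waitqueue_head(&lk->waitq);
}

/* Canceller that ran the blocking AST: publish completion, wake waiters. */
static void demo_cancel_done(struct demo_lock *lk)
{
	spin_lock(&lk->guard);
	lk->bl_done = true;
	spin_unlock(&lk->guard);
	wake_up_all(&lk->waitq);
}

/* Concurrent canceller: return only once the first one has finished. */
static void demo_cancel_wait(struct demo_lock *lk)
{
	wait_event(lk->waitq, READ_ONCE(lk->bl_done));
}

The real code first tests the flag locklessly and only takes the resource lock to recheck it, which is exactly what the new is_bl_done() helper does.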
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
index 12647af..6f9d540 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_lockd.c
@@ -454,6 +454,12 @@ int ldlm_bl_to_thread_list(struct ldlm_namespace *ns, struct ldlm_lock_desc *ld,
return ldlm_bl_to_thread(ns, ld, NULL, cancels, count, cancel_flags);
}
+int ldlm_bl_thread_wakeup(void)
+{
+ wake_up(&ldlm_state->ldlm_bl_pool->blp_waitq);
+ return 0;
+}
+
/* Setinfo coming from Server (eg MDT) to Client (eg MDC)! */
static int ldlm_handle_setinfo(struct ptlrpc_request *req)
{
@@ -675,8 +681,11 @@ static int ldlm_callback_handler(struct ptlrpc_request *req)
return 0;
}
-static struct ldlm_bl_work_item *ldlm_bl_get_work(struct ldlm_bl_pool *blp)
+static int ldlm_bl_get_work(struct ldlm_bl_pool *blp,
+ struct ldlm_bl_work_item **p_blwi,
+ struct obd_export **p_exp)
{
+ int num_th = atomic_read(&blp->blp_num_threads);
struct ldlm_bl_work_item *blwi = NULL;
static unsigned int num_bl;
@@ -693,18 +702,18 @@ static struct ldlm_bl_work_item *ldlm_bl_get_work(struct ldlm_bl_pool *blp)
blwi_entry);
if (blwi) {
- if (++num_bl >= atomic_read(&blp->blp_num_threads))
+ if (++num_bl >= num_th)
num_bl = 0;
list_del(&blwi->blwi_entry);
}
spin_unlock(&blp->blp_lock);
+ *p_blwi = blwi;
- return blwi;
+ return (*p_blwi || *p_exp) ? 1 : 0;
}
/* This only contains temporary data until the thread starts */
struct ldlm_bl_thread_data {
- char bltd_name[CFS_CURPROC_COMM_MAX];
struct ldlm_bl_pool *bltd_blp;
struct completion bltd_comp;
int bltd_num;
@@ -712,19 +721,32 @@ struct ldlm_bl_thread_data {
static int ldlm_bl_thread_main(void *arg);
-static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp)
+static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp, bool check_busy)
{
struct ldlm_bl_thread_data bltd = { .bltd_blp = blp };
struct task_struct *task;
init_completion(&bltd.bltd_comp);
- bltd.bltd_num = atomic_read(&blp->blp_num_threads);
- snprintf(bltd.bltd_name, sizeof(bltd.bltd_name),
- "ldlm_bl_%02d", bltd.bltd_num);
- task = kthread_run(ldlm_bl_thread_main, &bltd, "%s", bltd.bltd_name);
+
+ bltd.bltd_num = atomic_inc_return(&blp->blp_num_threads);
+ if (bltd.bltd_num >= blp->blp_max_threads) {
+ atomic_dec(&blp->blp_num_threads);
+ return 0;
+ }
+
+ LASSERTF(bltd.bltd_num > 0, "thread num:%d\n", bltd.bltd_num);
+ if (check_busy &&
+ atomic_read(&blp->blp_busy_threads) < (bltd.bltd_num - 1)) {
+ atomic_dec(&blp->blp_num_threads);
+ return 0;
+ }
+
+ task = kthread_run(ldlm_bl_thread_main, &bltd, "ldlm_bl_%02d",
+ bltd.bltd_num);
if (IS_ERR(task)) {
CERROR("cannot start LDLM thread ldlm_bl_%02d: rc %ld\n",
- atomic_read(&blp->blp_num_threads), PTR_ERR(task));
+ bltd.bltd_num, PTR_ERR(task));
+ atomic_dec(&blp->blp_num_threads);
return PTR_ERR(task);
}
wait_for_completion(&bltd.bltd_comp);
@@ -732,6 +754,64 @@ static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp)
return 0;
}
+/* Not fatal if racy and have a few too many threads */
+static int ldlm_bl_thread_need_create(struct ldlm_bl_pool *blp,
+ struct ldlm_bl_work_item *blwi)
+{
+ if (atomic_read(&blp->blp_num_threads) >= blp->blp_max_threads)
+ return 0;
+
+ if (atomic_read(&blp->blp_busy_threads) <
+ atomic_read(&blp->blp_num_threads))
+ return 0;
+
+ if (blwi && (!blwi->blwi_ns || blwi->blwi_mem_pressure))
+ return 0;
+
+ return 1;
+}
+
+static int ldlm_bl_thread_blwi(struct ldlm_bl_pool *blp,
+ struct ldlm_bl_work_item *blwi)
+{
+ if (!blwi->blwi_ns)
+ /* added by ldlm_cleanup() */
+ return LDLM_ITER_STOP;
+
+ if (blwi->blwi_mem_pressure)
+ memory_pressure_set();
+
+ OBD_FAIL_TIMEOUT(OBD_FAIL_LDLM_PAUSE_CANCEL2, 4);
+
+ if (blwi->blwi_count) {
+ int count;
+
+ /*
+ * The special case when we cancel locks in lru
+ * asynchronously, we pass the list of locks here.
+ * Thus locks are marked LDLM_FL_CANCELING, but NOT
+ * canceled locally yet.
+ */
+ count = ldlm_cli_cancel_list_local(&blwi->blwi_head,
+ blwi->blwi_count,
+ LCF_BL_AST);
+ ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL,
+ blwi->blwi_flags);
+ } else {
+ ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld,
+ blwi->blwi_lock);
+ }
+ if (blwi->blwi_mem_pressure)
+ memory_pressure_clr();
+
+ if (blwi->blwi_flags & LCF_ASYNC)
+ kfree(blwi);
+ else
+ complete(&blwi->blwi_comp);
+
+ return 0;
+}
+
/**
* Main blocking requests processing thread.
*
@@ -742,76 +822,40 @@ static int ldlm_bl_thread_start(struct ldlm_bl_pool *blp)
static int ldlm_bl_thread_main(void *arg)
{
struct ldlm_bl_pool *blp;
+ struct ldlm_bl_thread_data *bltd = arg;
- {
- struct ldlm_bl_thread_data *bltd = arg;
-
- blp = bltd->bltd_blp;
-
- atomic_inc(&blp->blp_num_threads);
- atomic_inc(&blp->blp_busy_threads);
+ blp = bltd->bltd_blp;
- complete(&bltd->bltd_comp);
- /* cannot use bltd after this, it is only on caller's stack */
- }
+ complete(&bltd->bltd_comp);
+ /* cannot use bltd after this, it is only on caller's stack */
while (1) {
struct l_wait_info lwi = { 0 };
struct ldlm_bl_work_item *blwi = NULL;
- int busy;
-
- blwi = ldlm_bl_get_work(blp);
+ struct obd_export *exp = NULL;
+ int rc;
- if (!blwi) {
- atomic_dec(&blp->blp_busy_threads);
+ rc = ldlm_bl_get_work(blp, &blwi, &exp);
+ if (!rc)
l_wait_event_exclusive(blp->blp_waitq,
- (blwi = ldlm_bl_get_work(blp)),
+ ldlm_bl_get_work(blp, &blwi,
+ &exp),
&lwi);
- busy = atomic_inc_return(&blp->blp_busy_threads);
- } else {
- busy = atomic_read(&blp->blp_busy_threads);
- }
-
- if (!blwi->blwi_ns)
- /* added by ldlm_cleanup() */
- break;
+ atomic_inc(&blp->blp_busy_threads);
- /* Not fatal if racy and have a few too many threads */
- if (unlikely(busy < blp->blp_max_threads &&
- busy >= atomic_read(&blp->blp_num_threads) &&
- !blwi->blwi_mem_pressure))
+ if (ldlm_bl_thread_need_create(blp, blwi))
/* discard the return value, we tried */
- ldlm_bl_thread_start(blp);
-
- if (blwi->blwi_mem_pressure)
- memory_pressure_set();
-
- if (blwi->blwi_count) {
- int count;
- /* The special case when we cancel locks in LRU
- * asynchronously, we pass the list of locks here.
- * Thus locks are marked LDLM_FL_CANCELING, but NOT
- * canceled locally yet.
- */
- count = ldlm_cli_cancel_list_local(&blwi->blwi_head,
- blwi->blwi_count,
- LCF_BL_AST);
- ldlm_cli_cancel_list(&blwi->blwi_head, count, NULL,
- blwi->blwi_flags);
- } else {
- ldlm_handle_bl_callback(blwi->blwi_ns, &blwi->blwi_ld,
- blwi->blwi_lock);
- }
- if (blwi->blwi_mem_pressure)
- memory_pressure_clr();
+ ldlm_bl_thread_start(blp, true);
- if (blwi->blwi_flags & LCF_ASYNC)
- kfree(blwi);
- else
- complete(&blwi->blwi_comp);
+ if (blwi)
+ rc = ldlm_bl_thread_blwi(blp, blwi);
+
+ atomic_dec(&blp->blp_busy_threads);
+
+ if (rc == LDLM_ITER_STOP)
+ break;
}
- atomic_dec(&blp->blp_busy_threads);
atomic_dec(&blp->blp_num_threads);
complete(&blp->blp_comp);
return 0;
@@ -991,7 +1035,7 @@ static int ldlm_setup(void)
}
for (i = 0; i < blp->blp_min_threads; i++) {
- rc = ldlm_bl_thread_start(blp);
+ rc = ldlm_bl_thread_start(blp, false);
if (rc < 0)
goto out;
}
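The ldlm_lockd.c rework above lets the blocking-callback thread pool grow on demand: a worker spawns a sibling only while every existing thread is busy, and blp_max_threads caps the total. A simplified sketch of that heuristic using plain atomics and kthread_run(); the demo_* pool and worker are hypothetical stand-ins, not the real blp structures:

#include <linux/atomic.h>
#include <linux/err.h>
#include <linux/kthread.h>

struct demo_pool {
	atomic_t	nr_threads;	/* workers created so far */
	atomic_t	nr_busy;	/* workers currently handling an item */
	int		max_threads;
};

static int demo_worker(void *arg)
{
	/* A real worker would loop pulling work items off the pool's queue. */
	return 0;
}

/* Not fatal if racy and we end up with a few too many threads. */
static void demo_maybe_grow(struct demo_pool *pool)
{
	struct task_struct *task;
	int num;

	/* Someone is idle, no need to grow. */
	if (atomic_read(&pool->nr_busy) < atomic_read(&pool->nr_threads))
		return;

	num = atomic_inc_return(&pool->nr_threads);
	if (num > pool->max_threads) {
		atomic_dec(&pool->nr_threads);
		return;
	}

	task = kthread_run(demo_worker, pool, "demo_wk_%02d", num);
	if (IS_ERR(task))
		atomic_dec(&pool->nr_threads);
}

As in the patch, the counter is decremented again on every bail-out path so it only reflects threads that were actually started.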
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
index 8dfb3c8..cf3fc57 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_pool.c
@@ -900,8 +900,9 @@ static int ldlm_pools_recalc(enum ldlm_side client)
{
struct ldlm_namespace *ns;
struct ldlm_namespace *ns_old = NULL;
+ /* seconds of sleep if no active namespaces */
+ int time = LDLM_POOL_CLI_DEF_RECALC_PERIOD;
int nr;
- int time = 50; /* seconds of sleep if no active namespaces */
/*
* Recalc at least ldlm_namespace_nr_read(client) namespaces.
@@ -974,6 +975,10 @@ static int ldlm_pools_recalc(enum ldlm_side client)
ldlm_namespace_put(ns);
}
}
+
+ /* Wake up the blocking threads from time to time. */
+ ldlm_bl_thread_wakeup();
+
return time;
}
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
index ebfda36..84eeaa5 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_request.c
@@ -1029,13 +1029,23 @@ int ldlm_cli_cancel(const struct lustre_handle *lockh,
struct ldlm_lock *lock;
LIST_HEAD(cancels);
- /* concurrent cancels on the same handle can happen */
- lock = ldlm_handle2lock_long(lockh, LDLM_FL_CANCELING);
+ lock = ldlm_handle2lock_long(lockh, 0);
if (!lock) {
LDLM_DEBUG_NOLOCK("lock is already being destroyed");
return 0;
}
+ lock_res_and_lock(lock);
+ /* Lock is being canceled and the caller doesn't want to wait */
+ if (ldlm_is_canceling(lock) && (cancel_flags & LCF_ASYNC)) {
+ unlock_res_and_lock(lock);
+ LDLM_LOCK_RELEASE(lock);
+ return 0;
+ }
+
+ ldlm_set_canceling(lock);
+ unlock_res_and_lock(lock);
+
rc = ldlm_cli_cancel_local(lock);
if (rc == LDLM_FL_LOCAL_ONLY || cancel_flags & LCF_LOCAL) {
LDLM_LOCK_RELEASE(lock);
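The ldlm_cli_cancel() change above turns the CANCELING flag into a claim taken under the resource lock: an asynchronous caller that finds the flag already set returns immediately, while a synchronous caller proceeds and later waits for bl_done. A minimal sketch of that claim-or-bail check, with a plain bool standing in for the real ldlm_is_canceling()/ldlm_set_canceling() flag helpers:

#include <linux/spinlock.h>
#include <linux/types.h>

struct demo_handle {
	spinlock_t	guard;
	bool		canceling;
};

/*
 * Returns true when the caller should bail out: another thread already
 * owns the cancel and this caller asked for an asynchronous cancel.
 */
static bool demo_cancel_should_bail(struct demo_handle *h, bool async)
{
	bool bail = false;

	spin_lock(&h->guard);
	if (h->canceling && async)
		bail = true;
	else
		h->canceling = true;
	spin_unlock(&h->guard);

	return bail;
}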
diff --git a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
index d16f5e9..633f65b 100644
--- a/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
+++ b/drivers/staging/lustre/lustre/ldlm/ldlm_resource.c
@@ -806,7 +806,7 @@ static void cleanup_resource(struct ldlm_resource *res, struct list_head *q,
unlock_res(res);
ldlm_lock2handle(lock, &lockh);
- rc = ldlm_cli_cancel(&lockh, LCF_ASYNC);
+ rc = ldlm_cli_cancel(&lockh, LCF_LOCAL);
if (rc)
CERROR("ldlm_cli_cancel: %d\n", rc);
LDLM_LOCK_RELEASE(lock);