author      mav <mav@FreeBSD.org>    2014-01-22 23:52:20 +0000
committer   mav <mav@FreeBSD.org>    2014-01-22 23:52:20 +0000
commit      9b0c44b0a6b411d66547ee6ce611289ef831a73f (patch)
tree        b0d6da9226cd6929781db9747f22168d51987334 /sys/nfs
parent      695b5b9179f2f608ff6042914c460cc75dc11519 (diff)
MFC r260097:
Move most of the NFS file handle affinity code out of the heavily congested global RPC thread pool lock and protect it with its own set of locks. On synthetic benchmarks this improves the peak NFS request rate by 40%.
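
For readers skimming the diff below, the heart of the change is structural: one hashinit() table guarded by the global RPC pool lock becomes a fixed-size array of buckets, each with its own mutex, and each entry carries a pointer to its bucket's lock. What follows is a minimal userland sketch of that pattern, with pthread mutexes standing in for the kernel's mtx(9) primitives; every name in it is illustrative, not the FreeBSD API.

/*
 * Minimal userland sketch of the per-bucket locking introduced by this
 * commit.  pthread mutexes stand in for the kernel's mtx(9); all names
 * are illustrative, not the kernel API.
 */
#include <pthread.h>
#include <stdint.h>
#include <stdlib.h>
#include <sys/queue.h>

#define HASH_SIZE 251			/* prime, like FHA_HASH_SIZE */

struct entry {
	LIST_ENTRY(entry) link;
	uint64_t fh;			/* file handle this entry tracks */
	pthread_mutex_t *mtx;		/* points at the owning slot's lock */
};

struct slot {
	LIST_HEAD(, entry) list;	/* entries hashing to this slot */
	pthread_mutex_t mtx;		/* protects only this slot */
};

static struct slot table[HASH_SIZE];

static void
table_init(void)			/* cf. fha_init() */
{
	int i;

	for (i = 0; i < HASH_SIZE; i++) {
		LIST_INIT(&table[i].list);
		pthread_mutex_init(&table[i].mtx, NULL);
	}
}

/* Find or insert fh; returns with the slot lock held (caller unlocks e->mtx). */
static struct entry *
lookup(uint64_t fh)			/* cf. fha_hash_entry_lookup() */
{
	struct slot *s = &table[fh % HASH_SIZE];
	struct entry *e, *new_e;

	/* Allocate speculatively, before taking the lock, as the new code does. */
	new_e = calloc(1, sizeof(*new_e));
	if (new_e == NULL)
		return (NULL);		/* error handling elided in this sketch */
	new_e->fh = fh;
	new_e->mtx = &s->mtx;

	pthread_mutex_lock(&s->mtx);
	LIST_FOREACH(e, &s->list, link)
		if (e->fh == fh)
			break;
	if (e == NULL) {
		e = new_e;
		LIST_INSERT_HEAD(&s->list, e, link);
	} else
		free(new_e);		/* an entry already existed; discard ours */
	return (e);
}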
Diffstat (limited to 'sys/nfs')
-rw-r--r--    sys/nfs/nfs_fha.c    138
-rw-r--r--    sys/nfs/nfs_fha.h    14
2 files changed, 76 insertions(+), 76 deletions(-)
diff --git a/sys/nfs/nfs_fha.c b/sys/nfs/nfs_fha.c
index 1892729..2b29421 100644
--- a/sys/nfs/nfs_fha.c
+++ b/sys/nfs/nfs_fha.c
@@ -52,13 +52,10 @@ void
fha_init(struct fha_params *softc)
{
char tmpstr[128];
+ int i;
- /*
- * A small hash table to map filehandles to fha_hash_entry
- * structures.
- */
- softc->g_fha.hashtable = hashinit(256, M_NFS_FHA,
- &softc->g_fha.hashmask);
+ for (i = 0; i < FHA_HASH_SIZE; i++)
+ mtx_init(&softc->fha_hash[i].mtx, "fhalock", NULL, MTX_DEF);
/*
* Set the default tuning parameters.
@@ -117,8 +114,11 @@ fha_init(struct fha_params *softc)
void
fha_uninit(struct fha_params *softc)
{
+ int i;
+
sysctl_ctx_free(&softc->sysctl_ctx);
- hashdestroy(softc->g_fha.hashtable, M_NFS_FHA, softc->g_fha.hashmask);
+ for (i = 0; i < FHA_HASH_SIZE; i++)
+ mtx_destroy(&softc->fha_hash[i].mtx);
}
/*
@@ -207,8 +207,13 @@ static void
fha_hash_entry_destroy(struct fha_hash_entry *e)
{
- if (e->num_rw + e->num_exclusive)
- panic("nonempty fhe");
+ mtx_assert(e->mtx, MA_OWNED);
+ KASSERT(e->num_rw == 0,
+ ("%d reqs on destroyed fhe %p", e->num_rw, e));
+ KASSERT(e->num_exclusive == 0,
+ ("%d exclusive reqs on destroyed fhe %p", e->num_exclusive, e));
+ KASSERT(e->num_threads == 0,
+ ("%d threads on destroyed fhe %p", e->num_threads, e));
free(e, M_NFS_FHA);
}
@@ -216,6 +221,7 @@ static void
fha_hash_entry_remove(struct fha_hash_entry *e)
{
+ mtx_assert(e->mtx, MA_OWNED);
LIST_REMOVE(e, link);
fha_hash_entry_destroy(e);
}
@@ -224,36 +230,22 @@ static struct fha_hash_entry *
fha_hash_entry_lookup(struct fha_params *softc, u_int64_t fh)
{
SVCPOOL *pool;
-
- pool = *softc->pool;
-
+ struct fha_hash_slot *fhs;
struct fha_hash_entry *fhe, *new_fhe;
- LIST_FOREACH(fhe, &softc->g_fha.hashtable[fh % softc->g_fha.hashmask],
- link)
+ pool = *softc->pool;
+ fhs = &softc->fha_hash[fh % FHA_HASH_SIZE];
+ new_fhe = fha_hash_entry_new(fh);
+ new_fhe->mtx = &fhs->mtx;
+ mtx_lock(&fhs->mtx);
+ LIST_FOREACH(fhe, &fhs->list, link)
if (fhe->fh == fh)
break;
-
if (!fhe) {
- /* Allocate a new entry. */
- mtx_unlock(&pool->sp_lock);
- new_fhe = fha_hash_entry_new(fh);
- mtx_lock(&pool->sp_lock);
-
- /* Double-check to make sure we still need the new entry. */
- LIST_FOREACH(fhe,
- &softc->g_fha.hashtable[fh % softc->g_fha.hashmask], link)
- if (fhe->fh == fh)
- break;
- if (!fhe) {
- fhe = new_fhe;
- LIST_INSERT_HEAD(
- &softc->g_fha.hashtable[fh % softc->g_fha.hashmask],
- fhe, link);
- } else
- fha_hash_entry_destroy(new_fhe);
- }
-
+ fhe = new_fhe;
+ LIST_INSERT_HEAD(&fhs->list, fhe, link);
+ } else
+ fha_hash_entry_destroy(new_fhe);
return (fhe);
}
@@ -261,6 +253,8 @@ static void
fha_hash_entry_add_thread(struct fha_hash_entry *fhe, SVCTHREAD *thread)
{
+ mtx_assert(fhe->mtx, MA_OWNED);
+ thread->st_p2 = 0;
LIST_INSERT_HEAD(&fhe->threads, thread, st_alink);
fhe->num_threads++;
}
@@ -269,6 +263,9 @@ static void
fha_hash_entry_remove_thread(struct fha_hash_entry *fhe, SVCTHREAD *thread)
{
+ mtx_assert(fhe->mtx, MA_OWNED);
+ KASSERT(thread->st_p2 == 0,
+ ("%d reqs on removed thread %p", thread->st_p2, thread));
LIST_REMOVE(thread, st_alink);
fhe->num_threads--;
}
@@ -280,6 +277,7 @@ static void
fha_hash_entry_add_op(struct fha_hash_entry *fhe, int locktype, int count)
{
+ mtx_assert(fhe->mtx, MA_OWNED);
if (LK_EXCLUSIVE == locktype)
fhe->num_exclusive += count;
else
@@ -306,7 +304,7 @@ fha_hash_entry_choose_thread(struct fha_params *softc,
pool = *softc->pool;
LIST_FOREACH(thread, &fhe->threads, st_alink) {
- req_count = thread->st_reqcount;
+ req_count = thread->st_p2;
/* If there are any writes in progress, use the first thread. */
if (fhe->num_exclusive) {
@@ -322,7 +320,7 @@ fha_hash_entry_choose_thread(struct fha_params *softc,
* exceed our per-thread load limit in the process.
*/
offset1 = i->offset;
- offset2 = STAILQ_FIRST(&thread->st_reqs)->rq_p3;
+ offset2 = thread->st_p3;
if (((offset1 >= offset2)
&& ((offset1 - offset2) < (1 << softc->ctls.bin_shift)))
@@ -360,28 +358,11 @@ fha_hash_entry_choose_thread(struct fha_params *softc,
*/
if ((softc->ctls.max_nfsds_per_fh == 0) ||
(fhe->num_threads < softc->ctls.max_nfsds_per_fh)) {
- /*
- * We can add a new thread, so try for an idle thread
- * first, and fall back to this_thread if none are idle.
- */
- if (STAILQ_EMPTY(&this_thread->st_reqs)) {
- thread = this_thread;
+ thread = this_thread;
#if 0
- ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
- "fha: %p(%d)t", thread, thread->st_reqcount);
-#endif
- } else if ((thread = LIST_FIRST(&pool->sp_idlethreads))) {
-#if 0
- ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
- "fha: %p(%d)i", thread, thread->st_reqcount);
-#endif
- } else {
- thread = this_thread;
-#if 0
- ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
- "fha: %p(%d)b", thread, thread->st_reqcount);
+ ITRACE_CURPROC(ITRACE_NFS, ITRACE_INFO,
+ "fha: %p(%d)t", thread, thread->st_p2);
#endif
- }
fha_hash_entry_add_thread(fhe, thread);
} else {
/*
@@ -411,16 +392,16 @@ fha_assign(SVCTHREAD *this_thread, struct svc_req *req,
/* Check to see whether we're enabled. */
if (softc->ctls.enable == 0)
- return (this_thread);
+ goto thist;
/*
* Only do placement if this is an NFS request.
*/
if (req->rq_prog != NFS_PROG)
- return (this_thread);
+ goto thist;
if (req->rq_vers != 2 && req->rq_vers != 3)
- return (this_thread);
+ goto thist;
fha_extract_info(req, &i, cb);
@@ -440,8 +421,21 @@ fha_assign(SVCTHREAD *this_thread, struct svc_req *req,
thread = fha_hash_entry_choose_thread(softc, fhe, &i, this_thread);
KASSERT(thread, ("fha_assign: NULL thread!"));
fha_hash_entry_add_op(fhe, i.locktype, 1);
+ thread->st_p2++;
+ thread->st_p3 = i.offset;
+
+ /*
+ * Grab the pool lock here to not let chosen thread go away before
+ * the new request inserted to its queue while we drop fhe lock.
+ */
+ mtx_lock(&(*softc->pool)->sp_lock);
+ mtx_unlock(fhe->mtx);
return (thread);
+thist:
+ req->rq_p1 = NULL;
+ mtx_lock(&(*softc->pool)->sp_lock);
+ return (this_thread);
}
/*
@@ -452,6 +446,7 @@ void
fha_nd_complete(SVCTHREAD *thread, struct svc_req *req)
{
struct fha_hash_entry *fhe = req->rq_p1;
+ struct mtx *mtx;
/*
* This may be called for reqs that didn't go through
@@ -460,13 +455,18 @@ fha_nd_complete(SVCTHREAD *thread, struct svc_req *req)
if (!fhe)
return;
+ mtx = fhe->mtx;
+ mtx_lock(mtx);
fha_hash_entry_add_op(fhe, req->rq_p2, -1);
-
- if (thread->st_reqcount == 0) {
+ thread->st_p2--;
+ KASSERT(thread->st_p2 >= 0, ("Negative request count %d on %p",
+ thread->st_p2, thread));
+ if (thread->st_p2 == 0) {
fha_hash_entry_remove_thread(fhe, thread);
if (0 == fhe->num_rw + fhe->num_exclusive)
fha_hash_entry_remove(fhe);
}
+ mtx_unlock(mtx);
}
int
@@ -489,10 +489,9 @@ fhe_stats_sysctl(SYSCTL_HANDLER_ARGS, struct fha_params *softc)
}
pool = *softc->pool;
- mtx_lock(&pool->sp_lock);
count = 0;
- for (i = 0; i <= softc->g_fha.hashmask; i++)
- if (!LIST_EMPTY(&softc->g_fha.hashtable[i]))
+ for (i = 0; i < FHA_HASH_SIZE; i++)
+ if (!LIST_EMPTY(&softc->fha_hash[i].list))
count++;
if (count == 0) {
@@ -500,8 +499,9 @@ fhe_stats_sysctl(SYSCTL_HANDLER_ARGS, struct fha_params *softc)
goto out;
}
- for (i = 0; i <= softc->g_fha.hashmask; i++) {
- LIST_FOREACH(fhe, &softc->g_fha.hashtable[i], link) {
+ for (i = 0; i < FHA_HASH_SIZE; i++) {
+ mtx_lock(&softc->fha_hash[i].mtx);
+ LIST_FOREACH(fhe, &softc->fha_hash[i].list, link) {
sbuf_printf(&sb, "%sfhe %p: {\n", first ? "" : ", ", fhe);
sbuf_printf(&sb, " fh: %ju\n", (uintmax_t) fhe->fh);
@@ -512,8 +512,7 @@ fhe_stats_sysctl(SYSCTL_HANDLER_ARGS, struct fha_params *softc)
LIST_FOREACH(thread, &fhe->threads, st_alink) {
sbuf_printf(&sb, " thread %p offset %ju "
"(count %d)\n", thread,
- STAILQ_FIRST(&thread->st_reqs)->rq_p3,
- thread->st_reqcount);
+ thread->st_p3, thread->st_p2);
}
sbuf_printf(&sb, "}");
@@ -525,11 +524,10 @@ fhe_stats_sysctl(SYSCTL_HANDLER_ARGS, struct fha_params *softc)
break;
}
}
+ mtx_unlock(&softc->fha_hash[i].mtx);
}
out:
- if (pool)
- mtx_unlock(&pool->sp_lock);
sbuf_trim(&sb);
sbuf_finish(&sb);
error = sysctl_handle_string(oidp, sbuf_data(&sb), sbuf_len(&sb), req);
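
A subtlety in the fha_assign() hunk above deserves a note: the pool lock is taken before the per-entry (fhe) lock is dropped, so the chosen thread cannot be torn down between being selected and having the request queued on it. Sketched below with the same illustrative pthread names as before; this continues the earlier sketch and is not the kernel code.

/*
 * Hand-over-hand ordering from fha_assign(), sketched with pthreads.
 * Taking the pool lock while still holding the bucket lock closes the
 * window in which the chosen thread could go away.  Illustrative only.
 */
static pthread_mutex_t pool_mtx = PTHREAD_MUTEX_INITIALIZER;

static struct entry *
assign(uint64_t fh)
{
	struct entry *e = lookup(fh);	/* returns with *e->mtx held */

	/* ... choose a thread and account for the new request ... */

	pthread_mutex_lock(&pool_mtx);	/* pin the chosen thread first */
	pthread_mutex_unlock(e->mtx);	/* only then release the bucket */
	return (e);			/* caller queues the request, then drops pool_mtx */
}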
diff --git a/sys/nfs/nfs_fha.h b/sys/nfs/nfs_fha.h
index d3dd039..58aa7fe 100644
--- a/sys/nfs/nfs_fha.h
+++ b/sys/nfs/nfs_fha.h
@@ -35,11 +35,7 @@
#define FHA_DEF_MAX_NFSDS_PER_FH 8
#define FHA_DEF_MAX_REQS_PER_NFSD 0 /* Unlimited */
-/* This is the global structure that represents the state of the fha system. */
-struct fha_global {
- struct fha_hash_entry_list *hashtable;
- u_long hashmask;
-};
+#define FHA_HASH_SIZE 251
struct fha_ctls {
int enable;
@@ -62,6 +58,7 @@ struct fha_ctls {
* avoid contention between threads over single files.
*/
struct fha_hash_entry {
+ struct mtx *mtx;
LIST_ENTRY(fha_hash_entry) link;
u_int64_t fh;
u_int32_t num_rw;
@@ -72,6 +69,11 @@ struct fha_hash_entry {
LIST_HEAD(fha_hash_entry_list, fha_hash_entry);
+struct fha_hash_slot {
+ struct fha_hash_entry_list list;
+ struct mtx mtx;
+};
+
/* A structure used for passing around data internally. */
struct fha_info {
u_int64_t fh;
@@ -93,7 +95,7 @@ struct fha_callbacks {
};
struct fha_params {
- struct fha_global g_fha;
+ struct fha_hash_slot fha_hash[FHA_HASH_SIZE];
struct sysctl_ctx_list sysctl_ctx;
struct sysctl_oid *sysctl_tree;
struct fha_ctls ctls;
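
The header diff also explains the st_reqcount/st_reqs to st_p2/st_p3 swaps in the .c hunks: rather than deriving a thread's request count and current offset from its request queue, which is protected by the pool lock, the FHA code now caches both in the thread's spare st_p2 and st_p3 fields under the slot lock. A hedged sketch of that bookkeeping, continuing the illustrative names above; this is not the real SVCTHREAD layout.

/*
 * Per-thread FHA bookkeeping cached in spare thread fields (sketch).
 * req_count stands in for st_p2, last_offset for st_p3.
 */
struct worker {
	int      req_count;	/* requests currently bound to this thread */
	uint64_t last_offset;	/* offset of the most recent request */
};

static void
note_assign(struct worker *w, uint64_t offset)	/* cf. fha_assign() */
{
	w->req_count++;
	w->last_offset = offset;
}

static void
note_complete(struct worker *w)			/* cf. fha_nd_complete() */
{
	w->req_count--;		/* thread leaves the entry when this hits zero */
}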