NFS: Cache state owners after files are closed

Servers have a finite amount of memory to store NFSv4 open and lock owners. Moreover, servers may have a difficult time determining when they can reap their state owner table, thanks to gray areas in the NFSv4 protocol specification. Thus clients should be careful to reuse state owners when possible. Currently Linux is not too careful. When a user has closed all her files on one mount point, the state owner's reference count goes to zero, and it is released. The next OPEN allocates a new one. A workload that serially opens and closes files can run through a large number of open owners this way. When a state owner's reference count goes to zero, slap it onto a free list for that nfs_server, with an expiry time. Garbage collect before looking for a state owner. This makes state owners for active users available for re-use. Now that there can be unused state owners remaining at umount time, purge the state owner free list when a server is destroyed. Also be sure not to reclaim unused state owners during state recovery. This change has benefits for the client as well. For some workloads, this approach drops the number of OPEN_CONFIRM calls from the same as the number of OPEN calls, down to just one. This reduces wire traffic and thus open(2) latency. Before this patch, untarring a kernel source tarball shows the OPEN_CONFIRM call counter steadily increasing through the test. With the patch, the OPEN_CONFIRM count remains at 1 throughout the entire untar. As long as the expiry time is kept short, I don't think garbage collection should be terribly expensive, although it does bounce the clp->cl_lock around a bit. [ At some point we should rationalize the use of the nfs_server ->destroy method. ] Signed-off-by: Chuck Lever <chuck.lever@oracle.com> [Trond: Fixed a garbage collection race and a few efficiency issues] Signed-off-by: Trond Myklebust <Trond.Myklebust@netapp.com>
author: Chuck Lever <chuck.lever@oracle.com> 2011-12-06 16:13:48 -0500
committer: Trond Myklebust <Trond.Myklebust@netapp.com> 2012-01-05 11:59:18 -0500
commit: 0aaaf5c424c7ffd6b0c4253251356558b16ef3a2 (patch)
tree: 8ef0eebc41a8e247d52280fd79d36934a71fcb00 /fs/nfs/nfs4state.c
parent: 414adf14cd3b52e411f79d941a15d0fd4af427fc (diff)
download: op-kernel-dev-0aaaf5c424c7ffd6b0c4253251356558b16ef3a2.zip
op-kernel-dev-0aaaf5c424c7ffd6b0c4253251356558b16ef3a2.tar.gz
1 files changed, 80 insertions, 9 deletions
diff --git a/fs/nfs/nfs4state.c b/fs/nfs/nfs4state.c
index 6354e4f..a53f33b 100644
--- a/fs/nfs/nfs4state.c
+++ b/fs/nfs/nfs4state.c
@@ -49,6 +49,7 @@
 #include <linux/ratelimit.h>
 #include <linux/workqueue.h>
 #include <linux/bitops.h>
+#include <linux/jiffies.h>
 
 #include "nfs4_fs.h"
 #include "callback.h"
@@ -388,6 +389,8 @@ nfs4_find_state_owner_locked(struct nfs_server *server, struct rpc_cred *cred)
 		else if (cred > sp->so_cred)
 			p = &parent->rb_right;
 		else {
+			if (!list_empty(&sp->so_lru))
+				list_del_init(&sp->so_lru);
 			atomic_inc(&sp->so_count);
 			return sp;
 		}
@@ -412,6 +415,8 @@ nfs4_insert_state_owner_locked(struct nfs4_state_owner *new)
 		else if (new->so_cred > sp->so_cred)
 			p = &parent->rb_right;
 		else {
+			if (!list_empty(&sp->so_lru))
+				list_del_init(&sp->so_lru);
 			atomic_inc(&sp->so_count);
 			return sp;
 		}
@@ -453,6 +458,7 @@ nfs4_alloc_state_owner(void)
 	spin_lock_init(&sp->so_sequence.lock);
 	INIT_LIST_HEAD(&sp->so_sequence.list);
 	atomic_set(&sp->so_count, 1);
+	INIT_LIST_HEAD(&sp->so_lru);
 	return sp;
 }
 
@@ -470,6 +476,38 @@ nfs4_drop_state_owner(struct nfs4_state_owner *sp)
 	}
 }
 
+static void nfs4_free_state_owner(struct nfs4_state_owner *sp)
+{
+	rpc_destroy_wait_queue(&sp->so_sequence.wait);
+	put_rpccred(sp->so_cred);
+	kfree(sp);
+}
+
+static void nfs4_gc_state_owners(struct nfs_server *server)
+{
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs4_state_owner *sp, *tmp;
+	unsigned long time_min, time_max;
+	LIST_HEAD(doomed);
+
+	spin_lock(&clp->cl_lock);
+	time_max = jiffies;
+	time_min = (long)time_max - (long)clp->cl_lease_time;
+	list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
+		/* NB: LRU is sorted so that oldest is at the head */
+		if (time_in_range(sp->so_expires, time_min, time_max))
+			break;
+		list_move(&sp->so_lru, &doomed);
+		nfs4_remove_state_owner_locked(sp);
+	}
+	spin_unlock(&clp->cl_lock);
+
+	list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
+		list_del(&sp->so_lru);
+		nfs4_free_state_owner(sp);
+	}
+}
+
 /**
  * nfs4_get_state_owner - Look up a state owner given a credential
  * @server: nfs_server to search
@@ -487,10 +525,10 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
 	sp = nfs4_find_state_owner_locked(server, cred);
 	spin_unlock(&clp->cl_lock);
 	if (sp != NULL)
-		return sp;
+		goto out;
 	new = nfs4_alloc_state_owner();
 	if (new == NULL)
-		return NULL;
+		goto out;
 	new->so_server = server;
 	new->so_cred = cred;
 	spin_lock(&clp->cl_lock);
@@ -502,26 +540,58 @@ struct nfs4_state_owner *nfs4_get_state_owner(struct nfs_server *server,
 		rpc_destroy_wait_queue(&new->so_sequence.wait);
 		kfree(new);
 	}
+out:
+	nfs4_gc_state_owners(server);
 	return sp;
 }
 
 /**
  * nfs4_put_state_owner - Release a nfs4_state_owner
  * @sp: state owner data to release
- *
  */
 void nfs4_put_state_owner(struct nfs4_state_owner *sp)
 {
-	struct nfs_client *clp = sp->so_server->nfs_client;
-	struct rpc_cred *cred = sp->so_cred;
+	struct nfs_server *server = sp->so_server;
+	struct nfs_client *clp = server->nfs_client;
 
 	if (!atomic_dec_and_lock(&sp->so_count, &clp->cl_lock))
 		return;
-	nfs4_remove_state_owner_locked(sp);
+
+	if (!RB_EMPTY_NODE(&sp->so_server_node)) {
+		sp->so_expires = jiffies;
+		list_add_tail(&sp->so_lru, &server->state_owners_lru);
+		spin_unlock(&clp->cl_lock);
+	} else {
+		nfs4_remove_state_owner_locked(sp);
+		spin_unlock(&clp->cl_lock);
+		nfs4_free_state_owner(sp);
+	}
+}
+
+/**
+ * nfs4_purge_state_owners - Release all cached state owners
+ * @server: nfs_server with cached state owners to release
+ *
+ * Called at umount time.  Remaining state owners will be on
+ * the LRU with ref count of zero.
+ */
+void nfs4_purge_state_owners(struct nfs_server *server)
+{
+	struct nfs_client *clp = server->nfs_client;
+	struct nfs4_state_owner *sp, *tmp;
+	LIST_HEAD(doomed);
+
+	spin_lock(&clp->cl_lock);
+	list_for_each_entry_safe(sp, tmp, &server->state_owners_lru, so_lru) {
+		list_move(&sp->so_lru, &doomed);
+		nfs4_remove_state_owner_locked(sp);
+	}
 	spin_unlock(&clp->cl_lock);
-	rpc_destroy_wait_queue(&sp->so_sequence.wait);
-	put_rpccred(cred);
-	kfree(sp);
+
+	list_for_each_entry_safe(sp, tmp, &doomed, so_lru) {
+		list_del(&sp->so_lru);
+		nfs4_free_state_owner(sp);
+	}
 }
 
 static struct nfs4_state *
@@ -1393,6 +1463,7 @@ static int nfs4_do_reclaim(struct nfs_client *clp, const struct nfs4_state_recov
 restart:
 	rcu_read_lock();
 	list_for_each_entry_rcu(server, &clp->cl_superblocks, client_link) {
+		nfs4_purge_state_owners(server);
 		spin_lock(&clp->cl_lock);
 		for (pos = rb_first(&server->state_owners);
 		     pos != NULL;
author	Chuck Lever <chuck.lever@oracle.com>	2011-12-06 16:13:48 -0500
committer	Trond Myklebust <Trond.Myklebust@netapp.com>	2012-01-05 11:59:18 -0500
commit	0aaaf5c424c7ffd6b0c4253251356558b16ef3a2 (patch)
tree	8ef0eebc41a8e247d52280fd79d36934a71fcb00 /fs/nfs/nfs4state.c
parent	414adf14cd3b52e411f79d941a15d0fd4af427fc (diff)
download	op-kernel-dev-0aaaf5c424c7ffd6b0c4253251356558b16ef3a2.zip op-kernel-dev-0aaaf5c424c7ffd6b0c4253251356558b16ef3a2.tar.gz