author | phk <phk@FreeBSD.org> | 1997-05-04 09:17:38 +0000 |
---|---|---|
committer | phk <phk@FreeBSD.org> | 1997-05-04 09:17:38 +0000 |
commit | aa8738a5f336011292e62aa26b7cf0ada736ca83 (patch) | |
tree | 8de31b9e14d4699f2ab7ee0e9fe5d3dfd25e98bc /sys/kern/vfs_export.c | |
parent | 0f5d18fb9c7e65b06ec51611f5caf29f12bfd1f4 (diff) | |
1. Add a {pointer, v_id} pair to the vnode to store the reference to the
".." vnode. This is cheaper storage-wise than keeping it in the
namecache, and it makes more sense since it is a 1:1 mapping (see the
first sketch after this list).
2. Handle the case of "." more intelligently as well, rather than
stuffing the namecache with pointless entries.
3. Add two lists to the vnode and hang namecache entries which go from
or to this vnode. When cleaning a vnode, delete all namecache
entries it invalidates.
4. Never reuse namecache entries; malloc new ones when we need them and
free old ones when they die. There is no longer a hard limit on how
many we can have.
5. Remove the upper limit on the name length of namecache entries.
6. Make a global list for negative namecache entries and limit their
number to a sysctl'able (debug.ncnegfactor) fraction of the total
namecache (sketched after this list). Currently the default fraction
is 1/16th. (Suggestions for a better default wanted!)
7. Assign v_id correctly in the face of 32-bit rollover (a sketch
follows the list).
8. Remove the LRU list for namecache entries; it is not needed. Remove
the #ifdef NCH_STATISTICS stuff; it is not needed either.
9. Use the vnode freelist as a true LRU list, also for namecache accesses.
10. Reuse vnodes more aggressively but also more selectively; if we can't
reuse one, malloc a new one. There is no longer a hard limit on their
number; they grow to the point where we don't reuse potentially
usable vnodes. A vnode will not get recycled if it still has pages in
core or if it is the source of namecache entries. (Yes, this does
indeed work :-) "." and ".." are not namecache entries any longer...)
11. Do not overload the v_id field in namecache entries with whiteout
information; use a char-sized flags field instead, so we can get
rid of the vpid and v_id fields in the namecache struct. Since
we're linked to the vnodes and purged when they're cleaned, we don't
have to check the v_id any more.
12. NFS knew about the namecache's limit on name length; it shouldn't
have, and now it doesn't.
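To make the data-structure changes in items 1, 3, 4, 5 and 11 concrete, here is a minimal sketch, not the actual 1997 headers: v_dd, v_cache_src and v_cache_dst appear in the diff further down, while v_ddid follows from item 1's {pointer, v_id} pair and the namecache layout (nc_flag, nc_nlen, nc_name) is reconstructed from the description above.

```c
#include <sys/types.h>
#include <sys/queue.h>

struct vnode;				/* forward declaration */

/* Sketch of a namecache entry; the field names here are assumptions. */
struct namecache {
	LIST_ENTRY(namecache)	nc_src;	/* item 3: on nc_dvp->v_cache_src */
	TAILQ_ENTRY(namecache)	nc_dst;	/* item 3: on nc_vp->v_cache_dst */
	struct vnode	*nc_dvp;	/* directory holding the name */
	struct vnode	*nc_vp;		/* vnode the name resolves to */
	u_char		nc_flag;	/* item 11: e.g. a whiteout bit */
	int		nc_nlen;	/* item 5: no fixed cap any more */
	char		nc_name[1];	/* item 4: entry malloc'ed to fit */
};

/* New vnode fields; v_dd, v_cache_src, v_cache_dst are in the diff below. */
struct vnode {
	/* ...existing fields elided... */
	struct vnode	*v_dd;		/* item 1: the ".." vnode */
	u_long		v_ddid;		/* item 1: v_id of v_dd when cached */
	LIST_HEAD(, namecache)	v_cache_src;	/* entries from this vnode */
	TAILQ_HEAD(, namecache)	v_cache_dst;	/* entries to this vnode */
};
```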
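A hedged sketch of the item 6 cap on negative entries: keep them on their own global list in LRU order and evict from the head once they exceed 1/ncnegfactor of the whole cache. Apart from ncnegfactor itself (named in the log above), the identifiers numneg, numcache, ncneglist and cache_zap() are assumptions in the style of the surrounding code.

```c
static u_long numneg;			/* current negative entries */
static u_long numcache;			/* total namecache entries */
static int ncnegfactor = 16;		/* debug.ncnegfactor, default 1/16th */
SYSCTL_INT(_debug, OID_AUTO, ncnegfactor, CTLFLAG_RW, &ncnegfactor, 0, "");

static TAILQ_HEAD(, namecache) ncneglist =
	TAILQ_HEAD_INITIALIZER(ncneglist);	/* LRU: oldest at the head */

static void cache_zap(struct namecache *);	/* assumed: frees one entry */

static void
cache_trim_negative(void)
{
	struct namecache *ncp;

	/* Evict the oldest negative entries until back under the fraction. */
	while (numneg * ncnegfactor > numcache &&
	    (ncp = TAILQ_FIRST(&ncneglist)) != NULL)
		cache_zap(ncp);		/* must also unlink ncp from ncneglist */
}
```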
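Item 7 is about not letting a wrapped 32-bit id counter collide with the reserved "invalid" value. One simple way to do that, sketched under the assumption of a global counter named nextvnodeid and 0 as the reserved id (the commit's exact scheme may differ):

```c
static u_long nextvnodeid;		/* assumed global id counter */

static void
vfs_assign_vid(struct vnode *vp)
{
	if (++nextvnodeid == 0)		/* 32-bit rollover happened */
		nextvnodeid = 1;	/* keep 0 reserved for "stale" */
	vp->v_id = nextvnodeid;		/* v_id is the pre-existing field */
}
```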
Bugs:
The namecache statistics no longer include hits for ".."
and ".".
Performance impact:
Generally within +/- 0.5% for "normal" workstations, but
I hope this will allow the system to be self-tuning over a
wider range of "special" applications. The case where
RAM is available but unused for caching because we don't have
any vnodes should be gone.
Future work:
Straighten out the namecache statistics.
"desiredvnodes" is still used to (bogusly ?) size hash
tables in the filesystems.
I still have to find a way to safely free unused vnodes
back so their number can shrink when not needed.
There are a few uses of the v_id field left in the filesystems,
scheduled for demolition at a later time.
Maybe a one-slot cache for unused namecache entries should
be implemented to decrease the malloc/free frequency (a possible
shape is sketched just below).
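For that one-slot cache, a possible shape, entirely hypothetical (nothing like this is in the diff): park the most recently freed entry and hand it back on the next allocation when it is big enough. nc_size is an assumed field recording the allocated name space; M_CACHE is the classic BSD malloc type for namecache data.

```c
static struct namecache *ncp_spare;	/* at most one parked entry */

static struct namecache *
cache_alloc(int len)
{
	struct namecache *ncp;

	if (ncp_spare != NULL && ncp_spare->nc_size >= len) {
		ncp = ncp_spare;	/* reuse the parked entry */
		ncp_spare = NULL;
		return (ncp);
	}
	return (malloc(sizeof(*ncp) + len, M_CACHE, M_WAITOK));
}

static void
cache_free(struct namecache *ncp)
{
	if (ncp_spare == NULL)
		ncp_spare = ncp;	/* park it for the next allocation */
	else
		free(ncp, M_CACHE);
}
```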
Diffstat (limited to 'sys/kern/vfs_export.c')
-rw-r--r-- | sys/kern/vfs_export.c | 96 |
1 file changed, 52 insertions, 44 deletions
```diff
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index 61a4d14..295c233 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -36,7 +36,7 @@
  * SUCH DAMAGE.
  *
  *	@(#)vfs_subr.c	8.31 (Berkeley) 5/26/95
- *	$Id: vfs_subr.c,v 1.83 1997/04/25 06:47:12 peter Exp $
+ *	$Id: vfs_subr.c,v 1.84 1997/04/30 03:09:15 dyson Exp $
  */
 
 /*
@@ -78,6 +78,7 @@ extern void printlockedvnodes __P((void));
 static void	vclean __P((struct vnode *vp, int flags, struct proc *p));
 static void	vgonel __P((struct vnode *vp, struct proc *p));
 unsigned long	numvnodes;
+SYSCTL_INT(_debug, OID_AUTO, numvnodes, CTLFLAG_RD, &numvnodes, 0, "");
 static void	vputrele __P((struct vnode *vp, int put));
 
 enum vtype iftovt_tab[16] = {
@@ -342,54 +343,36 @@ getnewvnode(tag, mp, vops, vpp)
 	struct proc *p = curproc;	/* XXX */
 	struct vnode *vp;
 
-	simple_lock(&vnode_free_list_slock);
-retry:
 	/*
-	 * we allocate a new vnode if
-	 * 1. we don't have any free
-	 *	Pretty obvious, we actually used to panic, but that
-	 *	is a silly thing to do.
-	 * 2. we havn't filled our pool yet
-	 *	We don't want to trash the incore (VM-)vnodecache.
-	 * 3. if less that 1/4th of our vnodes are free.
-	 *	We don't want to trash the namei cache either.
+	 * We take the least recently used vnode from the freelist
+	 * if we can get it and it has no cached pages, and no
+	 * namecache entries are relative to it.
+	 * Otherwise we allocate a new vnode
 	 */
-	if (freevnodes < (numvnodes >> 2) ||
-	    numvnodes < desiredvnodes ||
-	    vnode_free_list.tqh_first == NULL) {
-		simple_unlock(&vnode_free_list_slock);
-		vp = (struct vnode *) malloc((u_long) sizeof *vp,
-		    M_VNODE, M_WAITOK);
-		bzero((char *) vp, sizeof *vp);
-		numvnodes++;
-	} else {
-		for (vp = vnode_free_list.tqh_first;
-		    vp != NULLVP; vp = vp->v_freelist.tqe_next) {
-			if (simple_lock_try(&vp->v_interlock))
-				break;
-		}
-		/*
-		 * Unless this is a bad time of the month, at most
-		 * the first NCPUS items on the free list are
-		 * locked, so this is close enough to being empty.
-		 */
-		if (vp == NULLVP) {
-			simple_unlock(&vnode_free_list_slock);
-			tablefull("vnode");
-			*vpp = 0;
-			return (ENFILE);
-		}
+
+	simple_lock(&vnode_free_list_slock);
+
+	TAILQ_FOREACH(vp, &vnode_free_list, v_freelist) {
+		if (!simple_lock_try(&vp->v_interlock))
+			continue;
 		if (vp->v_usecount)
 			panic("free vnode isn't");
-		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
-		if (vp->v_usage > 0) {
+
+		if (vp->v_object && vp->v_object->resident_page_count) {
+			/* Don't recycle if it's caching some pages */
 			simple_unlock(&vp->v_interlock);
-			--vp->v_usage;
-			TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
-			goto retry;
+			continue;
+		} else if (LIST_FIRST(&vp->v_cache_src)) {
+			/* Don't recycle if active in the namecache */
+			simple_unlock(&vp->v_interlock);
+			continue;
+		} else {
+			break;
 		}
+	}
+
+	if (vp) {
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
 		freevnodes--;
-		/* see comment on why 0xdeadb is set at end of vgone (below) */
 		vp->v_freelist.tqe_prev = (struct vnode **) 0xdeadb;
 		simple_unlock(&vnode_free_list_slock);
@@ -420,8 +403,17 @@ retry:
 		vp->v_clen = 0;
 		vp->v_socket = 0;
 		vp->v_writecount = 0;	/* XXX */
-		vp->v_usage = 0;
+	} else {
+		simple_unlock(&vnode_free_list_slock);
+		vp = (struct vnode *) malloc((u_long) sizeof *vp,
+		    M_VNODE, M_WAITOK);
+		bzero((char *) vp, sizeof *vp);
+		vp->v_dd = vp;
+		LIST_INIT(&vp->v_cache_src);
+		TAILQ_INIT(&vp->v_cache_dst);
+		numvnodes++;
 	}
+
 	vp->v_type = VNON;
 	cache_purge(vp);
 	vp->v_tag = tag;
@@ -1119,7 +1111,6 @@ vputrele(vp, put)
 	simple_lock(&vnode_free_list_slock);
 	if (vp->v_flag & VAGE) {
 		vp->v_flag &= ~VAGE;
-		vp->v_usage = 0;
 		if(vp->v_tag != VT_TFS)
 			TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
 	} else {
@@ -2147,3 +2138,20 @@ retry:
 retn:
 	return error;
 }
+
+void
+vtouch(vp)
+	struct vnode *vp;
+{
+	simple_lock(&vp->v_interlock);
+	if (vp->v_usecount) {
+		simple_unlock(&vp->v_interlock);
+		return;
+	}
+	if (simple_lock_try(&vnode_free_list_slock)) {
+		TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+		TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+		simple_unlock(&vnode_free_list_slock);
+	}
+	simple_unlock(&vp->v_interlock);
+}
```
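The vtouch() added at the end of the diff is what makes item 9 work: a vnode with v_usecount != 0 is not on the freelist, so the function bails out early, and it uses simple_lock_try() rather than blocking on the list lock because a missed touch only costs a little LRU accuracy. A hypothetical call site, presumably somewhere in the namecache hit path (not part of this diff):

```c
/*
 * Hypothetical caller: a namecache hit refreshes the vnode's position
 * on the freelist so the freelist stays in LRU order (item 9).
 */
static void
cache_hit(struct namecache *ncp)
{
	if (ncp->nc_vp != NULL)		/* negative entries have no vnode */
		vtouch(ncp->nc_vp);
}
```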