summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authordillon <dillon@FreeBSD.org>2001-10-01 04:33:35 +0000
committerdillon <dillon@FreeBSD.org>2001-10-01 04:33:35 +0000
commit5a5b9f79f48be499ceca11f3d54c0525935b9ac1 (patch)
tree24142bf3c28ad5430fe7af0d88da2999098b7f21
parent86409d0972b660f68712b2324920bd859a72cf72 (diff)
downloadFreeBSD-src-5a5b9f79f48be499ceca11f3d54c0525935b9ac1.zip
FreeBSD-src-5a5b9f79f48be499ceca11f3d54c0525935b9ac1.tar.gz
After extensive testing it has been determined that adding complexity
to avoid removing higher level directory vnodes from the namecache has no perceivable effect and will be removed. This is especially true when vmiodirenable is turned on, which it is by default now. ( vmiodirenable makes a huge difference in directory caching ). The vfs.vmiodirenable and vfs.nameileafonly sysctls have been left in to allow further testing, but I expect to rip out vfs.nameileafonly soon too. I have also determined through testing that the real problem with numvnodes getting too large is due to the VM Page cache preventing the vnode from being reclaimed. The directory stuff made only a tiny dent relative to Poul's original code, enough so that some tests succeeded. But tests with several million small files show that the bigger problem is the VM Page cache. This will have to be addressed by a future commit.

MFC after: 3 days
-rw-r--r--sys/kern/vfs_cache.c31
-rw-r--r--sys/kern/vfs_subr.c97
-rw-r--r--sys/sys/vnode.h2
3 files changed, 100 insertions, 30 deletions
diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c
index 9483b9b..8301821 100644
--- a/sys/kern/vfs_cache.c
+++ b/sys/kern/vfs_cache.c
@@ -101,8 +101,10 @@ static u_long numcache; /* number of cache entries allocated */
SYSCTL_ULONG(_debug, OID_AUTO, numcache, CTLFLAG_RD, &numcache, 0, "");
static u_long numcachehv; /* number of cache entries with vnodes held */
SYSCTL_ULONG(_debug, OID_AUTO, numcachehv, CTLFLAG_RD, &numcachehv, 0, "");
+#if 0
static u_long numcachepl; /* number of cache purge for leaf entries */
SYSCTL_ULONG(_debug, OID_AUTO, numcachepl, CTLFLAG_RD, &numcachepl, 0, "");
+#endif
struct nchstats nchstats; /* cache effectiveness statistics */
static int doingcache = 1; /* 1 => enable the cache */
@@ -247,6 +249,31 @@ cache_zap(ncp)
}
/*
+ * cache_leaf_test()
+ *
+ * Test whether this (directory) vnode's namei cache entry contains
+ * subdirectories or not. Used to determine whether the directory is
+ * a leaf in the namei cache or not. Note: the directory may still
+ * contain files in the namei cache.
+ *
+ * Returns 0 if the directory is a leaf, -1 if it isn't.
+ */
+int
+cache_leaf_test(struct vnode *vp)
+{
+ struct namecache *ncpc;
+
+ for (ncpc = LIST_FIRST(&vp->v_cache_src);
+ ncpc != NULL;
+ ncpc = LIST_NEXT(ncpc, nc_src)
+ ) {
+ if (ncpc->nc_vp != NULL && ncpc->nc_vp->v_type == VDIR)
+ return(-1);
+ }
+ return(0);
+}
+
+/*
* Lookup an entry in the cache
*
* We don't do this if the segment name is long, simply so the cache
@@ -499,6 +526,8 @@ cache_purgevfs(mp)
}
}
+#if 0
+
/*
* Flush all dirctory entries with no child directories held in
* the cache.
@@ -555,6 +584,8 @@ cache_purgeleafdirs(ndir)
numcachepl++;
}
+#endif
+
/*
* Perform canonical checks and cache lookup and pass on to filesystem
* through the vop_cachedlookup only if needed.
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 8c67662..8fdd423 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -110,6 +110,8 @@ SYSCTL_LONG(_debug, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, ""
/* Number of vnodes in the free list. */
static u_long freevnodes = 0;
SYSCTL_LONG(_debug, OID_AUTO, freevnodes, CTLFLAG_RD, &freevnodes, 0, "");
+
+#if 0
/* Number of vnode allocation. */
static u_long vnodeallocs = 0;
SYSCTL_LONG(_debug, OID_AUTO, vnodeallocs, CTLFLAG_RD, &vnodeallocs, 0, "");
@@ -125,6 +127,7 @@ SYSCTL_LONG(_debug, OID_AUTO, vnoderecycleminfreevn, CTLFLAG_RW, &vnoderecyclemi
/* Number of vnodes attempted to recycle at a time. */
static u_long vnoderecyclenumber = 3000;
SYSCTL_LONG(_debug, OID_AUTO, vnoderecyclenumber, CTLFLAG_RW, &vnoderecyclenumber, 0, "");
+#endif
/*
* Various variables used for debugging the new implementation of
@@ -142,6 +145,8 @@ SYSCTL_INT(_vfs, OID_AUTO, reassignbufsortbad, CTLFLAG_RW, &reassignbufsortbad,
/* Set to 0 for old insertion-sort based reassignbuf, 1 for modern method. */
static int reassignbufmethod = 1;
SYSCTL_INT(_vfs, OID_AUTO, reassignbufmethod, CTLFLAG_RW, &reassignbufmethod, 0, "");
+static int nameileafonly = 0;
+SYSCTL_INT(_vfs, OID_AUTO, nameileafonly, CTLFLAG_RW, &nameileafonly, 0, "");
#ifdef ENABLE_VFS_IOOPT
/* See NOTES for a description of this setting. */
@@ -238,6 +243,9 @@ SYSCTL_INT(_debug, OID_AUTO, rush_requests, CTLFLAG_RW, &stat_rush_requests, 0,
int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
&desiredvnodes, 0, "Maximum number of vnodes");
+static int minvnodes;
+SYSCTL_INT(_kern, KERN_MAXVNODES, minvnodes, CTLFLAG_RW,
+ &minvnodes, 0, "Minimum number of vnodes");
/*
* Initialize the vnode management data structures.
@@ -247,6 +255,7 @@ vntblinit(void *dummy __unused)
{
desiredvnodes = maxproc + cnt.v_page_count / 4;
+ minvnodes = desiredvnodes / 4;
mtx_init(&mountlist_mtx, "mountlist", MTX_DEF);
mtx_init(&mntvnode_mtx, "mntvnode", MTX_DEF);
mtx_init(&mntid_mtx, "mntid", MTX_DEF);
@@ -539,40 +548,68 @@ getnewvnode(tag, mp, vops, vpp)
s = splbio();
mtx_lock(&vnode_free_list_mtx);
- if (wantfreevnodes && freevnodes < wantfreevnodes) {
- vp = NULL;
- } else if (!wantfreevnodes && freevnodes <= desiredvnodes) {
- /*
- * XXX: this is only here to be backwards compatible
- */
+ if (freevnodes < wantfreevnodes) {
vp = NULL;
- } else for (count = 0; count < freevnodes; count++) {
- vp = TAILQ_FIRST(&vnode_free_list);
- if (vp == NULL || vp->v_usecount)
- panic("getnewvnode: free vnode isn't");
- TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ } else if (numvnodes >= minvnodes) {
+ for (count = 0; count < freevnodes; count++) {
+ vp = TAILQ_FIRST(&vnode_free_list);
+ if (vp == NULL || vp->v_usecount)
+ panic("getnewvnode: free vnode isn't");
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
- /*
- * Don't recycle if active in the namecache or
- * if it still has cached pages or we cannot get
- * its interlock.
- */
- if (LIST_FIRST(&vp->v_cache_src) != NULL ||
- (VOP_GETVOBJECT(vp, &object) == 0 &&
- (object->resident_page_count || object->ref_count)) ||
- !mtx_trylock(&vp->v_interlock)) {
+ /*
+ * Don't recycle if we still have cached pages or if
+ * we cannot get the interlock.
+ */
+ if ((VOP_GETVOBJECT(vp, &object) == 0 &&
+ (object->resident_page_count ||
+ object->ref_count)) ||
+ !mtx_trylock(&vp->v_interlock)) {
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp,
+ v_freelist);
+ vp = NULL;
+ continue;
+ }
+ if (LIST_FIRST(&vp->v_cache_src)) {
+ /*
+ * note: nameileafonly sysctl is temporary,
+ * for debugging only, and will eventually be
+ * removed.
+ */
+ if (nameileafonly > 0) {
+ /*
+ * Do not reuse namei-cached directory
+ * vnodes that have cached
+ * subdirectories.
+ */
+ if (cache_leaf_test(vp) < 0) {
+ mtx_unlock(&vp->v_interlock);
+ vp = NULL;
+ continue;
+ }
+ } else if (nameileafonly < 0 ||
+ vmiodirenable == 0) {
+ /*
+ * Do not reuse namei-cached directory
+ * vnodes if nameileafonly is -1 or
+ * if VMIO backing for directories is
+ * turned off (otherwise we reuse them
+ * too quickly).
+ */
+ mtx_unlock(&vp->v_interlock);
+ vp = NULL;
+ continue;
+ }
+ }
+ /*
+ * Skip over it if its filesystem is being suspended.
+ */
+ if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
+ break;
+ mtx_unlock(&vp->v_interlock);
TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
vp = NULL;
- continue;
}
- /*
- * Skip over it if its filesystem is being suspended.
- */
- if (vn_start_write(vp, &vnmp, V_NOWAIT) == 0)
- break;
- mtx_unlock(&vp->v_interlock);
- TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
- vp = NULL;
}
if (vp) {
vp->v_flag |= VDOOMED;
@@ -636,6 +673,7 @@ getnewvnode(tag, mp, vops, vpp)
vfs_object_create(vp, td, td->td_proc->p_ucred);
+#if 0
vnodeallocs++;
if (vnodeallocs % vnoderecycleperiod == 0 &&
freevnodes < vnoderecycleminfreevn &&
@@ -643,6 +681,7 @@ getnewvnode(tag, mp, vops, vpp)
/* Recycle vnodes. */
cache_purgeleafdirs(vnoderecyclenumber);
}
+#endif
return (0);
}
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index f3d0995..58221812 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -559,7 +559,7 @@ int cache_lookup __P((struct vnode *dvp, struct vnode **vpp,
struct componentname *cnp));
void cache_purge __P((struct vnode *vp));
void cache_purgevfs __P((struct mount *mp));
-void cache_purgeleafdirs __P((int ndir));
+int cache_leaf_test __P((struct vnode *vp));
void cvtstat __P((struct stat *st, struct ostat *ost));
void cvtnstat __P((struct stat *sb, struct nstat *nsb));
int getnewvnode __P((enum vtagtype tag,
OpenPOWER on IntegriCloud