path: root/sys/kern
author    jeff <jeff@FreeBSD.org>    2005-03-25 05:34:39 +0000
committer jeff <jeff@FreeBSD.org>    2005-03-25 05:34:39 +0000
commit    6d72a7bd6045cc1f68678453d98166ef7d09756a (patch)
tree      f75be7c40c37c5f9dc44ae661dbe3de17b43233b /sys/kern
parent    da9b203aafc8ae8081f508c8c207f9711a8ca52c (diff)
- Don't recycle vnodes anymore.  Free them once they are dead.  getnewvnode
  now always allocates a new vnode.
- Define a new function, vnlru_free, which frees vnodes from the free list.
  It takes as a parameter the number of vnodes to free, which is
  freevnodes - wantfreevnodes when called from vnlru_proc or 1 when called
  from getnewvnode().  For now, getnewvnode() still tries to reclaim a free
  vnode before creating a new one when we are near the limit.
- Define a function, vdestroy, which handles the actual release of memory
  and teardown of locks, etc.  This could become a uma_dtor() routine.
- Get rid of minvnodes.  Now wantfreevnodes is 1/4th the max vnodes.  This
  keeps more unreferenced vnodes around so that files which have only been
  stat'd are less likely to be kicked out of the system before we have a
  chance to read them, etc.  These vnodes may still be freed via the normal
  vnlru_proc() routines, which may some day become a real LRU.
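
The policy change can be summarized in a short sketch (illustrative only, not
part of the committed diff; vnlru_trim_sketch is a hypothetical wrapper, while
vnode_free_list_mtx, freevnodes, wantfreevnodes, and vnlru_free() are the
globals and helpers already defined in vfs_subr.c): vnlru_proc() frees the
entire excess above the target, whereas getnewvnode() frees at most one vnode
before allocating a fresh one.

	/*
	 * Sketch: trim the free list back toward the wantfreevnodes
	 * target.  vnlru_proc() passes the whole excess to vnlru_free();
	 * getnewvnode() only lends its context to free a single vnode.
	 * vnlru_free() expects the free list mutex to be held.
	 */
	static void
	vnlru_trim_sketch(int from_getnewvnode)
	{

		mtx_lock(&vnode_free_list_mtx);
		if (freevnodes > wantfreevnodes)
			vnlru_free(from_getnewvnode ? 1 :
			    (int)(freevnodes - wantfreevnodes));
		mtx_unlock(&vnode_free_list_mtx);
	}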
Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/vfs_subr.c  237
1 file changed, 116 insertions(+), 121 deletions(-)
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index bf12f58..187f272 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -93,6 +93,8 @@ static void vdropl(struct vnode *vp);
static void vinactive(struct vnode *, struct thread *);
static void v_incr_usecount(struct vnode *, int);
static void vfree(struct vnode *);
+static void vnlru_free(int);
+static void vdestroy(struct vnode *);
/*
* Enable Giant pushdown based on whether or not the vm is mpsafe in this
@@ -134,10 +136,11 @@ int vttoif_tab[9] = {
static TAILQ_HEAD(freelst, vnode) vnode_free_list;
/*
- * Minimum number of free vnodes. If there are fewer than this free vnodes,
- * getnewvnode() will return a newly allocated vnode.
+ * Free vnode target. Free vnodes may simply be files which have been stat'd
+ * but not read. This is somewhat common, and a small cache of such files
+ * should be kept to avoid recreation costs.
*/
-static u_long wantfreevnodes = 25;
+static u_long wantfreevnodes;
SYSCTL_LONG(_vfs, OID_AUTO, wantfreevnodes, CTLFLAG_RW, &wantfreevnodes, 0, "");
/* Number of vnodes in the free list. */
static u_long freevnodes;
@@ -251,9 +254,8 @@ static enum { SYNCER_RUNNING, SYNCER_SHUTTING_DOWN, SYNCER_FINAL_DELAY }
int desiredvnodes;
SYSCTL_INT(_kern, KERN_MAXVNODES, maxvnodes, CTLFLAG_RW,
&desiredvnodes, 0, "Maximum number of vnodes");
-static int minvnodes;
SYSCTL_INT(_kern, OID_AUTO, minvnodes, CTLFLAG_RW,
- &minvnodes, 0, "Minimum number of vnodes");
+ &wantfreevnodes, 0, "Minimum number of vnodes (legacy)");
static int vnlru_nowhere;
SYSCTL_INT(_debug, OID_AUTO, vnlru_nowhere, CTLFLAG_RW,
&vnlru_nowhere, 0, "Number of times the vnlru process ran without success");
@@ -295,7 +297,7 @@ vntblinit(void *dummy __unused)
desiredvnodes, MAXVNODES_MAX);
desiredvnodes = MAXVNODES_MAX;
}
- minvnodes = desiredvnodes / 4;
+ wantfreevnodes = desiredvnodes / 4;
mtx_init(&mountlist_mtx, "mountlist", NULL, MTX_DEF);
mtx_init(&mntid_mtx, "mntid", NULL, MTX_DEF);
TAILQ_INIT(&vnode_free_list);
@@ -588,6 +590,51 @@ vlrureclaim(struct mount *mp)
}
/*
+ * Attempt to keep the free list at wantfreevnodes length.
+ */
+static void
+vnlru_free(int count)
+{
+ struct vnode *vp;
+
+ mtx_assert(&vnode_free_list_mtx, MA_OWNED);
+ for (; count > 0; count--) {
+ vp = TAILQ_FIRST(&vnode_free_list);
+ /*
+ * The list can be modified while the free_list_mtx
+ * has been dropped and vp could be NULL here.
+ */
+ if (!vp)
+ break;
+ TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+ /*
+ * Don't recycle if we can't get the interlock.
+ */
+ if (!VI_TRYLOCK(vp))
+ continue;
+ if (!VCANRECYCLE(vp)) {
+ VI_UNLOCK(vp);
+ continue;
+ }
+ /*
+ * We assume success to avoid having to relock the freelist
+ * in the common case; simply restore counts on failure.
+ */
+ freevnodes--;
+ numvnodes--;
+ mtx_unlock(&vnode_free_list_mtx);
+ if (vtryrecycle(vp) != 0) {
+ mtx_lock(&vnode_free_list_mtx);
+ freevnodes++;
+ numvnodes++;
+ continue;
+ }
+ vdestroy(vp);
+ mtx_lock(&vnode_free_list_mtx);
+ }
+}
+/*
* Attempt to recycle vnodes in a context that is always safe to block.
* Calling vlrurecycle() from the bowels of filesystem code has some
* interesting deadlock problems.
@@ -611,7 +658,9 @@ vnlru_proc(void)
for (;;) {
kthread_suspend_check(p);
mtx_lock(&vnode_free_list_mtx);
- if (numvnodes - freevnodes <= desiredvnodes * 9 / 10) {
+ if (freevnodes > wantfreevnodes)
+ vnlru_free(freevnodes - wantfreevnodes);
+ if (numvnodes <= desiredvnodes * 9 / 10) {
vnlruproc_sig = 0;
wakeup(&vnlruproc_sig);
msleep(vnlruproc, &vnode_free_list_mtx,
@@ -657,6 +706,33 @@ SYSINIT(vnlru, SI_SUB_KTHREAD_UPDATE, SI_ORDER_FIRST, kproc_start, &vnlru_kp)
* Routines having to do with the management of the vnode table.
*/
+static void
+vdestroy(struct vnode *vp)
+{
+ struct bufobj *bo;
+
+ bo = &vp->v_bufobj;
+ VNASSERT(vp->v_data == NULL, vp, ("cleaned vnode isn't"));
+ VNASSERT(vp->v_usecount == 0, vp, ("Non-zero use count"));
+ VNASSERT(vp->v_writecount == 0, vp, ("Non-zero write count"));
+ VNASSERT(bo->bo_numoutput == 0, vp, ("Clean vnode has pending I/O's"));
+ VNASSERT(bo->bo_clean.bv_cnt == 0, vp, ("cleanbufcnt not 0"));
+ VNASSERT(bo->bo_clean.bv_root == NULL, vp, ("cleanblkroot not NULL"));
+ VNASSERT(bo->bo_dirty.bv_cnt == 0, vp, ("dirtybufcnt not 0"));
+ VNASSERT(bo->bo_dirty.bv_root == NULL, vp, ("dirtyblkroot not NULL"));
+#ifdef MAC
+ mac_destroy_vnode(vp);
+#endif
+ if (vp->v_pollinfo != NULL) {
+ knlist_destroy(&vp->v_pollinfo->vpi_selinfo.si_note);
+ mtx_destroy(&vp->v_pollinfo->vpi_lock);
+ uma_zfree(vnodepoll_zone, vp->v_pollinfo);
+ }
+ lockdestroy(vp->v_vnlock);
+ mtx_destroy(&vp->v_interlock);
+ uma_zfree(vnode_zone, vp);
+}
+
/*
* Check to see if a free vnode can be recycled. If it can,
* recycle it and return it with the vnode interlock held.
@@ -728,18 +804,18 @@ getnewvnode(tag, mp, vops, vpp)
struct vnode **vpp;
{
struct vnode *vp = NULL;
- struct vpollinfo *pollinfo = NULL;
struct bufobj *bo;
mtx_lock(&vnode_free_list_mtx);
-
/*
- * Try to reuse vnodes if we hit the max. This situation only
- * occurs in certain large-memory (2G+) situations. We cannot
- * attempt to directly reclaim vnodes due to nasty recursion
- * problems.
+ * Lend our context to reclaim vnodes if they've exceeded the max.
*/
- while (numvnodes - freevnodes > desiredvnodes) {
+ if (freevnodes > wantfreevnodes)
+ vnlru_free(1);
+ /*
+ * Wait for available vnodes.
+ */
+ while (numvnodes > desiredvnodes) {
if (vnlruproc_sig == 0) {
vnlruproc_sig = 1; /* avoid unnecessary wakeups */
wakeup(vnlruproc);
@@ -747,122 +823,40 @@ getnewvnode(tag, mp, vops, vpp)
msleep(&vnlruproc_sig, &vnode_free_list_mtx, PVFS,
"vlruwk", hz);
}
-
+ numvnodes++;
+ mtx_unlock(&vnode_free_list_mtx);
+ vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO);
/*
- * Attempt to reuse a vnode already on the free list, allocating
- * a new vnode if we can't find one or if we have not reached a
- * good minimum for good LRU performance.
+ * Setup locks.
*/
-
- if (freevnodes >= wantfreevnodes && numvnodes >= minvnodes) {
- int error;
- int count;
-
- for (count = 0; count < freevnodes; vp = NULL, count++) {
- vp = TAILQ_FIRST(&vnode_free_list);
- /*
- * The list can be modified while the free_list_mtx
- * has been dropped and vp could be NULL here.
- */
- if (!vp)
- break;
- TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
- TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
- /*
- * Don't recycle if we can't get the interlock.
- */
- if (!VI_TRYLOCK(vp))
- continue;
- if (!VCANRECYCLE(vp)) {
- VI_UNLOCK(vp);
- continue;
- }
- mtx_unlock(&vnode_free_list_mtx);
- error = vtryrecycle(vp);
- mtx_lock(&vnode_free_list_mtx);
- if (error == 0)
- break;
- }
- }
- if (vp) {
- freevnodes--;
- bo = &vp->v_bufobj;
- mtx_unlock(&vnode_free_list_mtx);
-
-#ifdef INVARIANTS
- {
- if (vp->v_data)
- printf("cleaned vnode isn't, "
- "address %p, inode %p\n",
- vp, vp->v_data);
- if (bo->bo_numoutput)
- panic("%p: Clean vnode has pending I/O's", vp);
- if (vp->v_usecount != 0)
- panic("%p: Non-zero use count", vp);
- if (vp->v_writecount != 0)
- panic("%p: Non-zero write count", vp);
- }
-#endif
- if ((pollinfo = vp->v_pollinfo) != NULL) {
- /*
- * To avoid lock order reversals, the call to
- * uma_zfree() must be delayed until the vnode
- * interlock is released.
- */
- vp->v_pollinfo = NULL;
- }
-#ifdef MAC
- mac_destroy_vnode(vp);
-#endif
- vp->v_iflag = 0;
- vp->v_vflag = 0;
- vp->v_lastw = 0;
- vp->v_lasta = 0;
- vp->v_cstart = 0;
- vp->v_clen = 0;
- bzero(&vp->v_un, sizeof vp->v_un);
- lockdestroy(vp->v_vnlock);
- lockinit(vp->v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOPAUSE);
- VNASSERT(bo->bo_clean.bv_cnt == 0, vp,
- ("cleanbufcnt not 0"));
- VNASSERT(bo->bo_clean.bv_root == NULL, vp,
- ("cleanblkroot not NULL"));
- VNASSERT(bo->bo_dirty.bv_cnt == 0, vp,
- ("dirtybufcnt not 0"));
- VNASSERT(bo->bo_dirty.bv_root == NULL, vp,
- ("dirtyblkroot not NULL"));
- } else {
- numvnodes++;
- mtx_unlock(&vnode_free_list_mtx);
-
- vp = (struct vnode *) uma_zalloc(vnode_zone, M_WAITOK|M_ZERO);
- mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF);
- vp->v_dd = vp;
- bo = &vp->v_bufobj;
- bo->__bo_vnode = vp;
- bo->bo_mtx = &vp->v_interlock;
- vp->v_vnlock = &vp->v_lock;
- lockinit(vp->v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOPAUSE);
- cache_purge(vp); /* Sets up v_id. */
- LIST_INIT(&vp->v_cache_src);
- TAILQ_INIT(&vp->v_cache_dst);
- }
-
- TAILQ_INIT(&bo->bo_clean.bv_hd);
- TAILQ_INIT(&bo->bo_dirty.bv_hd);
+ vp->v_vnlock = &vp->v_lock;
+ mtx_init(&vp->v_interlock, "vnode interlock", NULL, MTX_DEF);
+ lockinit(vp->v_vnlock, PVFS, tag, VLKTIMEOUT, LK_NOPAUSE);
+ /*
+ * Initialize bufobj.
+ */
+ bo = &vp->v_bufobj;
+ bo->__bo_vnode = vp;
+ bo->bo_mtx = &vp->v_interlock;
bo->bo_ops = &buf_ops_bio;
bo->bo_private = vp;
+ TAILQ_INIT(&bo->bo_clean.bv_hd);
+ TAILQ_INIT(&bo->bo_dirty.bv_hd);
+ /*
+ * Initialize namecache.
+ */
+ vp->v_dd = vp;
+ LIST_INIT(&vp->v_cache_src);
+ TAILQ_INIT(&vp->v_cache_dst);
+ cache_purge(vp); /* Sets up v_id. */
+ /*
+ * Finalize various vnode identity bits.
+ */
vp->v_type = VNON;
vp->v_tag = tag;
vp->v_op = vops;
- *vpp = vp;
v_incr_usecount(vp, 1);
vp->v_data = 0;
- if (pollinfo != NULL) {
- knlist_destroy(&pollinfo->vpi_selinfo.si_note);
- mtx_destroy(&pollinfo->vpi_lock);
- uma_zfree(vnodepoll_zone, pollinfo);
- }
#ifdef MAC
mac_init_vnode(vp);
if (mp != NULL && (mp->mnt_flag & MNT_MULTILABEL) == 0)
@@ -876,6 +870,7 @@ getnewvnode(tag, mp, vops, vpp)
bo->bo_bsize = mp->mnt_stat.f_iosize;
}
+ *vpp = vp;
return (0);
}
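
As a usage note (not part of this diff): a filesystem's vget path obtains a
vnode through this interface roughly as below.  "myfs", myfs_vnodeops, and
myfs_data are placeholder names, not identifiers from this commit.

	struct vnode *vp;
	int error;

	/*
	 * Always receive a freshly constructed vnode; free-list reuse now
	 * happens inside getnewvnode() via vnlru_free(), so the caller is
	 * never handed back a recycled vnode.
	 */
	error = getnewvnode("myfs", mp, &myfs_vnodeops, &vp);
	if (error != 0)
		return (error);
	vp->v_type = VREG;		/* getnewvnode() leaves v_type as VNON */
	vp->v_data = myfs_data;		/* attach per-filesystem state */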