author     mckusick <mckusick@FreeBSD.org>    2012-04-20 06:50:44 +0000
committer  mckusick <mckusick@FreeBSD.org>    2012-04-20 06:50:44 +0000
commit     5b7b29e35b332608560671cf15919cf03d76f9ca (patch)
tree       0b75b6bd0feadd4a7a71deb104b1c2c9f1871dda
parent     17ad55bc6eb6ab65042b08f80f22694d8de3571d (diff)
This change creates a new list of active vnodes associated with
a mount point. Active vnodes are those with a non-zero use or hold
count, i.e., those vnodes that are not on the free list. Note that
this list is in addition to the list of all the vnodes associated
with a mount point.

To avoid adding another set of linkage pointers to the vnode
structure, the active list uses the existing linkage pointers
used by the free list (previously named v_freelist, now renamed
v_actfreelist).

This update adds the MNT_VNODE_FOREACH_ACTIVE interface that loops
over just the active vnodes associated with a mount point (typically
less than 1% of the vnodes associated with the mount point).

Reviewed by: kib
Tested by:   Peter Holm
MFC after:   2 weeks
-rw-r--r--  sys/fs/msdosfs/msdosfs_vfsops.c |   2
-rw-r--r--  sys/fs/nfsserver/nfs_nfsdport.c |   2
-rw-r--r--  sys/kern/vfs_mount.c            |   4
-rw-r--r--  sys/kern/vfs_subr.c             | 183
-rw-r--r--  sys/sys/mount.h                 |  21
-rw-r--r--  sys/sys/vnode.h                 |   3
6 files changed, 203 insertions(+), 12 deletions(-)
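
For readers of this patch, the sketch below (not part of the commit) shows one way a
filesystem could walk its active vnodes with the new MNT_VNODE_FOREACH_ACTIVE interface
described in the commit message. The iterator hands back each vnode with its interlock
held, so the loop body must either VI_UNLOCK() the vnode or pass the interlock to vget()
with LK_INTERLOCK, and any early exit must go through MNT_VNODE_FOREACH_ACTIVE_ABORT()
so the marker vnode is removed from the active list and freed. The example_sync_active()
function, its stop_on_first_error parameter, and its error policy are hypothetical; only
the iterator macros, vget(), VOP_FSYNC(), and vput() are existing kernel interfaces.

/*
 * Hypothetical example (not part of this commit): sync every active
 * vnode on a mount point using the new MNT_VNODE_FOREACH_ACTIVE iterator.
 */
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/mount.h>
#include <sys/vnode.h>

static int
example_sync_active(struct mount *mp, int stop_on_first_error, struct thread *td)
{
	struct vnode *vp, *mvp;
	int allerror, error;

	allerror = 0;
	MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) {
		/* Each vnode comes back with its interlock held. */
		if (vp->v_type == VNON) {
			VI_UNLOCK(vp);
			continue;
		}
		/* LK_INTERLOCK lets vget() consume and drop the interlock. */
		if (vget(vp, LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK, td) != 0)
			continue;
		error = VOP_FSYNC(vp, MNT_WAIT, td);
		if (error != 0)
			allerror = error;
		vput(vp);
		if (allerror != 0 && stop_on_first_error) {
			/*
			 * Leaving the loop early must go through the ABORT
			 * macro so the marker vnode is freed.
			 */
			MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp);
			break;
		}
	}
	return (allerror);
}

A real caller would put its own per-vnode work where VOP_FSYNC() is invoked here; the
point of the interface is that the loop visits only the active vnodes rather than every
vnode on mnt_nvnodelist.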
diff --git a/sys/fs/msdosfs/msdosfs_vfsops.c b/sys/fs/msdosfs/msdosfs_vfsops.c
index d2f49d9..7bdd2fa 100644
--- a/sys/fs/msdosfs/msdosfs_vfsops.c
+++ b/sys/fs/msdosfs/msdosfs_vfsops.c
@@ -834,7 +834,7 @@ msdosfs_unmount(struct mount *mp, int mntflags)
vn_printf(vp,
"msdosfs_umount(): just before calling VOP_CLOSE()\n");
printf("freef %p, freeb %p, mount %p\n",
- TAILQ_NEXT(vp, v_freelist), vp->v_freelist.tqe_prev,
+ TAILQ_NEXT(vp, v_actfreelist), vp->v_actfreelist.tqe_prev,
vp->v_mount);
printf("cleanblkhd %p, dirtyblkhd %p, numoutput %ld, type %d\n",
TAILQ_FIRST(&vp->v_bufobj.bo_clean.bv_hd),
diff --git a/sys/fs/nfsserver/nfs_nfsdport.c b/sys/fs/nfsserver/nfs_nfsdport.c
index d67f7e6..2c0e33d 100644
--- a/sys/fs/nfsserver/nfs_nfsdport.c
+++ b/sys/fs/nfsserver/nfs_nfsdport.c
@@ -2907,12 +2907,14 @@ nfsd_mntinit(void)
inited = 1;
nfsv4root_mnt.mnt_flag = (MNT_RDONLY | MNT_EXPORTED);
TAILQ_INIT(&nfsv4root_mnt.mnt_nvnodelist);
+ TAILQ_INIT(&nfsv4root_mnt.mnt_activevnodelist);
nfsv4root_mnt.mnt_export = NULL;
TAILQ_INIT(&nfsv4root_opt);
TAILQ_INIT(&nfsv4root_newopt);
nfsv4root_mnt.mnt_opt = &nfsv4root_opt;
nfsv4root_mnt.mnt_optnew = &nfsv4root_newopt;
nfsv4root_mnt.mnt_nvnodelistsize = 0;
+ nfsv4root_mnt.mnt_activevnodelistsize = 0;
}
/*
diff --git a/sys/kern/vfs_mount.c b/sys/kern/vfs_mount.c
index 6a5dfb0..e9d3abe 100644
--- a/sys/kern/vfs_mount.c
+++ b/sys/kern/vfs_mount.c
@@ -461,6 +461,8 @@ vfs_mount_alloc(struct vnode *vp, struct vfsconf *vfsp, const char *fspath,
__rangeof(struct mount, mnt_startzero, mnt_endzero));
TAILQ_INIT(&mp->mnt_nvnodelist);
mp->mnt_nvnodelistsize = 0;
+ TAILQ_INIT(&mp->mnt_activevnodelist);
+ mp->mnt_activevnodelistsize = 0;
mp->mnt_ref = 0;
(void) vfs_busy(mp, MBF_NOWAIT);
mp->mnt_op = vfsp->vfc_vfsops;
@@ -514,6 +516,8 @@ vfs_mount_destroy(struct mount *mp)
}
if (mp->mnt_nvnodelistsize != 0)
panic("vfs_mount_destroy: nonzero nvnodelistsize");
+ if (mp->mnt_activevnodelistsize != 0)
+ panic("vfs_mount_destroy: nonzero activevnodelistsize");
if (mp->mnt_lockref != 0)
panic("vfs_mount_destroy: nonzero lock refcount");
MNT_IUNLOCK(mp);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 7dbd1c4..7af71e9 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -775,12 +775,16 @@ vnlru_free(int count)
break;
VNASSERT(vp->v_op != NULL, vp,
("vnlru_free: vnode already reclaimed."));
- TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ KASSERT((vp->v_iflag & VI_FREE) != 0,
+ ("Removing vnode not on freelist"));
+ KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
+ ("Mangling active vnode"));
+ TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
/*
* Don't recycle if we can't get the interlock.
*/
if (!VI_TRYLOCK(vp)) {
- TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
continue;
}
VNASSERT(VCANRECYCLE(vp), vp,
@@ -1035,12 +1039,26 @@ static void
delmntque(struct vnode *vp)
{
struct mount *mp;
+ int active;
mp = vp->v_mount;
if (mp == NULL)
return;
MNT_ILOCK(mp);
+ VI_LOCK(vp);
+ KASSERT(mp->mnt_activevnodelistsize <= mp->mnt_nvnodelistsize,
+ ("Active vnode list size %d > Vnode list size %d",
+ mp->mnt_activevnodelistsize, mp->mnt_nvnodelistsize));
+ active = vp->v_iflag & VI_ACTIVE;
+ vp->v_iflag &= ~VI_ACTIVE;
+ if (active) {
+ mtx_lock(&vnode_free_list_mtx);
+ TAILQ_REMOVE(&mp->mnt_activevnodelist, vp, v_actfreelist);
+ mp->mnt_activevnodelistsize--;
+ mtx_unlock(&vnode_free_list_mtx);
+ }
vp->v_mount = NULL;
+ VI_UNLOCK(vp);
VNASSERT(mp->mnt_nvnodelistsize > 0, vp,
("bad mount point vnode list size"));
TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
@@ -1080,13 +1098,24 @@ insmntque1(struct vnode *vp, struct mount *mp,
ASSERT_VOP_ELOCKED(vp,
"insmntque: mp-safe fs and non-locked vp");
#endif
+ /*
+ * We acquire the vnode interlock early to ensure that the
+ * vnode cannot be recycled by another process releasing a
+ * holdcnt on it before we get it on both the vnode list
+ * and the active vnode list. The mount mutex protects only
+ * manipulation of the vnode list and the vnode freelist
+ * mutex protects only manipulation of the active vnode list.
+ * Hence the need to hold the vnode interlock throughout.
+ */
MNT_ILOCK(mp);
+ VI_LOCK(vp);
if ((mp->mnt_kern_flag & MNTK_NOINSMNTQ) != 0 &&
((mp->mnt_kern_flag & MNTK_UNMOUNTF) != 0 ||
mp->mnt_nvnodelistsize == 0)) {
locked = VOP_ISLOCKED(vp);
if (!locked || (locked == LK_EXCLUSIVE &&
(vp->v_vflag & VV_FORCEINSMQ) == 0)) {
+ VI_UNLOCK(vp);
MNT_IUNLOCK(mp);
if (dtr != NULL)
dtr(vp, dtr_arg);
@@ -1099,6 +1128,14 @@ insmntque1(struct vnode *vp, struct mount *mp,
VNASSERT(mp->mnt_nvnodelistsize >= 0, vp,
("neg mount point vnode list size"));
mp->mnt_nvnodelistsize++;
+ KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
+ ("Activating already active vnode"));
+ vp->v_iflag |= VI_ACTIVE;
+ mtx_lock(&vnode_free_list_mtx);
+ TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
+ mp->mnt_activevnodelistsize++;
+ mtx_unlock(&vnode_free_list_mtx);
+ VI_UNLOCK(vp);
MNT_IUNLOCK(mp);
return (0);
}
@@ -2309,6 +2346,7 @@ vhold(struct vnode *vp)
void
vholdl(struct vnode *vp)
{
+ struct mount *mp;
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
vp->v_holdcnt++;
@@ -2318,12 +2356,19 @@ vholdl(struct vnode *vp)
VNASSERT((vp->v_iflag & VI_FREE) != 0, vp, ("vnode not free"));
VNASSERT(vp->v_op != NULL, vp, ("vholdl: vnode already reclaimed."));
/*
- * Remove a vnode from the free list and mark it as in use.
+ * Remove a vnode from the free list, mark it as in use,
+ * and put it on the active list.
*/
mtx_lock(&vnode_free_list_mtx);
- TAILQ_REMOVE(&vnode_free_list, vp, v_freelist);
+ TAILQ_REMOVE(&vnode_free_list, vp, v_actfreelist);
freevnodes--;
vp->v_iflag &= ~(VI_FREE|VI_AGE);
+ KASSERT((vp->v_iflag & VI_ACTIVE) == 0,
+ ("Activating already active vnode"));
+ vp->v_iflag |= VI_ACTIVE;
+ mp = vp->v_mount;
+ TAILQ_INSERT_HEAD(&mp->mnt_activevnodelist, vp, v_actfreelist);
+ mp->mnt_activevnodelistsize++;
mtx_unlock(&vnode_free_list_mtx);
}
@@ -2348,6 +2393,8 @@ void
vdropl(struct vnode *vp)
{
struct bufobj *bo;
+ struct mount *mp;
+ int active;
ASSERT_VI_LOCKED(vp, "vdropl");
CTR2(KTR_VFS, "%s: vp %p", __func__, vp);
@@ -2360,19 +2407,28 @@ vdropl(struct vnode *vp)
}
if ((vp->v_iflag & VI_DOOMED) == 0) {
/*
- * Mark a vnode as free, putting it up for recycling.
+ * Mark a vnode as free: remove it from its active list
+ * and put it up for recycling on the freelist.
*/
- mtx_lock(&vnode_free_list_mtx);
VNASSERT(vp->v_op != NULL, vp,
("vdropl: vnode already reclaimed."));
VNASSERT((vp->v_iflag & VI_FREE) == 0, vp,
("vnode already free"));
VNASSERT(VSHOULDFREE(vp), vp,
("vdropl: freeing when we shouldn't"));
+ active = vp->v_iflag & VI_ACTIVE;
+ vp->v_iflag &= ~VI_ACTIVE;
+ mp = vp->v_mount;
+ mtx_lock(&vnode_free_list_mtx);
+ if (active) {
+ TAILQ_REMOVE(&mp->mnt_activevnodelist, vp,
+ v_actfreelist);
+ mp->mnt_activevnodelistsize--;
+ }
if (vp->v_iflag & VI_AGE) {
- TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_HEAD(&vnode_free_list, vp, v_actfreelist);
} else {
- TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_freelist);
+ TAILQ_INSERT_TAIL(&vnode_free_list, vp, v_actfreelist);
}
freevnodes++;
vp->v_iflag &= ~VI_AGE;
@@ -3010,6 +3066,8 @@ DB_SHOW_COMMAND(mount, db_show_mount)
db_printf(" mnt_ref = %d\n", mp->mnt_ref);
db_printf(" mnt_gen = %d\n", mp->mnt_gen);
db_printf(" mnt_nvnodelistsize = %d\n", mp->mnt_nvnodelistsize);
+ db_printf(" mnt_activevnodelistsize = %d\n",
+ mp->mnt_activevnodelistsize);
db_printf(" mnt_writeopcount = %d\n", mp->mnt_writeopcount);
db_printf(" mnt_maxsymlinklen = %d\n", mp->mnt_maxsymlinklen);
db_printf(" mnt_iosize_max = %d\n", mp->mnt_iosize_max);
@@ -3019,15 +3077,23 @@ DB_SHOW_COMMAND(mount, db_show_mount)
mp->mnt_secondary_accwrites);
db_printf(" mnt_gjprovider = %s\n",
mp->mnt_gjprovider != NULL ? mp->mnt_gjprovider : "NULL");
- db_printf("\n");
- TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
+ db_printf("\n\nList of active vnodes\n");
+ TAILQ_FOREACH(vp, &mp->mnt_activevnodelist, v_actfreelist) {
if (vp->v_type != VMARKER) {
vn_printf(vp, "vnode ");
if (db_pager_quit)
break;
}
}
+ db_printf("\n\nList of inactive vnodes\n");
+ TAILQ_FOREACH(vp, &mp->mnt_nvnodelist, v_nmntvnodes) {
+ if (vp->v_type != VMARKER && (vp->v_iflag & VI_ACTIVE) == 0) {
+ vn_printf(vp, "vnode ");
+ if (db_pager_quit)
+ break;
+ }
+ }
}
#endif /* DDB */
@@ -4558,3 +4624,100 @@ __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp)
free(*mvp, M_VNODE_MARKER);
*mvp = NULL;
}
+
+/*
+ * These are helper functions for filesystems to traverse their
+ * active vnodes. See MNT_VNODE_FOREACH_ACTIVE() in sys/mount.h
+ */
+struct vnode *
+__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp)
+{
+ struct vnode *vp, *nvp;
+
+ if (should_yield())
+ kern_yield(PRI_UNCHANGED);
+ MNT_ILOCK(mp);
+ KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
+ vp = TAILQ_NEXT(*mvp, v_actfreelist);
+ while (vp != NULL) {
+ VI_LOCK(vp);
+ if (vp->v_mount == mp && vp->v_type != VMARKER &&
+ (vp->v_iflag & VI_DOOMED) == 0)
+ break;
+ nvp = TAILQ_NEXT(vp, v_actfreelist);
+ VI_UNLOCK(vp);
+ vp = nvp;
+ }
+
+ /* Check if we are done */
+ if (vp == NULL) {
+ __mnt_vnode_markerfree_active(mvp, mp);
+ /* MNT_IUNLOCK(mp); -- done in above function */
+ mtx_assert(MNT_MTX(mp), MA_NOTOWNED);
+ return (NULL);
+ }
+ mtx_lock(&vnode_free_list_mtx);
+ TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
+ TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
+ mtx_unlock(&vnode_free_list_mtx);
+ MNT_IUNLOCK(mp);
+ return (vp);
+}
+
+struct vnode *
+__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp)
+{
+ struct vnode *vp, *nvp;
+
+ *mvp = malloc(sizeof(struct vnode), M_VNODE_MARKER, M_WAITOK | M_ZERO);
+ MNT_ILOCK(mp);
+ MNT_REF(mp);
+ (*mvp)->v_type = VMARKER;
+
+ vp = TAILQ_NEXT(*mvp, v_actfreelist);
+ while (vp != NULL) {
+ VI_LOCK(vp);
+ if (vp->v_mount == mp && vp->v_type != VMARKER &&
+ (vp->v_iflag & VI_DOOMED) == 0)
+ break;
+ nvp = TAILQ_NEXT(vp, v_actfreelist);
+ VI_UNLOCK(vp);
+ vp = nvp;
+ }
+
+ /* Check if we are done */
+ if (vp == NULL) {
+ MNT_REL(mp);
+ MNT_IUNLOCK(mp);
+ free(*mvp, M_VNODE_MARKER);
+ *mvp = NULL;
+ return (NULL);
+ }
+ (*mvp)->v_mount = mp;
+ mtx_lock(&vnode_free_list_mtx);
+ TAILQ_INSERT_AFTER(&mp->mnt_activevnodelist, vp, *mvp, v_actfreelist);
+ mtx_unlock(&vnode_free_list_mtx);
+ MNT_IUNLOCK(mp);
+ return (vp);
+}
+
+void
+__mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *mp)
+{
+
+ if (*mvp == NULL) {
+ MNT_IUNLOCK(mp);
+ return;
+ }
+
+ mtx_assert(MNT_MTX(mp), MA_OWNED);
+
+ KASSERT((*mvp)->v_mount == mp, ("marker vnode mount list mismatch"));
+ mtx_lock(&vnode_free_list_mtx);
+ TAILQ_REMOVE(&mp->mnt_activevnodelist, *mvp, v_actfreelist);
+ mtx_unlock(&vnode_free_list_mtx);
+ MNT_REL(mp);
+ MNT_IUNLOCK(mp);
+ free(*mvp, M_VNODE_MARKER);
+ *mvp = NULL;
+}
diff --git a/sys/sys/mount.h b/sys/sys/mount.h
index 79740fb..319e094 100644
--- a/sys/sys/mount.h
+++ b/sys/sys/mount.h
@@ -164,6 +164,8 @@ struct mount {
int mnt_ref; /* (i) Reference count */
struct vnodelst mnt_nvnodelist; /* (i) list of vnodes */
int mnt_nvnodelistsize; /* (i) # of vnodes */
+ struct vnodelst mnt_activevnodelist; /* (i) list of active vnodes */
+ int mnt_activevnodelistsize;/* (i) # of active vnodes */
int mnt_writeopcount; /* (i) write syscalls pending */
int mnt_kern_flag; /* (i) kernel only flags */
uint64_t mnt_flag; /* (i) flags shared with user */
@@ -207,6 +209,25 @@ void __mnt_vnode_markerfree_all(struct vnode **mvp, struct mount *mp);
} while (0)
/*
+ * Definitions for MNT_VNODE_FOREACH_ACTIVE.
+ */
+struct vnode *__mnt_vnode_next_active(struct vnode **mvp, struct mount *mp);
+struct vnode *__mnt_vnode_first_active(struct vnode **mvp, struct mount *mp);
+void __mnt_vnode_markerfree_active(struct vnode **mvp, struct mount *);
+
+#define MNT_VNODE_FOREACH_ACTIVE(vp, mp, mvp) \
+ for (vp = __mnt_vnode_first_active(&(mvp), (mp)); \
+ (vp) != NULL; vp = __mnt_vnode_next_active(&(mvp), (mp)))
+
+#define MNT_VNODE_FOREACH_ACTIVE_ABORT(mp, mvp) \
+ do { \
+ MNT_ILOCK(mp); \
+ __mnt_vnode_markerfree_active(&(mvp), (mp)); \
+ /* MNT_IUNLOCK(mp); -- done in above function */ \
+ mtx_assert(MNT_MTX(mp), MA_NOTOWNED); \
+ } while (0)
+
+/*
* Definitions for MNT_VNODE_FOREACH.
*
* This interface has been deprecated in favor of MNT_VNODE_FOREACH_ALL.
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 3946405..352ebca 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -156,7 +156,7 @@ struct vnode {
/*
* The machinery of being a vnode
*/
- TAILQ_ENTRY(vnode) v_freelist; /* f vnode freelist */
+ TAILQ_ENTRY(vnode) v_actfreelist; /* f vnode active/free lists */
struct bufobj v_bufobj; /* * Buffer cache object */
/*
@@ -232,6 +232,7 @@ struct xvnode {
#define VI_AGE 0x0040 /* Insert vnode at head of free list */
#define VI_DOOMED 0x0080 /* This vnode is being recycled */
#define VI_FREE 0x0100 /* This vnode is on the freelist */
+#define VI_ACTIVE 0x0200 /* This vnode is on the active list */
#define VI_DOINGINACT 0x0800 /* VOP_INACTIVE is in progress */
#define VI_OWEINACT 0x1000 /* Need to call inactive */