path: root/sys
author     dillon <dillon@FreeBSD.org>  2001-10-26 00:08:05 +0000
committer  dillon <dillon@FreeBSD.org>  2001-10-26 00:08:05 +0000
commit     f883ef447af57985b21cde8cd13232ca845190a4 (patch)
tree       d2fcc74cff2c1c1ff478c189a5bc5cafc79d4509 /sys
parent     9e3e7670a88869e7b562b5608cd584c56c3a6517 (diff)
download   FreeBSD-src-f883ef447af57985b21cde8cd13232ca845190a4.zip
           FreeBSD-src-f883ef447af57985b21cde8cd13232ca845190a4.tar.gz
Implement kern.maxvnodes.  Adjusting kern.maxvnodes now actually has a
real effect.

Optimize vfs_msync().  Avoid having to continually drop and re-obtain
mutexes when scanning the vnode list.  Improves the looping case by 500%.

Optimize ffs_sync().  Avoid having to continually drop and re-obtain
mutexes when scanning the vnode list.  This makes a couple of assumptions,
which I believe are ok, regarding vnode stability while the mount list
mutex is held.  Improves the looping case by 500%.

(more optimization work is needed on top of these fixes)

MFC after:	1 week
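Since the message above says tuning kern.maxvnodes now has a real effect, the following small userland sketch (not part of this patch) shows how the limit can be read and raised with sysctlbyname(3); the +10000 increment is an arbitrary example value, not a tuning recommendation.

/*
 * Hedged userland sketch, not part of the commit: read kern.maxvnodes
 * and (as root) raise it via sysctlbyname(3).  The +10000 increment is
 * an arbitrary illustration, not a tuning recommendation.
 */
#include <sys/types.h>
#include <sys/sysctl.h>

#include <stdio.h>

int
main(void)
{
	int maxvnodes, newmax;
	size_t len = sizeof(maxvnodes);

	/* Read the current vnode limit. */
	if (sysctlbyname("kern.maxvnodes", &maxvnodes, &len, NULL, 0) != 0) {
		perror("sysctlbyname(kern.maxvnodes)");
		return (1);
	}
	printf("kern.maxvnodes = %d\n", maxvnodes);

	/* Raise the limit; fails with EPERM unless run as root. */
	newmax = maxvnodes + 10000;
	if (sysctlbyname("kern.maxvnodes", NULL, NULL, &newmax, sizeof(newmax)) != 0)
		perror("sysctlbyname(set kern.maxvnodes)");
	else
		printf("kern.maxvnodes raised to %d\n", newmax);
	return (0);
}

The same adjustment is available from the shell with sysctl kern.maxvnodes=<value>.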
Diffstat (limited to 'sys')
-rw-r--r--  sys/kern/vfs_subr.c       112
-rw-r--r--  sys/sys/vnode.h             5
-rw-r--r--  sys/ufs/ffs/ffs_vfsops.c   38
-rw-r--r--  sys/vm/vm_fault.c           3
-rw-r--r--  sys/vm/vm_object.c         41
-rw-r--r--  sys/vm/vm_object.h          1
-rw-r--r--  sys/vm/vm_page.c            2
7 files changed, 145 insertions, 57 deletions
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index fde8ce2..d690ab35 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -519,6 +519,49 @@ vattr_null(vap)
}
/*
+ * This routine is called when we have too many vnodes. It attempts
+ * to free <count> vnodes and will potentially free vnodes that still
+ * have VM backing store (VM backing store is typically the cause
+ * of a vnode blowout so we want to do this). Therefore, this operation
+ * is not considered cheap.
+ *
+ * A number of conditions may prevent a vnode from being reclaimed.
+ * The buffer cache may have references on the vnode, a directory
+ * vnode may still have references due to the namei cache representing
+ * underlying files, or the vnode may be in active use. It is not
+ * desirable to reuse such vnodes. These conditions may cause the
+ * number of vnodes to reach some minimum value regardless of what
+ * you set kern.maxvnodes to. Do not set kern.maxvnodes too low.
+ */
+static void
+vlrureclaim(struct mount *mp, int count)
+{
+ struct vnode *vp;
+
+ mtx_lock(&mntvnode_mtx);
+ while (count && (vp = TAILQ_FIRST(&mp->mnt_nvnodelist)) != NULL) {
+ TAILQ_REMOVE(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
+ TAILQ_INSERT_TAIL(&mp->mnt_nvnodelist, vp, v_nmntvnodes);
+
+ if (vp->v_type != VNON &&
+ vp->v_type != VBAD &&
+ VMIGHTFREE(vp) && /* critical path opt */
+ mtx_trylock(&vp->v_interlock)
+ ) {
+ mtx_unlock(&mntvnode_mtx);
+ if (VMIGHTFREE(vp)) {
+ vgonel(vp, curthread);
+ } else {
+ mtx_unlock(&vp->v_interlock);
+ }
+ mtx_lock(&mntvnode_mtx);
+ }
+ --count;
+ }
+ mtx_unlock(&mntvnode_mtx);
+}
+
+/*
* Routines having to do with the management of the vnode table.
*/
@@ -532,25 +575,33 @@ getnewvnode(tag, mp, vops, vpp)
vop_t **vops;
struct vnode **vpp;
{
- int s, count;
+ int s;
struct thread *td = curthread; /* XXX */
struct vnode *vp = NULL;
struct mount *vnmp;
vm_object_t object;
+ s = splbio();
/*
- * We take the least recently used vnode from the freelist
- * if we can get it and it has no cached pages, and no
- * namecache entries are relative to it.
- * Otherwise we allocate a new vnode
+ * Try to reuse vnodes if we hit the max. This situation only
+ * occurs in certain large-memory (2G+) situations. For the
+ * algorithm to be stable we have to try to reuse at least 2.
+ * No hysteresis should be necessary.
+ */
+ if (numvnodes - freevnodes > desiredvnodes)
+ vlrureclaim(mp, 2);
+
+ /*
+ * Attempt to reuse a vnode already on the free list, allocating
+ * a new vnode if we can't find one or if we have not reached a
+ * good minimum for good LRU performance.
*/
- s = splbio();
mtx_lock(&vnode_free_list_mtx);
- if (freevnodes < wantfreevnodes) {
- vp = NULL;
- } else if (numvnodes >= minvnodes) {
+ if (freevnodes >= wantfreevnodes && numvnodes >= minvnodes) {
+ int count;
+
for (count = 0; count < freevnodes; count++) {
vp = TAILQ_FIRST(&vnode_free_list);
if (vp == NULL || vp->v_usecount)
@@ -2408,22 +2459,20 @@ vfs_msync(struct mount *mp, int flags)
{
struct vnode *vp, *nvp;
struct vm_object *obj;
- int anyio, tries;
+ int tries;
GIANT_REQUIRED;
tries = 5;
-loop:
- anyio = 0;
mtx_lock(&mntvnode_mtx);
+loop:
for (vp = TAILQ_FIRST(&mp->mnt_nvnodelist); vp != NULL; vp = nvp) {
-
- nvp = TAILQ_NEXT(vp, v_nmntvnodes);
-
if (vp->v_mount != mp) {
- mtx_unlock(&mntvnode_mtx);
- goto loop;
+ if (--tries > 0)
+ goto loop;
+ break;
}
+ nvp = TAILQ_NEXT(vp, v_nmntvnodes);
if (vp->v_flag & VXLOCK) /* XXX: what if MNT_WAIT? */
continue;
@@ -2431,36 +2480,27 @@ loop:
if (vp->v_flag & VNOSYNC) /* unlinked, skip it */
continue;
- if (flags != MNT_WAIT) {
- if (VOP_GETVOBJECT(vp, &obj) != 0 ||
- (obj->flags & OBJ_MIGHTBEDIRTY) == 0)
- continue;
- if (VOP_ISLOCKED(vp, NULL))
- continue;
- }
-
- mtx_unlock(&mntvnode_mtx);
- mtx_lock(&vp->v_interlock);
- if (VOP_GETVOBJECT(vp, &obj) == 0 &&
- (obj->flags & OBJ_MIGHTBEDIRTY)) {
+ if ((vp->v_flag & VOBJDIRTY) &&
+ (flags == MNT_WAIT || VOP_ISLOCKED(vp, NULL) == 0)) {
+ mtx_unlock(&mntvnode_mtx);
if (!vget(vp,
- LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curthread)) {
+ LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curthread)) {
if (VOP_GETVOBJECT(vp, &obj) == 0) {
vm_object_page_clean(obj, 0, 0,
flags == MNT_WAIT ?
OBJPC_SYNC : OBJPC_NOSYNC);
- anyio = 1;
}
vput(vp);
}
- } else {
- mtx_unlock(&vp->v_interlock);
+ mtx_lock(&mntvnode_mtx);
+ if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp) {
+ if (--tries > 0)
+ goto loop;
+ break;
+ }
}
- mtx_lock(&mntvnode_mtx);
}
mtx_unlock(&mntvnode_mtx);
- if (anyio && (--tries > 0))
- goto loop;
}
/*
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 5986579..0d78fcb 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -175,6 +175,7 @@ struct vnode {
/* open for business 0x100000 */
#define VONWORKLST 0x200000 /* On syncer work-list */
#define VMOUNT 0x400000 /* Mount in progress */
+#define VOBJDIRTY 0x800000 /* object might be dirty */
/*
* Vnode attributes. A field value of VNOVAL represents a field whose value
@@ -311,6 +312,10 @@ extern void (*lease_updatetime) __P((int deltat));
(!(vp)->v_object || \
!((vp)->v_object->ref_count || (vp)->v_object->resident_page_count)))
+#define VMIGHTFREE(vp) \
+ (!((vp)->v_flag & (VFREE|VDOOMED)) && \
+ !(vp)->v_holdcnt && !(vp)->v_usecount)
+
#define VSHOULDBUSY(vp) \
(((vp)->v_flag & VFREE) && \
((vp)->v_holdcnt || (vp)->v_usecount))
diff --git a/sys/ufs/ffs/ffs_vfsops.c b/sys/ufs/ffs/ffs_vfsops.c
index ad4c24d..d080fbb 100644
--- a/sys/ufs/ffs/ffs_vfsops.c
+++ b/sys/ufs/ffs/ffs_vfsops.c
@@ -1001,10 +1001,10 @@ ffs_sync(mp, waitfor, cred, td)
* Write back each (modified) inode.
*/
wait = 0;
- lockreq = LK_EXCLUSIVE | LK_NOWAIT | LK_INTERLOCK;
+ lockreq = LK_EXCLUSIVE | LK_NOWAIT;
if (waitfor == MNT_WAIT) {
wait = 1;
- lockreq = LK_EXCLUSIVE | LK_INTERLOCK;
+ lockreq = LK_EXCLUSIVE;
}
mtx_lock(&mntvnode_mtx);
loop:
@@ -1015,34 +1015,40 @@ loop:
*/
if (vp->v_mount != mp)
goto loop;
- nvp = TAILQ_NEXT(vp, v_nmntvnodes);
- mtx_unlock(&mntvnode_mtx);
- mtx_lock(&vp->v_interlock);
+ /*
+ * Depend on the mntvnode_slock to keep things stable enough
+ * for a quick test. Since there might be hundreds of
+ * thousands of vnodes, we cannot afford even a subroutine
+ * call unless there's a good chance that we have work to do.
+ */
+ nvp = TAILQ_NEXT(vp, v_nmntvnodes);
ip = VTOI(vp);
if (vp->v_type == VNON || ((ip->i_flag &
- (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
- TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
- mtx_unlock(&vp->v_interlock);
- mtx_lock(&mntvnode_mtx);
+ (IN_ACCESS | IN_CHANGE | IN_MODIFIED | IN_UPDATE)) == 0 &&
+ TAILQ_EMPTY(&vp->v_dirtyblkhd))) {
continue;
}
if (vp->v_type != VCHR) {
+ mtx_unlock(&mntvnode_mtx);
if ((error = vget(vp, lockreq, td)) != 0) {
mtx_lock(&mntvnode_mtx);
if (error == ENOENT)
goto loop;
- continue;
+ } else {
+ if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
+ allerror = error;
+ VOP_UNLOCK(vp, 0, td);
+ vrele(vp);
+ mtx_lock(&mntvnode_mtx);
}
- if ((error = VOP_FSYNC(vp, cred, waitfor, td)) != 0)
- allerror = error;
- VOP_UNLOCK(vp, 0, td);
- vrele(vp);
} else {
- mtx_unlock(&vp->v_interlock);
+ mtx_unlock(&mntvnode_mtx);
UFS_UPDATE(vp, wait);
+ mtx_lock(&mntvnode_mtx);
}
- mtx_lock(&mntvnode_mtx);
+ if (TAILQ_NEXT(vp, v_nmntvnodes) != nvp)
+ goto loop;
}
mtx_unlock(&mntvnode_mtx);
/*
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 3507353..8814ae5 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -806,8 +806,7 @@ readrest:
if (prot & VM_PROT_WRITE) {
vm_page_flag_set(fs.m, PG_WRITEABLE);
- vm_object_set_flag(fs.m->object,
- OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
+ vm_object_set_writeable_dirty(fs.m->object);
/*
* If the fault is a write, we know that this page is being
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 1d5a989..b7613eb 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -321,8 +321,11 @@ vm_object_reference(vm_object_t object)
if (object == NULL)
return;
+#if 0
+ /* object can be re-referenced during final cleaning */
KASSERT(!(object->flags & OBJ_DEAD),
("vm_object_reference: attempting to reference dead obj"));
+#endif
object->ref_count++;
if (object->type == OBJT_VNODE) {
@@ -454,8 +457,13 @@ doterm:
temp->generation++;
object->backing_object = NULL;
}
- vm_object_terminate(object);
- /* unlocks and deallocates object */
+ /*
+ * Don't double-terminate, we could be in a termination
+ * recursion due to the terminate having to sync data
+ * to disk.
+ */
+ if ((object->flags & OBJ_DEAD) == 0)
+ vm_object_terminate(object);
object = temp;
}
}
@@ -627,7 +635,17 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int
}
if (clearobjflags && (tstart == 0) && (tend == object->size)) {
+ struct vnode *vp;
+
vm_object_clear_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
+ if (object->type == OBJT_VNODE &&
+ (vp = (struct vnode *)object->handle) != NULL) {
+ if (vp->v_flag & VOBJDIRTY) {
+ mtx_lock(&vp->v_interlock);
+ vp->v_flag &= ~VOBJDIRTY;
+ mtx_unlock(&vp->v_interlock);
+ }
+ }
}
rescan:
@@ -1357,6 +1375,8 @@ vm_object_collapse(vm_object_t object)
* and no object references within it, all that is
* necessary is to dispose of it.
*/
+ KASSERT(backing_object->ref_count == 1, ("backing_object %p was somehow re-referenced during collapse!", backing_object));
+ KASSERT(TAILQ_FIRST(&backing_object->memq) == NULL, ("backing_object %p somehow has left over pages during collapse!", backing_object));
TAILQ_REMOVE(
&vm_object_list,
@@ -1684,6 +1704,23 @@ vm_object_in_map(vm_object_t object)
return 0;
}
+void
+vm_object_set_writeable_dirty(vm_object_t object)
+{
+ struct vnode *vp;
+
+ vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
+ if (object->type == OBJT_VNODE &&
+ (vp = (struct vnode *)object->handle) != NULL) {
+ if ((vp->v_flag & VOBJDIRTY) == 0) {
+ mtx_lock(&vp->v_interlock);
+ vp->v_flag |= VOBJDIRTY;
+ mtx_unlock(&vp->v_interlock);
+ }
+ }
+}
+
+
DB_SHOW_COMMAND(vmochk, vm_object_check)
{
vm_object_t object;
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index c126cb6..1256e85 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -184,6 +184,7 @@ void vm_object_collapse (vm_object_t);
void vm_object_deallocate (vm_object_t);
void vm_object_terminate (vm_object_t);
void vm_object_vndeallocate (vm_object_t);
+void vm_object_set_writeable_dirty (vm_object_t);
void vm_object_init (void);
void vm_object_page_clean (vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t);
void vm_object_page_remove (vm_object_t, vm_pindex_t, vm_pindex_t, boolean_t);
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 61d821f..0eb06fc 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -609,7 +609,7 @@ vm_page_insert(vm_page_t m, vm_object_t object, vm_pindex_t pindex)
* update the object's OBJ_WRITEABLE and OBJ_MIGHTBEDIRTY flags.
*/
if (m->flags & PG_WRITEABLE)
- vm_object_set_flag(object, OBJ_WRITEABLE|OBJ_MIGHTBEDIRTY);
+ vm_object_set_writeable_dirty(object);
}
/*
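For readers skimming the diff above, the central trick shared by the reworked vfs_msync() and the new VOBJDIRTY flag is to keep a cheap per-vnode hint in sync with the expensive-to-inspect VM object state, so the scan can skip clean vnodes without dropping the mount-list mutex. The following standalone sketch restates that idea with hypothetical, simplified types; node, backing, and NODE_OBJDIRTY are made up for illustration and are not the kernel's structures.

/*
 * Hedged standalone sketch (hypothetical types, not kernel code):
 * mirror an expensive-to-check "might be dirty" condition into a cheap
 * per-node flag, so a list scan can skip clean nodes without touching
 * their backing objects.  This is the idea behind the VOBJDIRTY flag
 * used by the reworked vfs_msync() above.
 */
#include <stdio.h>

#define	NODE_OBJDIRTY	0x1		/* backing store might be dirty */

struct backing {
	int	dirty_pages;		/* stand-in for OBJ_MIGHTBEDIRTY state */
};

struct node {
	int		flags;
	struct backing	*obj;
	struct node	*next;
};

/* Write path: mark both the backing store and the cheap node flag. */
static void
node_set_writeable_dirty(struct node *np)
{
	np->obj->dirty_pages = 1;
	np->flags |= NODE_OBJDIRTY;
}

/* Scan analogous to vfs_msync(): only touch nodes whose flag is set. */
static void
sync_all(struct node *head)
{
	struct node *np;

	for (np = head; np != NULL; np = np->next) {
		if ((np->flags & NODE_OBJDIRTY) == 0)
			continue;	/* cheap test, no object access */
		/* Expensive work happens only for possibly-dirty nodes. */
		printf("cleaning node %p\n", (void *)np);
		np->obj->dirty_pages = 0;
		np->flags &= ~NODE_OBJDIRTY;
	}
}

int
main(void)
{
	struct backing ba = { 0 }, bb = { 0 };
	struct node b = { 0, &bb, NULL };
	struct node a = { 0, &ba, &b };

	node_set_writeable_dirty(&b);	/* only the second node gets dirtied */
	sync_all(&a);			/* cleans b, skips a cheaply */
	return (0);
}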