Diffstat (limited to 'sys')
-rw-r--r--  sys/kern/vfs_bio.c           208
-rw-r--r--  sys/kern/vfs_cluster.c         6
-rw-r--r--  sys/kern/vfs_export.c         14
-rw-r--r--  sys/kern/vfs_subr.c           14
-rw-r--r--  sys/kern/vfs_vnops.c           4
-rw-r--r--  sys/sys/buf.h                  1
-rw-r--r--  sys/sys/vnode.h                1
-rw-r--r--  sys/ufs/ffs/ffs_inode.c        3
-rw-r--r--  sys/ufs/ffs/ffs_softdep.c     36
-rw-r--r--  sys/ufs/ufs/ufs_readwrite.c    4
-rw-r--r--  sys/vm/swap_pager.c            4
-rw-r--r--  sys/vm/vm_page.c              35
-rw-r--r--  sys/vm/vm_page.h               2
-rw-r--r--  sys/vm/vm_pageout.c          168
14 files changed, 315 insertions, 185 deletions
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index 17def1b..9a9aae7 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -597,8 +597,14 @@ bwrite(struct buf * bp)
* If this buffer is marked for background writing and we
* do not have to wait for it, make a copy and write the
* copy so as to leave this buffer ready for further use.
+ *
+ * This optimization eats a lot of memory. If we have a page
+ * or buffer shortfall we can't do it.
*/
- if ((bp->b_xflags & BX_BKGRDWRITE) && (bp->b_flags & B_ASYNC)) {
+ if ((bp->b_xflags & BX_BKGRDWRITE) &&
+ (bp->b_flags & B_ASYNC) &&
+ !vm_page_count_severe() &&
+ !buf_dirty_count_severe()) {
if (bp->b_iodone != NULL) {
printf("bp->b_iodone = %p\n", bp->b_iodone);
panic("bwrite: need chained iodone");
@@ -682,7 +688,10 @@ vfs_backgroundwritedone(bp)
/*
* Clear the BX_BKGRDINPROG flag in the original buffer
* and awaken it if it is waiting for the write to complete.
+ * If BX_BKGRDINPROG is not set in the original buffer it must
+ * have been released and re-instantiated - which is not legal.
*/
+ KASSERT((origbp->b_xflags & BX_BKGRDINPROG), ("backgroundwritedone: lost buffer2"));
origbp->b_xflags &= ~BX_BKGRDINPROG;
if (origbp->b_xflags & BX_BKGRDWAIT) {
origbp->b_xflags &= ~BX_BKGRDWAIT;
@@ -903,6 +912,15 @@ bwillwrite(void)
}
/*
+ * Return true if we have too many dirty buffers.
+ */
+int
+buf_dirty_count_severe(void)
+{
+ return(numdirtybuffers >= hidirtybuffers);
+}
+
+/*
* brelse:
*
* Release a busy buffer and, if requested, free its resources. The
@@ -964,10 +982,14 @@ brelse(struct buf * bp)
*
* We still allow the B_INVAL case to call vfs_vmio_release(), even
* if B_DELWRI is set.
+ *
+ * If B_DELWRI is not set we may have to set B_RELBUF if we are low
+ * on pages to return pages to the VM page queues.
*/
-
if (bp->b_flags & B_DELWRI)
bp->b_flags &= ~B_RELBUF;
+ else if (vm_page_count_severe() && !(bp->b_xflags & BX_BKGRDINPROG))
+ bp->b_flags |= B_RELBUF;
/*
* VMIO buffer rundown. It is not very necessary to keep a VMIO buffer
@@ -989,8 +1011,7 @@ brelse(struct buf * bp)
if ((bp->b_flags & B_VMIO)
&& !(bp->b_vp->v_tag == VT_NFS &&
!vn_isdisk(bp->b_vp, NULL) &&
- (bp->b_flags & B_DELWRI) &&
- (bp->b_xflags & BX_BKGRDINPROG))
+ (bp->b_flags & B_DELWRI))
) {
int i, j, resid;
@@ -1017,32 +1038,40 @@ brelse(struct buf * bp)
*
* See man buf(9) for more information
*/
-
resid = bp->b_bufsize;
foff = bp->b_offset;
for (i = 0; i < bp->b_npages; i++) {
+ int had_bogus = 0;
+
m = bp->b_pages[i];
vm_page_flag_clear(m, PG_ZERO);
- if (m == bogus_page) {
+ /*
+ * If we hit a bogus page, fixup *all* the bogus pages
+ * now.
+ */
+ if (m == bogus_page) {
VOP_GETVOBJECT(vp, &obj);
poff = OFF_TO_IDX(bp->b_offset);
+ had_bogus = 1;
for (j = i; j < bp->b_npages; j++) {
- m = bp->b_pages[j];
- if (m == bogus_page) {
- m = vm_page_lookup(obj, poff + j);
- if (!m) {
+ vm_page_t mtmp;
+ mtmp = bp->b_pages[j];
+ if (mtmp == bogus_page) {
+ mtmp = vm_page_lookup(obj, poff + j);
+ if (!mtmp) {
panic("brelse: page missing\n");
}
- bp->b_pages[j] = m;
+ bp->b_pages[j] = mtmp;
}
}
if ((bp->b_flags & B_INVAL) == 0) {
pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
}
+ m = bp->b_pages[i];
}
if ((bp->b_flags & B_NOCACHE) || (bp->b_ioflags & BIO_ERROR)) {
int poffset = foff & PAGE_MASK;
@@ -1051,9 +1080,11 @@ brelse(struct buf * bp)
KASSERT(presid >= 0, ("brelse: extra page"));
vm_page_set_invalid(m, poffset, presid);
+ if (had_bogus)
+ printf("avoided corruption bug in bogus_page/brelse code\n");
}
resid -= PAGE_SIZE - (foff & PAGE_MASK);
- foff = (foff + PAGE_SIZE) & ~PAGE_MASK;
+ foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
}
if (bp->b_flags & (B_INVAL | B_RELBUF))
@@ -1171,7 +1202,7 @@ brelse(struct buf * bp)
/*
* Release a buffer back to the appropriate queue but do not try to free
- * it.
+ * it. The buffer is expected to be used again soon.
*
* bqrelse() is used by bdwrite() to requeue a delayed write, and used by
* biodone() to requeue an async I/O on completion. It is also used when
@@ -1203,6 +1234,15 @@ bqrelse(struct buf * bp)
} else if (bp->b_flags & B_DELWRI) {
bp->b_qindex = QUEUE_DIRTY;
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_DIRTY], bp, b_freelist);
+ } else if (vm_page_count_severe()) {
+ /*
+ * We are too low on memory, we have to try to free the
+ * buffer (most importantly: the wired pages making up its
+ * backing store) *now*.
+ */
+ splx(s);
+ brelse(bp);
+ return;
} else {
bp->b_qindex = QUEUE_CLEAN;
TAILQ_INSERT_TAIL(&bufqueues[QUEUE_CLEAN], bp, b_freelist);
@@ -1264,6 +1304,8 @@ vfs_vmio_release(bp)
vm_page_busy(m);
vm_page_protect(m, VM_PROT_NONE);
vm_page_free(m);
+ } else if (vm_page_count_severe()) {
+ vm_page_try_to_cache(m);
}
}
}
@@ -1419,15 +1461,15 @@ getnewbuf(int slpflag, int slptimeo, int size, int maxsize)
struct buf *nbp;
int defrag = 0;
int nqindex;
- int isspecial;
static int flushingbufs;
- if (curproc != idleproc &&
- (curproc->p_flag & (P_COWINPROGRESS|P_BUFEXHAUST)) == 0)
- isspecial = 0;
- else
- isspecial = 1;
-
+ /*
+ * We can't afford to block since we might be holding a vnode lock,
+ * which may prevent system daemons from running. We deal with
+ * low-memory situations by proactively returning memory and running
+ * async I/O rather than sync I/O.
+ */
+
++getnewbufcalls;
--getnewbufrestarts;
restart:
@@ -1445,42 +1487,28 @@ restart:
* However, there are a number of cases (defragging, reusing, ...)
* where we cannot backup.
*/
+ nqindex = QUEUE_EMPTYKVA;
+ nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]);
- if (isspecial == 0 && numfreebuffers < lofreebuffers) {
+ if (nbp == NULL) {
/*
- * This will cause an immediate failure
+ * If no EMPTYKVA buffers and we are either
+ * defragging or reusing, locate a CLEAN buffer
+ * to free or reuse. If bufspace useage is low
+ * skip this step so we can allocate a new buffer.
*/
- nqindex = QUEUE_CLEAN;
- nbp = NULL;
- } else {
+ if (defrag || bufspace >= lobufspace) {
+ nqindex = QUEUE_CLEAN;
+ nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]);
+ }
+
/*
- * Locate a buffer which already has KVA assigned. First
- * try EMPTYKVA buffers.
+ * Nada. If we are allowed to allocate an EMPTY
+ * buffer, go get one.
*/
- nqindex = QUEUE_EMPTYKVA;
- nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTYKVA]);
-
- if (nbp == NULL) {
- /*
- * If no EMPTYKVA buffers and we are either
- * defragging or reusing, locate a CLEAN buffer
- * to free or reuse. If bufspace useage is low
- * skip this step so we can allocate a new buffer.
- */
- if (defrag || bufspace >= lobufspace) {
- nqindex = QUEUE_CLEAN;
- nbp = TAILQ_FIRST(&bufqueues[QUEUE_CLEAN]);
- }
-
- /*
- * Nada. If we are allowed to allocate an EMPTY
- * buffer, go get one.
- */
- if (nbp == NULL && defrag == 0 &&
- (isspecial || bufspace < hibufspace)) {
- nqindex = QUEUE_EMPTY;
- nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]);
- }
+ if (nbp == NULL && defrag == 0 && bufspace < hibufspace) {
+ nqindex = QUEUE_EMPTY;
+ nbp = TAILQ_FIRST(&bufqueues[QUEUE_EMPTY]);
}
}
@@ -1610,26 +1638,16 @@ restart:
goto restart;
}
- /*
- * If we are a normal process then deal with bufspace
- * hysteresis. A normal process tries to keep bufspace
- * between lobufspace and hibufspace. Note: if we encounter
- * a buffer with b_kvasize == 0 then it means we started
- * our scan on the EMPTY list and should allocate a new
- * buffer.
- */
- if (isspecial == 0) {
- if (bufspace > hibufspace)
- flushingbufs = 1;
- if (flushingbufs && bp->b_kvasize != 0) {
- bp->b_flags |= B_INVAL;
- bfreekva(bp);
- brelse(bp);
- goto restart;
- }
- if (bufspace < lobufspace)
- flushingbufs = 0;
+ if (bufspace >= hibufspace)
+ flushingbufs = 1;
+ if (flushingbufs && bp->b_kvasize != 0) {
+ bp->b_flags |= B_INVAL;
+ bfreekva(bp);
+ brelse(bp);
+ goto restart;
}
+ if (bufspace < lobufspace)
+ flushingbufs = 0;
break;
}
@@ -1705,6 +1723,7 @@ restart:
return(bp);
}
+#if 0
/*
* waitfreebuffers:
*
@@ -1723,6 +1742,8 @@ waitfreebuffers(int slpflag, int slptimeo)
}
}
+#endif
+
/*
* buf_daemon:
*
@@ -2073,8 +2094,12 @@ loop:
* If this check ever becomes a bottleneck it may be better to
* move it into the else, when gbincore() fails. At the moment
* it isn't a problem.
+ *
+ * XXX remove if 0 sections (clean this up after its proven)
*/
+#if 0
if (curproc == idleproc || (curproc->p_flag & P_BUFEXHAUST)) {
+#endif
if (numfreebuffers == 0) {
if (curproc == idleproc)
return NULL;
@@ -2082,9 +2107,11 @@ loop:
tsleep(&needsbuffer, (PRIBIO + 4) | slpflag, "newbuf",
slptimeo);
}
+#if 0
} else if (numfreebuffers < lofreebuffers) {
waitfreebuffers(slpflag, slptimeo);
}
+#endif
if ((bp = gbincore(vp, blkno))) {
/*
@@ -2468,7 +2495,13 @@ allocbuf(struct buf *bp, int size)
pi = OFF_TO_IDX(bp->b_offset) + bp->b_npages;
if ((m = vm_page_lookup(obj, pi)) == NULL) {
- m = vm_page_alloc(obj, pi, VM_ALLOC_NORMAL);
+ /*
+ * note: must allocate system pages
+ * since blocking here could interfere
+ * with paging I/O, no matter which
+ * process we are.
+ */
+ m = vm_page_alloc(obj, pi, VM_ALLOC_SYSTEM);
if (m == NULL) {
VM_WAIT;
vm_pageout_deficit += desiredpages - bp->b_npages;
@@ -2671,7 +2704,7 @@ bufdone(struct buf *bp)
buf_complete(bp);
if (bp->b_flags & B_VMIO) {
- int i, resid;
+ int i;
vm_ooffset_t foff;
vm_page_t m;
vm_object_t obj;
@@ -2722,16 +2755,29 @@ bufdone(struct buf *bp)
for (i = 0; i < bp->b_npages; i++) {
int bogusflag = 0;
+ int resid;
+
+ resid = ((foff + PAGE_SIZE) & ~(off_t)PAGE_MASK) - foff;
+ if (resid > iosize)
+ resid = iosize;
+
+ /*
+ * cleanup bogus pages, restoring the originals
+ */
m = bp->b_pages[i];
if (m == bogus_page) {
bogusflag = 1;
m = vm_page_lookup(obj, OFF_TO_IDX(foff));
if (!m) {
+ panic("biodone: page disappeared!");
#if defined(VFS_BIO_DEBUG)
printf("biodone: page disappeared\n");
#endif
vm_object_pip_subtract(obj, 1);
bp->b_flags &= ~B_CACHE;
+ foff = (foff + PAGE_SIZE) &
+ ~(off_t)PAGE_MASK;
+ iosize -= resid;
continue;
}
bp->b_pages[i] = m;
@@ -2744,9 +2790,6 @@ bufdone(struct buf *bp)
(unsigned long)foff, m->pindex);
}
#endif
- resid = IDX_TO_OFF(m->pindex + 1) - foff;
- if (resid > iosize)
- resid = iosize;
/*
* In the write case, the valid and clean bits are
@@ -2784,7 +2827,7 @@ bufdone(struct buf *bp)
}
vm_page_io_finish(m);
vm_object_pip_subtract(obj, 1);
- foff += resid;
+ foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
iosize -= resid;
}
if (obj)
@@ -2862,7 +2905,7 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
* of the buffer.
*/
soff = off;
- eoff = (off + PAGE_SIZE) & ~PAGE_MASK;
+ eoff = (off + PAGE_SIZE) & ~(off_t)PAGE_MASK;
if (eoff > bp->b_offset + bp->b_bcount)
eoff = bp->b_offset + bp->b_bcount;
@@ -2948,7 +2991,7 @@ retry:
bp->b_pages[i] = bogus_page;
bogus++;
}
- foff = (foff + PAGE_SIZE) & ~PAGE_MASK;
+ foff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
}
if (bogus)
pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
@@ -2976,7 +3019,7 @@ vfs_clean_pages(struct buf * bp)
("vfs_clean_pages: no buffer offset"));
for (i = 0; i < bp->b_npages; i++) {
vm_page_t m = bp->b_pages[i];
- vm_ooffset_t noff = (foff + PAGE_SIZE) & ~PAGE_MASK;
+ vm_ooffset_t noff = (foff + PAGE_SIZE) & ~(off_t)PAGE_MASK;
vm_ooffset_t eoff = noff;
if (eoff > bp->b_offset + bp->b_bufsize)
@@ -3104,9 +3147,14 @@ vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
tryagain:
+ /*
+ * note: must allocate system pages since blocking here
+ * could interfere with paging I/O, no matter which
+ * process we are.
+ */
p = vm_page_alloc(kernel_object,
((pg - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
- VM_ALLOC_NORMAL);
+ VM_ALLOC_SYSTEM);
if (!p) {
vm_pageout_deficit += (to - from) >> PAGE_SHIFT;
VM_WAIT;
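
The vfs_bio.c changes above hinge on two cheap pressure predicates: vm_page_count_severe() for free pages and the new buf_dirty_count_severe() for dirty buffers. The optional copy-and-background-write path, B_RELBUF in brelse(), and the early brelse() in bqrelse() are all gated on them. Below is a minimal userspace sketch of that gating; the counters, thresholds and main() harness are invented stand-ins, not the kernel's own state.

#include <stdio.h>

/* Hypothetical counters standing in for the kernel globals. */
static int numdirtybuffers, hidirtybuffers = 256;
static int free_pages, severe_free_min = 128;

static int buf_dirty_count_severe(void)
{
        return (numdirtybuffers >= hidirtybuffers);
}

static int page_count_severe(void)
{
        return (free_pages < severe_free_min);
}

/* Decide whether the copy-and-background-write optimization is affordable. */
static int can_background_write(int async)
{
        return (async && !page_count_severe() && !buf_dirty_count_severe());
}

int main(void)
{
        numdirtybuffers = 300;          /* too many dirty buffers ... */
        free_pages = 1024;              /* ... but plenty of free pages */
        printf("background write ok: %d\n", can_background_write(1));
        return (0);
}
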
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 4f1aecf..29a1879 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -48,6 +48,7 @@
#include <sys/malloc.h>
#include <sys/mount.h>
#include <sys/resourcevar.h>
+#include <sys/vmmeter.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
@@ -665,6 +666,11 @@ cluster_write(bp, filesize, seqcount)
cluster_wbuild_wb(vp, lblocksize, vp->v_cstart, vp->v_clen + 1);
vp->v_clen = 0;
vp->v_cstart = lbn + 1;
+ } else if (vm_page_count_severe()) {
+ /*
+ * We are low on memory, get it going NOW
+ */
+ bawrite(bp);
} else {
/*
* In the middle of a cluster, so just delay the I/O for now.
diff --git a/sys/kern/vfs_export.c b/sys/kern/vfs_export.c
index fd81bc8..cb46c34 100644
--- a/sys/kern/vfs_export.c
+++ b/sys/kern/vfs_export.c
@@ -1438,10 +1438,14 @@ vget(vp, flags, p)
if ((flags & LK_INTERLOCK) == 0)
mtx_enter(&vp->v_interlock, MTX_DEF);
if (vp->v_flag & VXLOCK) {
- vp->v_flag |= VXWANT;
- mtx_exit(&vp->v_interlock, MTX_DEF);
- tsleep((caddr_t)vp, PINOD, "vget", 0);
- return (ENOENT);
+ if (vp->v_vxproc == curproc) {
+ printf("VXLOCK interlock avoided\n");
+ } else {
+ vp->v_flag |= VXWANT;
+ mtx_exit(&vp->v_interlock, MTX_DEF);
+ tsleep((caddr_t)vp, PINOD, "vget", 0);
+ return (ENOENT);
+ }
}
vp->v_usecount++;
@@ -1731,6 +1735,7 @@ vclean(vp, flags, p)
if (vp->v_flag & VXLOCK)
panic("vclean: deadlock");
vp->v_flag |= VXLOCK;
+ vp->v_vxproc = curproc;
/*
* Even if the count is zero, the VOP_INACTIVE routine may still
* have the object locked while it cleans it out. The VOP_LOCK
@@ -1807,6 +1812,7 @@ vclean(vp, flags, p)
vn_pollgone(vp);
vp->v_tag = VT_NON;
vp->v_flag &= ~VXLOCK;
+ vp->v_vxproc = NULL;
if (vp->v_flag & VXWANT) {
vp->v_flag &= ~VXWANT;
wakeup((caddr_t) vp);
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index fd81bc8..cb46c34 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -1438,10 +1438,14 @@ vget(vp, flags, p)
if ((flags & LK_INTERLOCK) == 0)
mtx_enter(&vp->v_interlock, MTX_DEF);
if (vp->v_flag & VXLOCK) {
- vp->v_flag |= VXWANT;
- mtx_exit(&vp->v_interlock, MTX_DEF);
- tsleep((caddr_t)vp, PINOD, "vget", 0);
- return (ENOENT);
+ if (vp->v_vxproc == curproc) {
+ printf("VXLOCK interlock avoided\n");
+ } else {
+ vp->v_flag |= VXWANT;
+ mtx_exit(&vp->v_interlock, MTX_DEF);
+ tsleep((caddr_t)vp, PINOD, "vget", 0);
+ return (ENOENT);
+ }
}
vp->v_usecount++;
@@ -1731,6 +1735,7 @@ vclean(vp, flags, p)
if (vp->v_flag & VXLOCK)
panic("vclean: deadlock");
vp->v_flag |= VXLOCK;
+ vp->v_vxproc = curproc;
/*
* Even if the count is zero, the VOP_INACTIVE routine may still
* have the object locked while it cleans it out. The VOP_LOCK
@@ -1807,6 +1812,7 @@ vclean(vp, flags, p)
vn_pollgone(vp);
vp->v_tag = VT_NON;
vp->v_flag &= ~VXLOCK;
+ vp->v_vxproc = NULL;
if (vp->v_flag & VXWANT) {
vp->v_flag &= ~VXWANT;
wakeup((caddr_t) vp);
diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c
index 448a2a6..b7cea77 100644
--- a/sys/kern/vfs_vnops.c
+++ b/sys/kern/vfs_vnops.c
@@ -642,12 +642,14 @@ debug_vn_lock(vp, flags, p, filename, line)
do {
if ((flags & LK_INTERLOCK) == 0)
mtx_enter(&vp->v_interlock, MTX_DEF);
- if (vp->v_flag & VXLOCK) {
+ if ((vp->v_flag & VXLOCK) && vp->v_vxproc != curproc) {
vp->v_flag |= VXWANT;
mtx_exit(&vp->v_interlock, MTX_DEF);
tsleep((caddr_t)vp, PINOD, "vn_lock", 0);
error = ENOENT;
} else {
+ if (vp->v_vxproc != NULL)
+ printf("VXLOCK interlock avoided in vn_lock\n");
#ifdef DEBUG_LOCKS
vp->filename = filename;
vp->line = line;
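
The vget()/vn_lock() hunks above record the process holding VXLOCK in the new v_vxproc field, so the lock holder itself does not sleep waiting for its own vclean() to finish. A rough userspace analogy follows, assuming pthread_self() as a stand-in for curproc and an invented structure:

#include <pthread.h>
#include <stdio.h>

struct fake_vnode {
        int             xlocked;        /* VXLOCK analogue */
        pthread_t       xlock_owner;    /* v_vxproc analogue */
};

static int must_wait_for_clean(struct fake_vnode *vp)
{
        if (vp->xlocked && !pthread_equal(vp->xlock_owner, pthread_self()))
                return (1);             /* someone else is cleaning it */
        if (vp->xlocked)
                printf("VXLOCK interlock avoided\n");
        return (0);                     /* unlocked, or we hold it ourselves */
}

int main(void)
{
        struct fake_vnode vn = { 1, pthread_self() };

        printf("wait: %d\n", must_wait_for_clean(&vn));
        return (0);
}
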
diff --git a/sys/sys/buf.h b/sys/sys/buf.h
index d085de6..a10083f 100644
--- a/sys/sys/buf.h
+++ b/sys/sys/buf.h
@@ -494,6 +494,7 @@ struct uio;
caddr_t bufhashinit __P((caddr_t));
void bufinit __P((void));
void bwillwrite __P((void));
+int buf_dirty_count_severe __P((void));
void bremfree __P((struct buf *));
int bread __P((struct vnode *, daddr_t, int,
struct ucred *, struct buf **));
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index 582d00c..75462f6 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -129,6 +129,7 @@ struct vnode {
short vpi_events; /* what they are looking for */
short vpi_revents; /* what has happened */
} v_pollinfo;
+ struct proc *v_vxproc; /* proc owning VXLOCK */
#ifdef DEBUG_LOCKS
const char *filename; /* Source file doing locking */
int line; /* Line number doing locking */
diff --git a/sys/ufs/ffs/ffs_inode.c b/sys/ufs/ffs/ffs_inode.c
index 30f36ee7..a8ae464 100644
--- a/sys/ufs/ffs/ffs_inode.c
+++ b/sys/ufs/ffs/ffs_inode.c
@@ -45,6 +45,7 @@
#include <sys/vnode.h>
#include <sys/malloc.h>
#include <sys/resourcevar.h>
+#include <sys/vmmeter.h>
#include <sys/stat.h>
#include <vm/vm.h>
@@ -111,6 +112,8 @@ ffs_update(vp, waitfor)
ino_to_fsbo(fs, ip->i_number)) = ip->i_din;
if (waitfor && !DOINGASYNC(vp)) {
return (bwrite(bp));
+ } else if (vm_page_count_severe() || buf_dirty_count_severe()) {
+ return (bwrite(bp));
} else {
if (bp->b_bufsize == fs->fs_bsize)
bp->b_flags |= B_CLUSTEROK;
diff --git a/sys/ufs/ffs/ffs_softdep.c b/sys/ufs/ffs/ffs_softdep.c
index 98ad959..c6ac0bd 100644
--- a/sys/ufs/ffs/ffs_softdep.c
+++ b/sys/ufs/ffs/ffs_softdep.c
@@ -91,6 +91,8 @@ MALLOC_DEFINE(M_DIRADD, "diradd","New directory entry");
MALLOC_DEFINE(M_MKDIR, "mkdir","New directory");
MALLOC_DEFINE(M_DIRREM, "dirrem","Directory entry deleted");
+#define M_SOFTDEP_FLAGS (M_WAITOK | M_USE_RESERVE)
+
#define D_PAGEDEP 0
#define D_INODEDEP 1
#define D_NEWBLK 2
@@ -802,7 +804,7 @@ top:
goto top;
}
MALLOC(pagedep, struct pagedep *, sizeof(struct pagedep), M_PAGEDEP,
- M_WAITOK);
+ M_SOFTDEP_FLAGS);
bzero(pagedep, sizeof(struct pagedep));
pagedep->pd_list.wk_type = D_PAGEDEP;
pagedep->pd_mnt = mp;
@@ -879,7 +881,7 @@ top:
}
num_inodedep += 1;
MALLOC(inodedep, struct inodedep *, sizeof(struct inodedep),
- M_INODEDEP, M_WAITOK);
+ M_INODEDEP, M_SOFTDEP_FLAGS);
inodedep->id_list.wk_type = D_INODEDEP;
inodedep->id_fs = fs;
inodedep->id_ino = inum;
@@ -941,7 +943,7 @@ top:
if (sema_get(&newblk_in_progress, 0) == 0)
goto top;
MALLOC(newblk, struct newblk *, sizeof(struct newblk),
- M_NEWBLK, M_WAITOK);
+ M_NEWBLK, M_SOFTDEP_FLAGS);
newblk->nb_state = 0;
newblk->nb_fs = fs;
newblk->nb_newblkno = newblkno;
@@ -1127,7 +1129,7 @@ bmsafemap_lookup(bp)
return (WK_BMSAFEMAP(wk));
FREE_LOCK(&lk);
MALLOC(bmsafemap, struct bmsafemap *, sizeof(struct bmsafemap),
- M_BMSAFEMAP, M_WAITOK);
+ M_BMSAFEMAP, M_SOFTDEP_FLAGS);
bmsafemap->sm_list.wk_type = D_BMSAFEMAP;
bmsafemap->sm_list.wk_state = 0;
bmsafemap->sm_buf = bp;
@@ -1187,7 +1189,7 @@ softdep_setup_allocdirect(ip, lbn, newblkno, oldblkno, newsize, oldsize, bp)
struct newblk *newblk;
MALLOC(adp, struct allocdirect *, sizeof(struct allocdirect),
- M_ALLOCDIRECT, M_WAITOK);
+ M_ALLOCDIRECT, M_SOFTDEP_FLAGS);
bzero(adp, sizeof(struct allocdirect));
adp->ad_list.wk_type = D_ALLOCDIRECT;
adp->ad_lbn = lbn;
@@ -1339,7 +1341,7 @@ newfreefrag(ip, blkno, size)
if (fragnum(fs, blkno) + numfrags(fs, size) > fs->fs_frag)
panic("newfreefrag: frag size");
MALLOC(freefrag, struct freefrag *, sizeof(struct freefrag),
- M_FREEFRAG, M_WAITOK);
+ M_FREEFRAG, M_SOFTDEP_FLAGS);
freefrag->ff_list.wk_type = D_FREEFRAG;
freefrag->ff_state = ip->i_uid & ~ONWORKLIST; /* XXX - used below */
freefrag->ff_inum = ip->i_number;
@@ -1408,7 +1410,7 @@ newallocindir(ip, ptrno, newblkno, oldblkno)
struct allocindir *aip;
MALLOC(aip, struct allocindir *, sizeof(struct allocindir),
- M_ALLOCINDIR, M_WAITOK);
+ M_ALLOCINDIR, M_SOFTDEP_FLAGS);
bzero(aip, sizeof(struct allocindir));
aip->ai_list.wk_type = D_ALLOCINDIR;
aip->ai_state = ATTACHED;
@@ -1561,7 +1563,7 @@ setup_allocindir_phase2(bp, ip, aip)
if (indirdep)
break;
MALLOC(newindirdep, struct indirdep *, sizeof(struct indirdep),
- M_INDIRDEP, M_WAITOK);
+ M_INDIRDEP, M_SOFTDEP_FLAGS);
newindirdep->ir_list.wk_type = D_INDIRDEP;
newindirdep->ir_state = ATTACHED;
LIST_INIT(&newindirdep->ir_deplisthd);
@@ -1623,7 +1625,7 @@ softdep_setup_freeblocks(ip, length)
if (length != 0)
panic("softde_setup_freeblocks: non-zero length");
MALLOC(freeblks, struct freeblks *, sizeof(struct freeblks),
- M_FREEBLKS, M_WAITOK);
+ M_FREEBLKS, M_SOFTDEP_FLAGS);
bzero(freeblks, sizeof(struct freeblks));
freeblks->fb_list.wk_type = D_FREEBLKS;
freeblks->fb_uid = ip->i_uid;
@@ -1870,7 +1872,7 @@ softdep_freefile(pvp, ino, mode)
* This sets up the inode de-allocation dependency.
*/
MALLOC(freefile, struct freefile *, sizeof(struct freefile),
- M_FREEFILE, M_WAITOK);
+ M_FREEFILE, M_SOFTDEP_FLAGS);
freefile->fx_list.wk_type = D_FREEFILE;
freefile->fx_list.wk_state = 0;
freefile->fx_mode = mode;
@@ -2186,7 +2188,7 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
fs = dp->i_fs;
lbn = lblkno(fs, diroffset);
offset = blkoff(fs, diroffset);
- MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD, M_WAITOK);
+ MALLOC(dap, struct diradd *, sizeof(struct diradd), M_DIRADD, M_SOFTDEP_FLAGS);
bzero(dap, sizeof(struct diradd));
dap->da_list.wk_type = D_DIRADD;
dap->da_offset = offset;
@@ -2198,12 +2200,12 @@ softdep_setup_directory_add(bp, dp, diroffset, newinum, newdirbp)
} else {
dap->da_state |= MKDIR_BODY | MKDIR_PARENT;
MALLOC(mkdir1, struct mkdir *, sizeof(struct mkdir), M_MKDIR,
- M_WAITOK);
+ M_SOFTDEP_FLAGS);
mkdir1->md_list.wk_type = D_MKDIR;
mkdir1->md_state = MKDIR_BODY;
mkdir1->md_diradd = dap;
MALLOC(mkdir2, struct mkdir *, sizeof(struct mkdir), M_MKDIR,
- M_WAITOK);
+ M_SOFTDEP_FLAGS);
mkdir2->md_list.wk_type = D_MKDIR;
mkdir2->md_state = MKDIR_PARENT;
mkdir2->md_diradd = dap;
@@ -2438,7 +2440,7 @@ newdirrem(bp, dp, ip, isrmdir, prevdirremp)
(void) request_cleanup(FLUSH_REMOVE, 0);
num_dirrem += 1;
MALLOC(dirrem, struct dirrem *, sizeof(struct dirrem),
- M_DIRREM, M_WAITOK);
+ M_DIRREM, M_SOFTDEP_FLAGS);
bzero(dirrem, sizeof(struct dirrem));
dirrem->dm_list.wk_type = D_DIRREM;
dirrem->dm_state = isrmdir ? RMDIR : 0;
@@ -2535,7 +2537,7 @@ softdep_setup_directory_change(bp, dp, ip, newinum, isrmdir)
*/
if (newinum != WINO) {
MALLOC(dap, struct diradd *, sizeof(struct diradd),
- M_DIRADD, M_WAITOK);
+ M_DIRADD, M_SOFTDEP_FLAGS);
bzero(dap, sizeof(struct diradd));
dap->da_list.wk_type = D_DIRADD;
dap->da_state = DIRCHG | ATTACHED | DEPCOMPLETE;
@@ -2841,7 +2843,7 @@ softdep_disk_io_initiation(bp)
* Replace up-to-date version with safe version.
*/
MALLOC(indirdep->ir_saveddata, caddr_t, bp->b_bcount,
- M_INDIRDEP, M_WAITOK);
+ M_INDIRDEP, M_SOFTDEP_FLAGS);
ACQUIRE_LOCK(&lk);
indirdep->ir_state &= ~ATTACHED;
indirdep->ir_state |= UNDONE;
@@ -2942,7 +2944,7 @@ initiate_write_inodeblock(inodedep, bp)
if (inodedep->id_savedino != NULL)
panic("initiate_write_inodeblock: already doing I/O");
MALLOC(inodedep->id_savedino, struct dinode *,
- sizeof(struct dinode), M_INODEDEP, M_WAITOK);
+ sizeof(struct dinode), M_INODEDEP, M_SOFTDEP_FLAGS);
*inodedep->id_savedino = *dp;
bzero((caddr_t)dp, sizeof(struct dinode));
return;
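
M_SOFTDEP_FLAGS adds M_USE_RESERVE to every softupdates allocation so that dependency tracking can still make progress when the page-cleaning path itself needs memory. The sketch below models the reserve idea with an invented fixed-size pool; the pool sizes and names are illustrative only.

#include <stdio.h>

#define POOL_TOTAL   64
#define POOL_RESERVE  8

static int pool_used;

static int pool_alloc(int use_reserve)
{
        int limit = use_reserve ? POOL_TOTAL : POOL_TOTAL - POOL_RESERVE;

        if (pool_used >= limit)
                return (-1);            /* would have to sleep or fail */
        pool_used++;
        return (0);
}

int main(void)
{
        pool_used = POOL_TOTAL - POOL_RESERVE;  /* only the reserve remains */
        printf("normal: %d, reserved: %d\n", pool_alloc(0), pool_alloc(1));
        return (0);
}
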
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
index be43550..785219c 100644
--- a/sys/ufs/ufs/ufs_readwrite.c
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -48,6 +48,7 @@
#include <vm/vm_map.h>
#include <vm/vnode_pager.h>
#include <sys/event.h>
+#include <sys/vmmeter.h>
#define VN_KNOTE(vp, b) \
KNOTE((struct klist *)&vp->v_pollinfo.vpi_selinfo.si_note, (b))
@@ -501,6 +502,9 @@ WRITE(ap)
} else {
bawrite(bp);
}
+ } else if (vm_page_count_severe() || buf_dirty_count_severe()) {
+ bp->b_flags |= B_CLUSTEROK;
+ bawrite(bp);
} else {
bp->b_flags |= B_CLUSTEROK;
bdwrite(bp);
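
The ffs_update() and UFS WRITE() hunks make the same decision: honour a synchronous request, otherwise push the buffer out immediately (bawrite) when page or dirty-buffer pressure is severe, and only then fall back to a delayed write (bdwrite). A compact sketch of that decision, with the pressure predicates stubbed out, might look like:

#include <stdio.h>

enum wstrategy { WS_SYNC, WS_ASYNC, WS_DELAYED };

static int page_count_severe(void)      { return (0); }  /* stub */
static int buf_dirty_count_severe(void) { return (1); }  /* stub */

static enum wstrategy choose_write(int caller_wants_sync)
{
        if (caller_wants_sync)
                return (WS_SYNC);
        if (page_count_severe() || buf_dirty_count_severe())
                return (WS_ASYNC);      /* bawrite(): start the I/O now */
        return (WS_DELAYED);            /* bdwrite(): let it accumulate */
}

int main(void)
{
        printf("strategy: %d\n", choose_write(0));
        return (0);
}
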
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 6a427c9..a625bc8 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -80,6 +80,7 @@
#include <sys/sysctl.h>
#include <sys/blist.h>
#include <sys/lock.h>
+#include <sys/vmmeter.h>
#ifndef MAX_PAGEOUT_CLUSTER
#define MAX_PAGEOUT_CLUSTER 16
@@ -1619,10 +1620,11 @@ swp_pager_async_iodone(bp)
* status, then finish the I/O ( which decrements the
* busy count and possibly wakes waiter's up ).
*/
- vm_page_protect(m, VM_PROT_READ);
pmap_clear_modify(m);
vm_page_undirty(m);
vm_page_io_finish(m);
+ if (!vm_page_count_severe() || !vm_page_try_to_cache(m))
+ vm_page_protect(m, VM_PROT_READ);
}
}
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 038a5ad..9c868fc 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -860,7 +860,7 @@ loop:
* Don't wakeup too often - wakeup the pageout daemon when
* we would be nearly out of memory.
*/
- if (vm_paging_needed() || cnt.v_free_count < cnt.v_pageout_free_min)
+ if (vm_paging_needed())
pagedaemon_wakeup();
splx(s);
@@ -882,10 +882,10 @@ vm_wait()
s = splvm();
if (curproc == pageproc) {
vm_pageout_pages_needed = 1;
- tsleep(&vm_pageout_pages_needed, PSWP, "vmwait", 0);
+ tsleep(&vm_pageout_pages_needed, PSWP, "VMWait", 0);
} else {
if (!vm_pages_needed) {
- vm_pages_needed++;
+ vm_pages_needed = 1;
wakeup(&vm_pages_needed);
}
tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
@@ -1030,7 +1030,8 @@ vm_page_free_wakeup()
* if pageout daemon needs pages, then tell it that there are
* some free.
*/
- if (vm_pageout_pages_needed) {
+ if (vm_pageout_pages_needed &&
+ cnt.v_cache_count + cnt.v_free_count >= cnt.v_pageout_free_min) {
wakeup(&vm_pageout_pages_needed);
vm_pageout_pages_needed = 0;
}
@@ -1039,9 +1040,9 @@ vm_page_free_wakeup()
* high water mark. And wakeup scheduler process if we have
* lots of memory. this process will swapin processes.
*/
- if (vm_pages_needed && vm_page_count_min()) {
- wakeup(&cnt.v_free_count);
+ if (vm_pages_needed && !vm_page_count_min()) {
vm_pages_needed = 0;
+ wakeup(&cnt.v_free_count);
}
}
@@ -1240,6 +1241,9 @@ vm_page_wire(m)
* processes. This optimization causes one-time-use metadata to be
* reused more quickly.
*
+ * BUT, if we are in a low-memory situation we have no choice but to
+ * put clean pages on the cache queue.
+ *
* A number of routines use vm_page_unwire() to guarantee that the page
* will go into either the inactive or active queues, and will NEVER
* be placed in the cache - for example, just after dirtying a page.
@@ -1326,6 +1330,25 @@ vm_page_deactivate(vm_page_t m)
}
/*
+ * vm_page_try_to_cache:
+ *
+ * Returns 0 on failure, 1 on success
+ */
+int
+vm_page_try_to_cache(vm_page_t m)
+{
+ if (m->dirty || m->hold_count || m->busy || m->wire_count ||
+ (m->flags & (PG_BUSY|PG_UNMANAGED))) {
+ return(0);
+ }
+ vm_page_test_dirty(m);
+ if (m->dirty)
+ return(0);
+ vm_page_cache(m);
+ return(1);
+}
+
+/*
* vm_page_cache
*
* Put the specified page onto the page cache queue (if appropriate).
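
vm_page_try_to_cache() gives the I/O completion paths (see the swap_pager.c and vm_pageout.c hunks) a way to hand clean, idle pages straight to the cache queue when memory is severe, instead of merely write-protecting them. Below is a userspace restatement of the test and of the caller idiom, using an invented page structure and stubbed predicates; the real routine also runs vm_page_test_dirty() before caching.

#include <stdio.h>

struct fake_page {
        int dirty;
        int hold_count;
        int busy;
        int wire_count;
        int flag_busy;          /* PG_BUSY analogue */
        int flag_unmanaged;     /* PG_UNMANAGED analogue */
        int cached;
        int readonly;
};

static int page_count_severe(void) { return (1); }      /* stub: pretend shortage */

static int try_to_cache(struct fake_page *m)
{
        if (m->dirty || m->hold_count || m->busy || m->wire_count ||
            m->flag_busy || m->flag_unmanaged)
                return (0);     /* cannot be cached right now */
        m->cached = 1;          /* stands in for vm_page_cache() */
        return (1);
}

/* Caller idiom from swp_pager_async_iodone() and vm_pageout_flush(). */
static void finish_io(struct fake_page *m)
{
        if (!page_count_severe() || !try_to_cache(m))
                m->readonly = 1;        /* stands in for vm_page_protect(READ) */
}

int main(void)
{
        struct fake_page pg = { 0 };

        finish_io(&pg);
        printf("cached=%d readonly=%d\n", pg.cached, pg.readonly);
        return (0);
}
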
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index cf58985..4c31df9 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -251,6 +251,7 @@ extern struct vpgqueues vm_page_queues[PQ_COUNT];
#define PG_SWAPINPROG 0x0200 /* swap I/O in progress on page */
#define PG_NOSYNC 0x0400 /* do not collect for syncer */
#define PG_UNMANAGED 0x0800 /* No PV management for page */
+#define PG_MARKER 0x1000 /* special queue marker page */
/*
* Misc constants.
@@ -403,6 +404,7 @@ void vm_page_activate __P((vm_page_t));
vm_page_t vm_page_alloc __P((vm_object_t, vm_pindex_t, int));
vm_page_t vm_page_grab __P((vm_object_t, vm_pindex_t, int));
void vm_page_cache __P((register vm_page_t));
+int vm_page_try_to_cache __P((vm_page_t));
void vm_page_dontneed __P((register vm_page_t));
static __inline void vm_page_copy __P((vm_page_t, vm_page_t));
static __inline void vm_page_free __P((vm_page_t));
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index d12ecac..4ab3930 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -146,6 +146,7 @@ static int defer_swap_pageouts=0;
static int disable_swap_pageouts=0;
static int max_page_launder=100;
+static int vm_pageout_actcmp=0;
#if defined(NO_SWAPPING)
static int vm_swap_enabled=0;
static int vm_swap_idle_enabled=0;
@@ -189,6 +190,8 @@ SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts,
SYSCTL_INT(_vm, OID_AUTO, max_page_launder,
CTLFLAG_RW, &max_page_launder, 0, "Maximum number of pages to clean per pass");
+SYSCTL_INT(_vm, OID_AUTO, vm_pageout_actcmp,
+ CTLFLAG_RD, &vm_pageout_actcmp, 0, "pagedaemon aggressiveness");
#define VM_PAGEOUT_PAGE_COUNT 16
@@ -372,6 +375,7 @@ vm_pageout_flush(mc, count, flags)
*/
for (i = 0; i < count; i++) {
+ KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL && mc[i]->dirty == VM_PAGE_BITS_ALL, ("vm_pageout_flush page %p index %d/%d: partially dirty page", mc[i], i, count));
vm_page_io_start(mc[i]);
vm_page_protect(mc[i], VM_PROT_READ);
}
@@ -424,6 +428,8 @@ vm_pageout_flush(mc, count, flags)
if (pageout_status[i] != VM_PAGER_PEND) {
vm_object_pip_wakeup(object);
vm_page_io_finish(mt);
+ if (!vm_page_count_severe() || !vm_page_try_to_cache(mt))
+ vm_page_protect(mt, VM_PROT_READ);
}
}
return numpagedout;
@@ -621,10 +627,10 @@ static int
vm_pageout_scan()
{
vm_page_t m, next;
+ struct vm_page marker;
int page_shortage, maxscan, pcount;
int addl_page_shortage, addl_page_shortage_init;
int maxlaunder;
- int launder_loop = 0;
struct proc *p, *bigproc;
vm_offset_t size, bigsize;
vm_object_t object;
@@ -646,33 +652,37 @@ vm_pageout_scan()
/*
* Calculate the number of pages we want to either free or move
- * to the cache.
+ * to the cache. Be more aggressive if we aren't making our target.
*/
- page_shortage = vm_paging_target() + addl_page_shortage_init;
+ page_shortage = vm_paging_target() +
+ addl_page_shortage_init + vm_pageout_actcmp;
/*
- * Figure out what to do with dirty pages when they are encountered.
- * Assume that 1/3 of the pages on the inactive list are clean. If
- * we think we can reach our target, disable laundering (do not
- * clean any dirty pages). If we miss the target we will loop back
- * up and do a laundering run.
+ * Figure out how aggressively we should flush dirty pages.
*/
+ {
+ int factor = vm_pageout_actcmp;
- if (cnt.v_inactive_count / 3 > page_shortage) {
- maxlaunder = 0;
- launder_loop = 0;
- } else {
- maxlaunder =
- (cnt.v_inactive_target > max_page_launder) ?
- max_page_launder : cnt.v_inactive_target;
- launder_loop = 1;
+ maxlaunder = cnt.v_inactive_target / 3 + factor;
+ if (maxlaunder > max_page_launder + factor)
+ maxlaunder = max_page_launder + factor;
}
/*
+ * Initialize our marker
+ */
+ bzero(&marker, sizeof(marker));
+ marker.flags = PG_BUSY | PG_FICTITIOUS | PG_MARKER;
+ marker.queue = PQ_INACTIVE;
+ marker.wire_count = 1;
+
+ /*
* Start scanning the inactive queue for pages we can move to the
* cache or free. The scan will stop when the target is reached or
- * we have scanned the entire inactive queue.
+ * we have scanned the entire inactive queue. Note that m->act_count
+ * is not used to form decisions for the inactive queue, only for the
+ * active queue.
*/
rescan0:
@@ -690,6 +700,12 @@ rescan0:
next = TAILQ_NEXT(m, pageq);
+ /*
+ * skip marker pages
+ */
+ if (m->flags & PG_MARKER)
+ continue;
+
if (m->hold_count) {
s = splvm();
TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
@@ -766,7 +782,8 @@ rescan0:
--page_shortage;
/*
- * Clean pages can be placed onto the cache queue.
+ * Clean pages can be placed onto the cache queue. This
+ * effectively frees them.
*/
} else if (m->dirty == 0) {
vm_page_cache(m);
@@ -777,7 +794,6 @@ rescan0:
* only a limited number of pages per pagedaemon pass.
*/
} else if (maxlaunder > 0) {
- int written;
int swap_pageouts_ok;
struct vnode *vp = NULL;
struct mount *mp;
@@ -806,29 +822,6 @@ rescan0:
}
/*
- * For now we protect against potential memory
- * deadlocks by requiring significant memory to be
- * free if the object is not OBJT_DEFAULT or OBJT_SWAP.
- * We do not 'trust' any other object type to operate
- * with low memory, not even OBJT_DEVICE. The VM
- * allocator will special case allocations done by
- * the pageout daemon so the check below actually
- * does have some hysteresis in it. It isn't the best
- * solution, though.
- */
-
- if (object->type != OBJT_DEFAULT &&
- object->type != OBJT_SWAP &&
- cnt.v_free_count < cnt.v_free_reserved) {
- s = splvm();
- TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
- TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m,
- pageq);
- splx(s);
- continue;
- }
-
- /*
* Presumably we have sufficient free memory to do
* the more sophisticated checks and locking required
* for vnodes.
@@ -879,10 +872,15 @@ rescan0:
}
/*
- * The page might have been moved to another queue
- * during potential blocking in vget() above.
+ * The page might have been moved to another
+ * queue during potential blocking in vget()
+ * above. The page might have been freed and
+ * reused for another vnode. The object might
+ * have been reused for another vnode.
*/
- if (m->queue != PQ_INACTIVE) {
+ if (m->queue != PQ_INACTIVE ||
+ m->object != object ||
+ object->handle != vp) {
if (object->flags & OBJ_MIGHTBEDIRTY)
vnodes_skipped++;
vput(vp);
@@ -891,9 +889,10 @@ rescan0:
}
/*
- * The page may have been busied during the blocking in
- * vput(); We don't move the page back onto the end of
- * the queue so that statistics are more correct if we don't.
+ * The page may have been busied during the
+ * blocking in vput(); We don't move the
+ * page back onto the end of the queue so that
+ * statistics are more correct if we don't.
*/
if (m->busy || (m->flags & PG_BUSY)) {
vput(vp);
@@ -921,42 +920,57 @@ rescan0:
* If a page is dirty, then it is either being washed
* (but not yet cleaned) or it is still in the
* laundry. If it is still in the laundry, then we
- * start the cleaning operation.
+ * start the cleaning operation. maxlaunder nominally
+ * counts I/O cost (seeks) rather then bytes.
+ *
+ * This operation may cluster, invalidating the 'next'
+ * pointer. To prevent an inordinate number of
+ * restarts we use our marker to remember our place.
*/
- written = vm_pageout_clean(m);
+ s = splvm();
+ TAILQ_INSERT_AFTER(&vm_page_queues[PQ_INACTIVE].pl, m, &marker, pageq);
+ splx(s);
+ if (vm_pageout_clean(m) != 0)
+ --maxlaunder;
+ s = splvm();
+ next = TAILQ_NEXT(&marker, pageq);
+ TAILQ_REMOVE(&vm_page_queues[PQ_INACTIVE].pl, &marker, pageq);
+ splx(s);
if (vp) {
vput(vp);
vn_finished_write(mp);
}
-
- maxlaunder -= written;
}
}
/*
- * If we still have a page shortage and we didn't launder anything,
- * run the inactive scan again and launder something this time.
+ * If we were not able to meet our target, increase actcmp
*/
- if (launder_loop == 0 && page_shortage > 0) {
- launder_loop = 1;
- maxlaunder =
- (cnt.v_inactive_target > max_page_launder) ?
- max_page_launder : cnt.v_inactive_target;
- goto rescan0;
+ if (vm_page_count_min()) {
+ if (vm_pageout_actcmp < ACT_MAX / 2)
+ vm_pageout_actcmp += ACT_ADVANCE;
+ } else {
+ if (vm_pageout_actcmp < ACT_DECLINE)
+ vm_pageout_actcmp = 0;
+ else
+ vm_pageout_actcmp -= ACT_DECLINE;
}
/*
- * Compute the page shortage from the point of view of having to
- * move pages from the active queue to the inactive queue.
+ * Compute the number of pages we want to try to move from the
+ * active queue to the inactive queue.
*/
- page_shortage = (cnt.v_inactive_target + cnt.v_cache_min) -
- (cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
+ page_shortage = vm_paging_target() +
+ cnt.v_inactive_target - cnt.v_inactive_count;
page_shortage += addl_page_shortage;
+ page_shortage += vm_pageout_actcmp;
/*
- * Scan the active queue for things we can deactivate
+ * Scan the active queue for things we can deactivate. We nominally
+ * track the per-page activity counter and use it to locate
+ * deactivation candidates.
*/
pcount = cnt.v_active_count;
@@ -1026,7 +1040,8 @@ rescan0:
} else {
m->act_count -= min(m->act_count, ACT_DECLINE);
if (vm_pageout_algorithm_lru ||
- (m->object->ref_count == 0) || (m->act_count == 0)) {
+ (m->object->ref_count == 0) ||
+ (m->act_count <= vm_pageout_actcmp)) {
page_shortage--;
if (m->object->ref_count == 0) {
vm_page_protect(m, VM_PROT_NONE);
@@ -1111,7 +1126,7 @@ rescan0:
* make sure that we have swap space -- if we are low on memory and
* swap -- then kill the biggest process.
*/
- if ((vm_swap_size == 0 || swap_pager_full) && vm_page_count_min()) {
+ if ((vm_swap_size < 64 || swap_pager_full) && vm_page_count_min()) {
bigproc = NULL;
bigsize = 0;
for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
@@ -1349,20 +1364,31 @@ vm_pageout()
int error;
int s = splvm();
- if (vm_pages_needed && vm_page_count_min()) {
+ /*
+ * If we have enough free memory, wakeup waiters. Do
+ * not clear vm_pages_needed until we reach our target,
+ * otherwise we may be woken up over and over again and
+ * waste a lot of cpu.
+ */
+ if (vm_pages_needed && !vm_page_count_min()) {
+ if (vm_paging_needed() <= 0)
+ vm_pages_needed = 0;
+ wakeup(&cnt.v_free_count);
+ }
+ if (vm_pages_needed) {
/*
* Still not done, sleep a bit and go again
*/
- vm_pages_needed = 0;
tsleep(&vm_pages_needed, PVM, "psleep", hz/2);
} else {
/*
* Good enough, sleep & handle stats
*/
- vm_pages_needed = 0;
error = tsleep(&vm_pages_needed,
PVM, "psleep", vm_pageout_stats_interval * hz);
if (error && !vm_pages_needed) {
+ if (vm_pageout_actcmp > 0)
+ --vm_pageout_actcmp;
splx(s);
vm_pageout_page_stats();
continue;
@@ -1371,11 +1397,9 @@ vm_pageout()
if (vm_pages_needed)
cnt.v_pdwakeups++;
- vm_pages_needed = 0;
splx(s);
vm_pageout_scan();
vm_pageout_deficit = 0;
- wakeup(&cnt.v_free_count);
}
}
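
The most delicate part of the vm_pageout.c rewrite is the PG_MARKER page: vm_pageout_clean() may cluster and unlink neighbouring pages, so the scan parks a dummy marker entry after the current page and resumes from it afterwards instead of trusting a saved 'next' pointer. A self-contained sketch of the same pattern on a plain TAILQ, with invented node and function names, is:

#include <sys/queue.h>
#include <stdio.h>

struct node {
        int                     is_marker;
        int                     value;
        TAILQ_ENTRY(node)       link;
};
TAILQ_HEAD(nodehead, node);

/* Stand-in for vm_pageout_clean(): may unlink pages near 'n'. */
static void clean_and_maybe_cluster(struct nodehead *q, struct node *n)
{
        (void)q; (void)n;
}

static void scan(struct nodehead *q)
{
        struct node marker = { .is_marker = 1 };
        struct node *n, *next;

        for (n = TAILQ_FIRST(q); n != NULL; n = next) {
                next = TAILQ_NEXT(n, link);
                if (n->is_marker)
                        continue;               /* skip other scanners' markers */
                TAILQ_INSERT_AFTER(q, n, &marker, link);
                clean_and_maybe_cluster(q, n);  /* 'next' may now be stale */
                next = TAILQ_NEXT(&marker, link);
                TAILQ_REMOVE(q, &marker, link);
                printf("visited %d\n", n->value);
        }
}

int main(void)
{
        struct nodehead q = TAILQ_HEAD_INITIALIZER(q);
        struct node a = { 0, 1 }, b = { 0, 2 };

        TAILQ_INSERT_TAIL(&q, &a, link);
        TAILQ_INSERT_TAIL(&q, &b, link);
        scan(&q);
        return (0);
}

In the kernel version the marker is additionally flagged PG_BUSY | PG_FICTITIOUS and given a wire count so nothing else will touch or free it while it sits on the inactive queue.
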