summaryrefslogtreecommitdiffstats
path: root/sys/vm/vm_object.c
diff options
context:
space:
mode:
authordillon <dillon@FreeBSD.org>2002-03-06 02:42:56 +0000
committerdillon <dillon@FreeBSD.org>2002-03-06 02:42:56 +0000
commit904a90fbd58e8551b6d5df48853875e9e4e2a97b (patch)
treef4f014a662c183f4e4b98d8e257d0e40dd3636e7 /sys/vm/vm_object.c
parent95a814930f2b5f06711fe8a580f334e004f1ce65 (diff)
downloadFreeBSD-src-904a90fbd58e8551b6d5df48853875e9e4e2a97b.zip
FreeBSD-src-904a90fbd58e8551b6d5df48853875e9e4e2a97b.tar.gz
Add a sequential iteration optimization to vm_object_page_clean(). This
moderately improves msync() and VM object flushing for objects containing randomly dirtied pages (fsync(), msync(), filesystem update daemon), and reduces CPU use for small-ranged sequential msync()s against very large mmap()ings, as might be performed by a database, from O(N) to O(1). A sysctl, vm.msync_flush_flags, has been added and defaults to 3 (the two committed optimizations are turned on by default). Setting it to 0 turns off both optimizations. This code has already been tested under stable and is one in a series of memq / vp->v_dirtyblkhd / fsync optimizations to remove O(N^2) restart conditions that will be coming down the pipe. MFC after: 3 days
Diffstat (limited to 'sys/vm/vm_object.c')
-rw-r--r--sys/vm/vm_object.c269
1 files changed, 194 insertions, 75 deletions
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 128e767..f3936d9 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -73,6 +73,8 @@
#include <sys/lock.h>
#include <sys/mman.h>
#include <sys/mount.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
#include <sys/mutex.h>
#include <sys/proc.h> /* for curproc, pageproc */
#include <sys/socket.h>
@@ -93,7 +95,20 @@
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
-static void vm_object_qcollapse __P((vm_object_t object));
+#define EASY_SCAN_FACTOR 8
+
+#define MSYNC_FLUSH_HARDSEQ 0x01
+#define MSYNC_FLUSH_SOFTSEQ 0x02
+
+/*
+ * msync / VM object flushing optimizations
+ */
+static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ;
+SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags,
+ CTLFLAG_RW, &msync_flush_flags, 0, "");
+
+static void vm_object_qcollapse(vm_object_t object);
+static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags);
/*
* Virtual memory objects maintain the actual data
@@ -578,21 +593,12 @@ vm_object_terminate(vm_object_t object)
void
vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int flags)
{
- vm_page_t p, np, tp;
+ vm_page_t p, np;
vm_offset_t tstart, tend;
vm_pindex_t pi;
- int s;
struct vnode *vp;
- int runlen;
- int maxf;
- int chkb;
- int maxb;
- int i;
int clearobjflags;
int pagerflags;
- vm_page_t maf[vm_pageout_page_count];
- vm_page_t mab[vm_pageout_page_count];
- vm_page_t ma[vm_pageout_page_count];
int curgeneration;
GIANT_REQUIRED;
@@ -616,6 +622,72 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int
}
/*
+ * If the caller is smart and only msync()s a range he knows is
+ * dirty, we may be able to avoid an object scan. This results in
+ * a phenomenal improvement in performance. We cannot do this
+ * as a matter of course because the object may be huge - e.g.
+ * the size might be in the gigabytes or terabytes.
+ */
+ if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) {
+ vm_offset_t tscan;
+ int scanlimit;
+ int scanreset;
+
+ scanreset = object->resident_page_count / EASY_SCAN_FACTOR;
+ if (scanreset < 16)
+ scanreset = 16;
+
+ scanlimit = scanreset;
+ tscan = tstart;
+ while (tscan < tend) {
+ curgeneration = object->generation;
+ p = vm_page_lookup(object, tscan);
+ if (p == NULL || p->valid == 0 ||
+ (p->queue - p->pc) == PQ_CACHE) {
+ if (--scanlimit == 0)
+ break;
+ ++tscan;
+ continue;
+ }
+ vm_page_test_dirty(p);
+ if ((p->dirty & p->valid) == 0) {
+ if (--scanlimit == 0)
+ break;
+ ++tscan;
+ continue;
+ }
+ /*
+ * If we have been asked to skip nosync pages and
+ * this is a nosync page, we can't continue.
+ */
+ if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) {
+ if (--scanlimit == 0)
+ break;
+ ++tscan;
+ continue;
+ }
+ scanlimit = scanreset;
+
+ /*
+ * This returns 0 if it was unable to busy the first
+ * page (i.e. had to sleep).
+ */
+ tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags);
+ }
+
+ /*
+ * If everything was dirty and we flushed it successfully,
+ * and the requested range is not the entire object, we
+ * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can
+ * return immediately.
+ */
+ if (tscan >= tend && (tstart || tend < object->size)) {
+ vm_object_clear_flag(object, OBJ_CLEANING);
+ return;
+ }
+ }
+
+ /*
* Generally set CLEANCHK interlock and make the page read-only so
* we can then clear the object flags.
*
@@ -652,8 +724,11 @@ rescan:
curgeneration = object->generation;
for (p = TAILQ_FIRST(&object->memq); p; p = np) {
+ int n;
+
np = TAILQ_NEXT(p, listq);
+again:
pi = p->pindex;
if (((p->flags & PG_CLEANCHK) == 0) ||
(pi < tstart) || (pi >= tend) ||
@@ -679,17 +754,87 @@ rescan:
continue;
}
- s = splvm();
- while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
- if (object->generation != curgeneration) {
- splx(s);
- goto rescan;
+ n = vm_object_page_collect_flush(object, p,
+ curgeneration, pagerflags);
+ if (n == 0)
+ goto rescan;
+
+ if (object->generation != curgeneration)
+ goto rescan;
+
+ /*
+ * Try to optimize the next page. If we can't we pick up
+ * our (random) scan where we left off.
+ */
+ if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) {
+ if ((p = vm_page_lookup(object, pi + n)) != NULL)
+ goto again;
+ }
+ }
+
+#if 0
+ VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
+#endif
+
+ vm_object_clear_flag(object, OBJ_CLEANING);
+ return;
+}
+
+static int
+vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags)
+{
+ int runlen;
+ int s;
+ int maxf;
+ int chkb;
+ int maxb;
+ int i;
+ vm_pindex_t pi;
+ vm_page_t maf[vm_pageout_page_count];
+ vm_page_t mab[vm_pageout_page_count];
+ vm_page_t ma[vm_pageout_page_count];
+
+ s = splvm();
+ pi = p->pindex;
+ while (vm_page_sleep_busy(p, TRUE, "vpcwai")) {
+ if (object->generation != curgeneration) {
+ splx(s);
+ return(0);
+ }
+ }
+
+ maxf = 0;
+ for(i = 1; i < vm_pageout_page_count; i++) {
+ vm_page_t tp;
+
+ if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
+ if ((tp->flags & PG_BUSY) ||
+ (tp->flags & PG_CLEANCHK) == 0 ||
+ (tp->busy != 0))
+ break;
+ if((tp->queue - tp->pc) == PQ_CACHE) {
+ vm_page_flag_clear(tp, PG_CLEANCHK);
+ break;
}
+ vm_page_test_dirty(tp);
+ if ((tp->dirty & tp->valid) == 0) {
+ vm_page_flag_clear(tp, PG_CLEANCHK);
+ break;
+ }
+ maf[ i - 1 ] = tp;
+ maxf++;
+ continue;
}
+ break;
+ }
+
+ maxb = 0;
+ chkb = vm_pageout_page_count - maxf;
+ if (chkb) {
+ for(i = 1; i < chkb;i++) {
+ vm_page_t tp;
- maxf = 0;
- for (i = 1; i < vm_pageout_page_count; i++) {
- if ((tp = vm_page_lookup(object, pi + i)) != NULL) {
+ if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
if ((tp->flags & PG_BUSY) ||
(tp->flags & PG_CLEANCHK) == 0 ||
(tp->busy != 0))
@@ -703,71 +848,45 @@ rescan:
vm_page_flag_clear(tp, PG_CLEANCHK);
break;
}
- maf[ i - 1 ] = tp;
- maxf++;
+ mab[ i - 1 ] = tp;
+ maxb++;
continue;
}
break;
}
+ }
- maxb = 0;
- chkb = vm_pageout_page_count - maxf;
- if (chkb) {
- for (i = 1; i < chkb; i++) {
- if ((tp = vm_page_lookup(object, pi - i)) != NULL) {
- if ((tp->flags & PG_BUSY) ||
- (tp->flags & PG_CLEANCHK) == 0 ||
- (tp->busy != 0))
- break;
- if((tp->queue - tp->pc) == PQ_CACHE) {
- vm_page_flag_clear(tp, PG_CLEANCHK);
- break;
- }
- vm_page_test_dirty(tp);
- if ((tp->dirty & tp->valid) == 0) {
- vm_page_flag_clear(tp, PG_CLEANCHK);
- break;
- }
- mab[ i - 1 ] = tp;
- maxb++;
- continue;
- }
- break;
- }
- }
+ for(i = 0; i < maxb; i++) {
+ int index = (maxb - i) - 1;
+ ma[index] = mab[i];
+ vm_page_flag_clear(ma[index], PG_CLEANCHK);
+ }
+ vm_page_flag_clear(p, PG_CLEANCHK);
+ ma[maxb] = p;
+ for(i = 0; i < maxf; i++) {
+ int index = (maxb + i) + 1;
+ ma[index] = maf[i];
+ vm_page_flag_clear(ma[index], PG_CLEANCHK);
+ }
+ runlen = maxb + maxf + 1;
- for (i = 0; i < maxb; i++) {
- int index = (maxb - i) - 1;
- ma[index] = mab[i];
- vm_page_flag_clear(ma[index], PG_CLEANCHK);
- }
- vm_page_flag_clear(p, PG_CLEANCHK);
- ma[maxb] = p;
- for (i = 0 ; i < maxf; i++) {
- int index = (maxb + i) + 1;
- ma[index] = maf[i];
- vm_page_flag_clear(ma[index], PG_CLEANCHK);
- }
- runlen = maxb + maxf + 1;
+ splx(s);
+ vm_pageout_flush(ma, runlen, pagerflags);
+ for (i = 0; i < runlen; i++) {
+ if (ma[i]->valid & ma[i]->dirty) {
+ vm_page_protect(ma[i], VM_PROT_READ);
+ vm_page_flag_set(ma[i], PG_CLEANCHK);
- splx(s);
- vm_pageout_flush(ma, runlen, pagerflags);
- for (i = 0; i < runlen; i++) {
- if (ma[i]->valid & ma[i]->dirty) {
- vm_page_protect(ma[i], VM_PROT_READ);
- vm_page_flag_set(ma[i], PG_CLEANCHK);
- }
+ /*
+ * maxf will end up being the actual number of pages
+ * we wrote out contiguously, non-inclusive of the
+ * first page. We do not count look-behind pages.
+ */
+ if (i >= maxb + 1 && (maxf > i - maxb - 1))
+ maxf = i - maxb - 1;
}
- if (object->generation != curgeneration)
- goto rescan;
}
-
-#if 0
- VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc);
-#endif
-
- vm_object_clear_flag(object, OBJ_CLEANING);
- return;
+ return(maxf + 1);
}
/*
OpenPOWER on IntegriCloud