diff options
author | dillon <dillon@FreeBSD.org> | 2002-03-06 02:42:56 +0000 |
---|---|---|
committer | dillon <dillon@FreeBSD.org> | 2002-03-06 02:42:56 +0000 |
commit | 904a90fbd58e8551b6d5df48853875e9e4e2a97b (patch) | |
tree | f4f014a662c183f4e4b98d8e257d0e40dd3636e7 /sys/vm/vm_object.c | |
parent | 95a814930f2b5f06711fe8a580f334e004f1ce65 (diff) | |
download | FreeBSD-src-904a90fbd58e8551b6d5df48853875e9e4e2a97b.zip FreeBSD-src-904a90fbd58e8551b6d5df48853875e9e4e2a97b.tar.gz |
Add a sequential iteration optimization to vm_object_page_clean(). This
moderately improves msyncs and VM object flushing for objects containing
randomly dirtied pages (fsync(), msync(), filesystem update daemon),
and improves cpu use for small-ranged sequential msync()s in the face of
very large mmap()ings from O(N) to O(1) as might be performed by a database.
A sysctl, vm.msync_flush_flags, has been added and defaults to 3 (the two
committed optimizations are turned on by default). 0 will turn off both
optimizations.
This code has already been tested under stable and is one in a series of
memq / vp->v_dirtyblkhd / fsync optimizations to remove O(N^2) restart
conditions that will be coming down the pipe.
MFC after: 3 days
Diffstat (limited to 'sys/vm/vm_object.c')
-rw-r--r-- | sys/vm/vm_object.c | 269 |
1 files changed, 194 insertions, 75 deletions
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 128e767..f3936d9 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -73,6 +73,8 @@ #include <sys/lock.h> #include <sys/mman.h> #include <sys/mount.h> +#include <sys/kernel.h> +#include <sys/sysctl.h> #include <sys/mutex.h> #include <sys/proc.h> /* for curproc, pageproc */ #include <sys/socket.h> @@ -93,7 +95,20 @@ #include <vm/vm_kern.h> #include <vm/vm_extern.h> -static void vm_object_qcollapse __P((vm_object_t object)); +#define EASY_SCAN_FACTOR 8 + +#define MSYNC_FLUSH_HARDSEQ 0x01 +#define MSYNC_FLUSH_SOFTSEQ 0x02 + +/* + * msync / VM object flushing optimizations + */ +static int msync_flush_flags = MSYNC_FLUSH_HARDSEQ | MSYNC_FLUSH_SOFTSEQ; +SYSCTL_INT(_vm, OID_AUTO, msync_flush_flags, + CTLFLAG_RW, &msync_flush_flags, 0, ""); + +static void vm_object_qcollapse(vm_object_t object); +static int vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags); /* * Virtual memory objects maintain the actual data @@ -578,21 +593,12 @@ vm_object_terminate(vm_object_t object) void vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int flags) { - vm_page_t p, np, tp; + vm_page_t p, np; vm_offset_t tstart, tend; vm_pindex_t pi; - int s; struct vnode *vp; - int runlen; - int maxf; - int chkb; - int maxb; - int i; int clearobjflags; int pagerflags; - vm_page_t maf[vm_pageout_page_count]; - vm_page_t mab[vm_pageout_page_count]; - vm_page_t ma[vm_pageout_page_count]; int curgeneration; GIANT_REQUIRED; @@ -616,6 +622,72 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int } /* + * If the caller is smart and only msync()s a range he knows is + * dirty, we may be able to avoid an object scan. This results in + * a phenominal improvement in performance. We cannot do this + * as a matter of course because the object may be huge - e.g. + * the size might be in the gigabytes or terrabytes. 
+ */ + if (msync_flush_flags & MSYNC_FLUSH_HARDSEQ) { + vm_offset_t tscan; + int scanlimit; + int scanreset; + + scanreset = object->resident_page_count / EASY_SCAN_FACTOR; + if (scanreset < 16) + scanreset = 16; + + scanlimit = scanreset; + tscan = tstart; + while (tscan < tend) { + curgeneration = object->generation; + p = vm_page_lookup(object, tscan); + if (p == NULL || p->valid == 0 || + (p->queue - p->pc) == PQ_CACHE) { + if (--scanlimit == 0) + break; + ++tscan; + continue; + } + vm_page_test_dirty(p); + if ((p->dirty & p->valid) == 0) { + if (--scanlimit == 0) + break; + ++tscan; + continue; + } + /* + * If we have been asked to skip nosync pages and + * this is a nosync page, we can't continue. + */ + if ((flags & OBJPC_NOSYNC) && (p->flags & PG_NOSYNC)) { + if (--scanlimit == 0) + break; + ++tscan; + continue; + } + scanlimit = scanreset; + + /* + * This returns 0 if it was unable to busy the first + * page (i.e. had to sleep). + */ + tscan += vm_object_page_collect_flush(object, p, curgeneration, pagerflags); + } + + /* + * If everything was dirty and we flushed it successfully, + * and the requested range is not the entire object, we + * don't have to mess with CLEANCHK or MIGHTBEDIRTY and can + * return immediately. + */ + if (tscan >= tend && (tstart || tend < object->size)) { + vm_object_clear_flag(object, OBJ_CLEANING); + return; + } + } + + /* * Generally set CLEANCHK interlock and make the page read-only so * we can then clear the object flags. 
* @@ -652,8 +724,11 @@ rescan: curgeneration = object->generation; for (p = TAILQ_FIRST(&object->memq); p; p = np) { + int n; + np = TAILQ_NEXT(p, listq); +again: pi = p->pindex; if (((p->flags & PG_CLEANCHK) == 0) || (pi < tstart) || (pi >= tend) || @@ -679,17 +754,87 @@ rescan: continue; } - s = splvm(); - while (vm_page_sleep_busy(p, TRUE, "vpcwai")) { - if (object->generation != curgeneration) { - splx(s); - goto rescan; + n = vm_object_page_collect_flush(object, p, + curgeneration, pagerflags); + if (n == 0) + goto rescan; + + if (object->generation != curgeneration) + goto rescan; + + /* + * Try to optimize the next page. If we can't we pick up + * our (random) scan where we left off. + */ + if (msync_flush_flags & MSYNC_FLUSH_SOFTSEQ) { + if ((p = vm_page_lookup(object, pi + n)) != NULL) + goto again; + } + } + +#if 0 + VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc); +#endif + + vm_object_clear_flag(object, OBJ_CLEANING); + return; +} + +static int +vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration, int pagerflags) +{ + int runlen; + int s; + int maxf; + int chkb; + int maxb; + int i; + vm_pindex_t pi; + vm_page_t maf[vm_pageout_page_count]; + vm_page_t mab[vm_pageout_page_count]; + vm_page_t ma[vm_pageout_page_count]; + + s = splvm(); + pi = p->pindex; + while (vm_page_sleep_busy(p, TRUE, "vpcwai")) { + if (object->generation != curgeneration) { + splx(s); + return(0); + } + } + + maxf = 0; + for(i = 1; i < vm_pageout_page_count; i++) { + vm_page_t tp; + + if ((tp = vm_page_lookup(object, pi + i)) != NULL) { + if ((tp->flags & PG_BUSY) || + (tp->flags & PG_CLEANCHK) == 0 || + (tp->busy != 0)) + break; + if((tp->queue - tp->pc) == PQ_CACHE) { + vm_page_flag_clear(tp, PG_CLEANCHK); + break; } + vm_page_test_dirty(tp); + if ((tp->dirty & tp->valid) == 0) { + vm_page_flag_clear(tp, PG_CLEANCHK); + break; + } + maf[ i - 1 ] = tp; + maxf++; + continue; } + break; + } + + maxb = 0; + chkb = 
vm_pageout_page_count - maxf; + if (chkb) { + for(i = 1; i < chkb;i++) { + vm_page_t tp; - maxf = 0; - for (i = 1; i < vm_pageout_page_count; i++) { - if ((tp = vm_page_lookup(object, pi + i)) != NULL) { + if ((tp = vm_page_lookup(object, pi - i)) != NULL) { if ((tp->flags & PG_BUSY) || (tp->flags & PG_CLEANCHK) == 0 || (tp->busy != 0)) @@ -703,71 +848,45 @@ rescan: vm_page_flag_clear(tp, PG_CLEANCHK); break; } - maf[ i - 1 ] = tp; - maxf++; + mab[ i - 1 ] = tp; + maxb++; continue; } break; } + } - maxb = 0; - chkb = vm_pageout_page_count - maxf; - if (chkb) { - for (i = 1; i < chkb; i++) { - if ((tp = vm_page_lookup(object, pi - i)) != NULL) { - if ((tp->flags & PG_BUSY) || - (tp->flags & PG_CLEANCHK) == 0 || - (tp->busy != 0)) - break; - if((tp->queue - tp->pc) == PQ_CACHE) { - vm_page_flag_clear(tp, PG_CLEANCHK); - break; - } - vm_page_test_dirty(tp); - if ((tp->dirty & tp->valid) == 0) { - vm_page_flag_clear(tp, PG_CLEANCHK); - break; - } - mab[ i - 1 ] = tp; - maxb++; - continue; - } - break; - } - } + for(i = 0; i < maxb; i++) { + int index = (maxb - i) - 1; + ma[index] = mab[i]; + vm_page_flag_clear(ma[index], PG_CLEANCHK); + } + vm_page_flag_clear(p, PG_CLEANCHK); + ma[maxb] = p; + for(i = 0; i < maxf; i++) { + int index = (maxb + i) + 1; + ma[index] = maf[i]; + vm_page_flag_clear(ma[index], PG_CLEANCHK); + } + runlen = maxb + maxf + 1; - for (i = 0; i < maxb; i++) { - int index = (maxb - i) - 1; - ma[index] = mab[i]; - vm_page_flag_clear(ma[index], PG_CLEANCHK); - } - vm_page_flag_clear(p, PG_CLEANCHK); - ma[maxb] = p; - for (i = 0 ; i < maxf; i++) { - int index = (maxb + i) + 1; - ma[index] = maf[i]; - vm_page_flag_clear(ma[index], PG_CLEANCHK); - } - runlen = maxb + maxf + 1; + splx(s); + vm_pageout_flush(ma, runlen, pagerflags); + for (i = 0; i < runlen; i++) { + if (ma[i]->valid & ma[i]->dirty) { + vm_page_protect(ma[i], VM_PROT_READ); + vm_page_flag_set(ma[i], PG_CLEANCHK); - splx(s); - vm_pageout_flush(ma, runlen, pagerflags); - for (i = 0; i < 
runlen; i++) { - if (ma[i]->valid & ma[i]->dirty) { - vm_page_protect(ma[i], VM_PROT_READ); - vm_page_flag_set(ma[i], PG_CLEANCHK); - } + /* + * maxf will end up being the actual number of pages + * we wrote out contiguously, non-inclusive of the + * first page. We do not count look-behind pages. + */ + if (i >= maxb + 1 && (maxf > i - maxb - 1)) + maxf = i - maxb - 1; } - if (object->generation != curgeneration) - goto rescan; } - -#if 0 - VOP_FSYNC(vp, NULL, (pagerflags & VM_PAGER_PUT_SYNC)?MNT_WAIT:0, curproc); -#endif - - vm_object_clear_flag(object, OBJ_CLEANING); - return; + return(maxf + 1); } /* |