-rw-r--r--  sys/sys/vmmeter.h    86
-rw-r--r--  sys/vm/vm_glue.c     15
-rw-r--r--  sys/vm/vm_meter.c     2
-rw-r--r--  sys/vm/vm_page.c     96
-rw-r--r--  sys/vm/vm_page.h      4
-rw-r--r--  sys/vm/vm_pageout.c 187
6 files changed, 274 insertions, 116 deletions
diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index e382d90..2ae45a3 100644
--- a/sys/sys/vmmeter.h
+++ b/sys/sys/vmmeter.h
@@ -91,9 +91,95 @@ struct vmmeter {
u_int v_cache_max; /* max number of pages in cached obj */
u_int v_pageout_free_min; /* min number pages reserved for kernel */
u_int v_interrupt_free_min; /* reserved number of pages for int code */
+ u_int v_free_severe; /* severe depletion of pages below this pt */
};
#ifdef KERNEL
+
extern struct vmmeter cnt;
+
+/*
+ * Return TRUE if we are under our reserved low-free-pages threshold
+ */
+
+static __inline
+int
+vm_page_count_reserved(void)
+{
+ return (cnt.v_free_reserved > (cnt.v_free_count + cnt.v_cache_count));
+}
+
+/*
+ * Return TRUE if we are under our severe low-free-pages threshold
+ *
+ * This routine is typically used at the user<->system interface to determine
+ * whether we need to block in order to avoid a low memory deadlock.
+ */
+
+static __inline
+int
+vm_page_count_severe(void)
+{
+ return (cnt.v_free_severe > (cnt.v_free_count + cnt.v_cache_count));
+}
+
+/*
+ * Return TRUE if we are under our minimum low-free-pages threshold.
+ *
+ * This routine is typically used within the system to determine whether
+ * we can execute potentially very expensive code in terms of memory. It
+ * is also used by the pageout daemon to calculate when to sleep, when
+ * to wake waiters up, and when (after making a pass) to become more
+ * desperate.
+ */
+
+static __inline
+int
+vm_page_count_min(void)
+{
+ return (cnt.v_free_min > (cnt.v_free_count + cnt.v_cache_count));
+}
+
+/*
+ * Return TRUE if we have not reached our free page target during
+ * free page recovery operations.
+ */
+
+static __inline
+int
+vm_page_count_target(void)
+{
+ return (cnt.v_free_target > (cnt.v_free_count + cnt.v_cache_count));
+}
+
+/*
+ * Return the number of pages we need to free up or cache.
+ * A positive number indicates that we do not have enough free pages.
+ */
+
+static __inline
+int
+vm_paging_target(void)
+{
+ return (
+ (cnt.v_free_target + cnt.v_cache_min) -
+ (cnt.v_free_count + cnt.v_cache_count)
+ );
+}
+
+/*
+ * Return a positive number if the pagedaemon needs to be woken up.
+ */
+
+static __inline
+int
+vm_paging_needed(void)
+{
+ return (
+ (cnt.v_free_reserved + cnt.v_cache_min) >
+ (cnt.v_free_count + cnt.v_cache_count)
+ );
+}
+
#endif
/* systemwide totals computed every five seconds */
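The inlines above all compare a fixed threshold against cnt.v_free_count + cnt.v_cache_count, with v_free_severe sitting below v_free_min, which in turn sits below v_free_target. The following standalone sketch is not kernel code; the struct and every counter value in it are invented purely to show what each predicate returns in one plausible state.

#include <stdio.h>

/* userland model of the vmmeter fields the new inlines look at */
struct vmmeter_model {
	unsigned v_free_reserved, v_free_severe, v_free_min, v_free_target;
	unsigned v_cache_min, v_free_count, v_cache_count;
};

int
main(void)
{
	struct vmmeter_model cnt = {
		.v_free_reserved = 200, .v_free_severe = 430,
		.v_free_min = 660, .v_free_target = 900,
		.v_cache_min = 300, .v_free_count = 400, .v_cache_count = 50
	};
	unsigned avail = cnt.v_free_count + cnt.v_cache_count;	/* 450 */

	/* the same comparisons the inlines perform */
	printf("severe:        %d\n", cnt.v_free_severe > avail);	/* 0 */
	printf("min:           %d\n", cnt.v_free_min > avail);		/* 1 */
	printf("target:        %d\n", cnt.v_free_target > avail);	/* 1 */

	/* vm_paging_target(): (900 + 300) - 450 = 750 pages still to free or cache */
	printf("paging target: %d\n",
	    (int)(cnt.v_free_target + cnt.v_cache_min - avail));

	/* vm_paging_needed(): (200 + 300) > 450, so the pagedaemon should be woken */
	printf("paging needed: %d\n",
	    cnt.v_free_reserved + cnt.v_cache_min > avail);
	return (0);
}

The payoff of centralizing these tests shows up in the rest of the patch: every open-coded comparison against cnt.v_free_count + cnt.v_cache_count in vm_glue.c, vm_page.c and vm_pageout.c below is replaced by one of these predicates.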
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index e53079a..1d7157c 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -209,19 +209,9 @@ vm_fork(p1, p2, flags)
p1->p_vmspace->vm_refcnt++;
}
- /*
- * Great, so we have a memory-heavy process and the
- * entire machine comes to a screaching halt because
- * nobody can fork/exec anything. What we really need
- * to do is fix the process swapper so it swaps out the right
- * processes.
- */
-#if 0
- while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
- vm_pageout_deficit += (UPAGES + VM_INITIAL_PAGEIN);
+ while (vm_page_count_severe()) {
VM_WAIT;
}
-#endif
if ((flags & RFMEM) == 0) {
p2->p_vmspace = vmspace_fork(p1->p_vmspace);
@@ -339,8 +329,9 @@ scheduler(dummy)
int ppri;
loop:
- while ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min) {
+ if (vm_page_count_min()) {
VM_WAIT;
+ goto loop;
}
pp = NULL;
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 4e7f0fb..6c69562 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -119,6 +119,8 @@ SYSCTL_INT(_vm, VM_V_CACHE_MAX, v_cache_max,
CTLFLAG_RW, &cnt.v_cache_max, 0, "");
SYSCTL_INT(_vm, VM_V_PAGEOUT_FREE_MIN, v_pageout_free_min,
CTLFLAG_RW, &cnt.v_pageout_free_min, 0, "");
+SYSCTL_INT(_vm, OID_AUTO, v_free_severe,
+ CTLFLAG_RW, &cnt.v_free_severe, 0, "");
SYSCTL_STRUCT(_vm, VM_LOADAVG, loadavg, CTLFLAG_RD,
&averunnable, loadavg, "Machine loadaverage history");
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index f6db00e..533ba37 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -615,8 +615,7 @@ vm_page_unqueue(m)
(*pq->cnt)--;
pq->lcnt--;
if ((queue - m->pc) == PQ_CACHE) {
- if ((cnt.v_cache_count + cnt.v_free_count) <
- (cnt.v_free_reserved + cnt.v_cache_min))
+ if (vm_paging_needed())
pagedaemon_wakeup();
}
}
@@ -871,9 +870,7 @@ loop:
* Don't wakeup too often - wakeup the pageout daemon when
* we would be nearly out of memory.
*/
- if (((cnt.v_free_count + cnt.v_cache_count) <
- (cnt.v_free_reserved + cnt.v_cache_min)) ||
- (cnt.v_free_count < cnt.v_pageout_free_min))
+ if (vm_paging_needed() || cnt.v_free_count < cnt.v_pageout_free_min)
pagedaemon_wakeup();
splx(s);
@@ -991,6 +988,8 @@ vm_page_asleep(vm_page_t m, char *msg, char *busy) {
* vm_page_activate:
*
* Put the specified page on the active list (if appropriate).
+ * Ensure that act_count is at least ACT_INIT but do not otherwise
+ * mess with it.
*
* The page queues must be locked.
* This routine may not block.
@@ -1050,8 +1049,7 @@ vm_page_free_wakeup()
* high water mark. And wakeup scheduler process if we have
* lots of memory. this process will swapin processes.
*/
- if (vm_pages_needed &&
- ((cnt.v_free_count + cnt.v_cache_count) >= cnt.v_free_min)) {
+ if (vm_pages_needed && !vm_page_count_min()) {
wakeup(&cnt.v_free_count);
vm_pages_needed = 0;
}
@@ -1261,11 +1259,14 @@ vm_page_unwire(m, activate)
* Move the specified page to the inactive queue. If the page has
* any associated swap, the swap is deallocated.
*
+ * Normally athead is 0 resulting in LRU operation. athead is set
+ * to 1 if we want this page to be 'as if it were placed in the cache',
+ * except without unmapping it from the process address space.
+ *
* This routine may not block.
*/
-void
-vm_page_deactivate(m)
- register vm_page_t m;
+static __inline void
+_vm_page_deactivate(vm_page_t m, int athead)
{
int s;
@@ -1280,7 +1281,10 @@ vm_page_deactivate(m)
if ((m->queue - m->pc) == PQ_CACHE)
cnt.v_reactivated++;
vm_page_unqueue(m);
- TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
+ if (athead)
+ TAILQ_INSERT_HEAD(&vm_page_queue_inactive, m, pageq);
+ else
+ TAILQ_INSERT_TAIL(&vm_page_queue_inactive, m, pageq);
m->queue = PQ_INACTIVE;
vm_page_queues[PQ_INACTIVE].lcnt++;
cnt.v_inactive_count++;
@@ -1288,6 +1292,12 @@ vm_page_deactivate(m)
splx(s);
}
+void
+vm_page_deactivate(vm_page_t m)
+{
+ _vm_page_deactivate(m, 0);
+}
+
/*
* vm_page_cache
*
@@ -1333,6 +1343,70 @@ vm_page_cache(m)
}
/*
+ * vm_page_dontneed
+ *
+ * Cache, deactivate, or do nothing as appropriate. This routine
+ * is typically used by madvise() MADV_DONTNEED.
+ *
+ * Generally speaking we want to move the page into the cache so
+ * it gets reused quickly. However, this can result in a silly syndrome
+ * due to the page recycling too quickly. Small objects will not be
+ * fully cached. On the other hand, if we move the page to the inactive
+ * queue we wind up with a problem whereby very large objects
+ * unnecessarily blow away our inactive and cache queues.
+ *
+ * The solution is to move the pages based on a fixed weighting. We
+ * either leave them alone, deactivate them, or move them to the cache,
+ * where moving them to the cache has the highest weighting.
+ * By forcing some pages into other queues we eventually force the
+ * system to balance the queues, potentially recovering other unrelated
+ * space from active. The idea is to not force this to happen too
+ * often.
+ */
+
+void
+vm_page_dontneed(m)
+ vm_page_t m;
+{
+ static int dnweight;
+ int dnw;
+ int head;
+
+ dnw = ++dnweight;
+
+ /*
+ * occasionally leave the page alone
+ */
+
+ if ((dnw & 0x01F0) == 0 ||
+ m->queue == PQ_INACTIVE ||
+ m->queue - m->pc == PQ_CACHE
+ ) {
+ if (m->act_count >= ACT_INIT)
+ --m->act_count;
+ return;
+ }
+
+ if (m->dirty == 0)
+ vm_page_test_dirty(m);
+
+ if (m->dirty || (dnw & 0x0070) == 0) {
+ /*
+ * Deactivate the page 3 times out of 32.
+ */
+ head = 0;
+ } else {
+ /*
+ * Cache the page 28 times out of every 32. Note that
+ * the page is deactivated instead of cached, but placed
+ * at the head of the queue instead of the tail.
+ */
+ head = 1;
+ }
+ _vm_page_deactivate(m, head);
+}
+
+/*
* Grab a page, waiting until we are waken up due to the page
* changing state. We keep on waiting, if the page continues
* to be in the object. If the page doesn't exist, allocate it.
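The 1:3:28 weighting documented in the vm_page_dontneed() comment falls directly out of the two bit masks applied to the dnweight counter, and the "cache-like" case is implemented by passing athead=1 to _vm_page_deactivate() so the page goes to the head of the inactive queue while staying mapped. The sketch below is not kernel code: it only counts branches over 512 consecutive calls and assumes the page is always clean and not already inactive or cached (in the real routine a dirty page is always deactivated rather than treated as cache-like).

#include <stdio.h>

int
main(void)
{
	static int dnweight;
	int leave = 0, deact = 0, cachelike = 0;

	for (int i = 0; i < 512; ++i) {
		int dnw = ++dnweight;

		if ((dnw & 0x01F0) == 0)
			++leave;	/* leave the page where it is */
		else if ((dnw & 0x0070) == 0)
			++deact;	/* _vm_page_deactivate(m, 0): tail of inactive */
		else
			++cachelike;	/* _vm_page_deactivate(m, 1): head of inactive */
	}
	/* prints 16 / 48 / 448, i.e. 1 : 3 : 28 out of every 32 calls */
	printf("leave %d, deactivate %d, cache-like %d\n", leave, deact, cachelike);
	return (0);
}

Head-inserted pages are the first ones the inactive scan reaches, so they are reclaimed almost as quickly as cached pages, which is the behaviour the comment above _vm_page_deactivate() describes.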
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 6ffb867..2d7e740 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -136,7 +136,8 @@ struct vm_page {
};
/*
- * note SWAPBLK_NONE is a flag, basically the high bit.
+ * note: we currently use SWAPBLK_NONE as an absolute value rather than
+ * a flag bit.
*/
#define SWAPBLK_MASK ((daddr_t)((u_daddr_t)-1 >> 1)) /* mask */
@@ -391,6 +392,7 @@ void vm_page_activate __P((vm_page_t));
vm_page_t vm_page_alloc __P((vm_object_t, vm_pindex_t, int));
vm_page_t vm_page_grab __P((vm_object_t, vm_pindex_t, int));
void vm_page_cache __P((register vm_page_t));
+void vm_page_dontneed __P((register vm_page_t));
static __inline void vm_page_copy __P((vm_page_t, vm_page_t));
static __inline void vm_page_free __P((vm_page_t));
static __inline void vm_page_free_zero __P((vm_page_t));
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index bc8784c..d24e51c 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -219,7 +219,7 @@ vm_pageout_clean(m)
register vm_object_t object;
vm_page_t mc[2*vm_pageout_page_count];
int pageout_count;
- int i, forward_okay, backward_okay, page_base;
+ int ib, is, page_base;
vm_pindex_t pindex = m->pindex;
object = m->object;
@@ -243,11 +243,9 @@ vm_pageout_clean(m)
mc[vm_pageout_page_count] = m;
pageout_count = 1;
page_base = vm_pageout_page_count;
- forward_okay = TRUE;
- if (pindex != 0)
- backward_okay = TRUE;
- else
- backward_okay = FALSE;
+ ib = 1;
+ is = 1;
+
/*
* Scan object for clusterable pages.
*
@@ -258,82 +256,84 @@ vm_pageout_clean(m)
* active page.
* -or-
* 2) we force the issue.
+ *
+ * During heavy mmap/modification loads the pageout
+ * daemon can really fragment the underlying file
+ * due to flushing pages out of order and not trying to
+ * align the clusters (which leaves sporadic out-of-order
+ * holes). To solve this problem we do the reverse scan
+ * first and attempt to align our cluster, then do a
+ * forward scan if room remains.
*/
- for (i = 1; (i < vm_pageout_page_count) && (forward_okay || backward_okay); i++) {
+
+more:
+ while (ib && pageout_count < vm_pageout_page_count) {
vm_page_t p;
- /*
- * See if forward page is clusterable.
- */
- if (forward_okay) {
- /*
- * Stop forward scan at end of object.
- */
- if ((pindex + i) > object->size) {
- forward_okay = FALSE;
- goto do_backward;
- }
- p = vm_page_lookup(object, pindex + i);
- if (p) {
- if (((p->queue - p->pc) == PQ_CACHE) ||
- (p->flags & PG_BUSY) || p->busy) {
- forward_okay = FALSE;
- goto do_backward;
- }
- vm_page_test_dirty(p);
- if ((p->dirty & p->valid) != 0 &&
- (p->queue == PQ_INACTIVE) &&
- (p->wire_count == 0) &&
- (p->hold_count == 0)) {
- mc[vm_pageout_page_count + i] = p;
- pageout_count++;
- if (pageout_count == vm_pageout_page_count)
- break;
- } else {
- forward_okay = FALSE;
- }
- } else {
- forward_okay = FALSE;
- }
+ if (ib > pindex) {
+ ib = 0;
+ break;
+ }
+
+ if ((p = vm_page_lookup(object, pindex - ib)) == NULL) {
+ ib = 0;
+ break;
+ }
+ if (((p->queue - p->pc) == PQ_CACHE) ||
+ (p->flags & PG_BUSY) || p->busy) {
+ ib = 0;
+ break;
+ }
+ vm_page_test_dirty(p);
+ if ((p->dirty & p->valid) == 0 ||
+ p->queue != PQ_INACTIVE ||
+ p->wire_count != 0 ||
+ p->hold_count != 0) {
+ ib = 0;
+ break;
}
-do_backward:
+ mc[--page_base] = p;
+ ++pageout_count;
+ ++ib;
/*
- * See if backward page is clusterable.
+ * alignment boundary, stop here and switch directions. Do
+ * not clear ib.
*/
- if (backward_okay) {
- /*
- * Stop backward scan at beginning of object.
- */
- if ((pindex - i) == 0) {
- backward_okay = FALSE;
- }
- p = vm_page_lookup(object, pindex - i);
- if (p) {
- if (((p->queue - p->pc) == PQ_CACHE) ||
- (p->flags & PG_BUSY) || p->busy) {
- backward_okay = FALSE;
- continue;
- }
- vm_page_test_dirty(p);
- if ((p->dirty & p->valid) != 0 &&
- (p->queue == PQ_INACTIVE) &&
- (p->wire_count == 0) &&
- (p->hold_count == 0)) {
- mc[vm_pageout_page_count - i] = p;
- pageout_count++;
- page_base--;
- if (pageout_count == vm_pageout_page_count)
- break;
- } else {
- backward_okay = FALSE;
- }
- } else {
- backward_okay = FALSE;
- }
+ if ((pindex - (ib - 1)) % vm_pageout_page_count == 0)
+ break;
+ }
+
+ while (pageout_count < vm_pageout_page_count &&
+ pindex + is < object->size) {
+ vm_page_t p;
+
+ if ((p = vm_page_lookup(object, pindex + is)) == NULL)
+ break;
+ if (((p->queue - p->pc) == PQ_CACHE) ||
+ (p->flags & PG_BUSY) || p->busy) {
+ break;
}
+ vm_page_test_dirty(p);
+ if ((p->dirty & p->valid) == 0 ||
+ p->queue != PQ_INACTIVE ||
+ p->wire_count != 0 ||
+ p->hold_count != 0) {
+ break;
+ }
+ mc[page_base + pageout_count] = p;
+ ++pageout_count;
+ ++is;
}
/*
+ * If we exhausted our forward scan, continue with the reverse scan
+ * when possible, even past a page boundary. This catches boundary
+ * conditions.
+ */
+ if (ib && pageout_count < vm_pageout_page_count)
+ goto more;
+
+ /*
* we allow reads during pageouts...
*/
return vm_pageout_flush(&mc[page_base], pageout_count, 0);
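The practical effect of scanning backwards first and stopping on a multiple of vm_pageout_page_count is that the flushed cluster tends to start on an aligned offset within the object, so successive flushes tile the file instead of leaving the sporadic out-of-order holes the comment above describes. The following sketch is not kernel code: it reproduces only the index arithmetic, assumes every neighbouring page is eligible, and uses a cluster size of 8 purely for illustration (the real limit is vm_pageout_page_count and each candidate page is checked for dirtiness, queue, wire and hold counts).

#include <stdio.h>

#define CLUSTER	8	/* stand-in for vm_pageout_page_count */

int
main(void)
{
	unsigned long pindex = 43;	/* page that triggered the flush */
	unsigned long mc[2 * CLUSTER];	/* holds page indices here, vm_page_t in the kernel */
	int page_base = CLUSTER;
	int pageout_count = 1;
	int ib = 1, is = 1;

	mc[CLUSTER] = pindex;

	/* reverse scan first, so the start of the cluster lines up */
	while (ib && pageout_count < CLUSTER) {
		if (ib > pindex)
			break;
		mc[--page_base] = pindex - ib;
		++pageout_count;
		++ib;
		if ((pindex - (ib - 1)) % CLUSTER == 0)
			break;		/* alignment boundary reached */
	}

	/* then use whatever room remains going forward */
	while (pageout_count < CLUSTER) {
		mc[page_base + pageout_count] = pindex + is;
		++pageout_count;
		++is;
	}

	/* prints 40 41 42 43 44 45 46 47: an aligned cluster around page 43 */
	for (int i = 0; i < pageout_count; ++i)
		printf("%lu ", mc[page_base + i]);
	printf("\n");
	return (0);
}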
@@ -397,7 +397,7 @@ vm_pageout_flush(mc, count, flags)
* worked.
*/
pmap_clear_modify(VM_PAGE_TO_PHYS(mt));
- mt->dirty = 0;
+ vm_page_undirty(mt);
break;
case VM_PAGER_ERROR:
case VM_PAGER_FAIL:
@@ -646,9 +646,7 @@ vm_pageout_scan()
* to the cache.
*/
- page_shortage = (cnt.v_free_target + cnt.v_cache_min) -
- (cnt.v_free_count + cnt.v_cache_count);
- page_shortage += addl_page_shortage_init;
+ page_shortage = vm_paging_target() + addl_page_shortage_init;
/*
* Figure out what to do with dirty pages when they are encountered.
@@ -787,7 +785,7 @@ rescan0:
} else {
swap_pageouts_ok = !(defer_swap_pageouts || disable_swap_pageouts);
swap_pageouts_ok |= (!disable_swap_pageouts && defer_swap_pageouts &&
- (cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min);
+ vm_page_count_min());
}
@@ -1082,15 +1080,11 @@ rescan0:
* in a writeable object, wakeup the sync daemon. And kick swapout
* if we did not get enough free pages.
*/
- if ((cnt.v_cache_count + cnt.v_free_count) <
- (cnt.v_free_target + cnt.v_cache_min) ) {
- if (vnodes_skipped &&
- (cnt.v_cache_count + cnt.v_free_count) < cnt.v_free_min) {
+ if (vm_paging_target() > 0) {
+ if (vnodes_skipped && vm_page_count_min())
(void) speedup_syncer();
- }
#if !defined(NO_SWAPPING)
- if (vm_swap_enabled &&
- (cnt.v_free_count + cnt.v_cache_count < cnt.v_free_target)) {
+ if (vm_swap_enabled && vm_page_count_target()) {
vm_req_vmdaemon();
vm_pageout_req_swapout |= VM_SWAP_NORMAL;
}
@@ -1101,8 +1095,7 @@ rescan0:
* make sure that we have swap space -- if we are low on memory and
* swap -- then kill the biggest process.
*/
- if ((vm_swap_size == 0 || swap_pager_full) &&
- ((cnt.v_free_count + cnt.v_cache_count) < cnt.v_free_min)) {
+ if ((vm_swap_size == 0 || swap_pager_full) && vm_page_count_min()) {
bigproc = NULL;
bigsize = 0;
for (p = allproc.lh_first; p != 0; p = p->p_list.le_next) {
@@ -1160,8 +1153,10 @@ vm_pageout_page_stats()
static int fullintervalcount = 0;
int page_shortage;
- page_shortage = (cnt.v_inactive_target + cnt.v_cache_max + cnt.v_free_min) -
+ page_shortage =
+ (cnt.v_inactive_target + cnt.v_cache_max + cnt.v_free_min) -
(cnt.v_free_count + cnt.v_inactive_count + cnt.v_cache_count);
+
if (page_shortage <= 0)
return;
@@ -1253,7 +1248,9 @@ vm_size_t count;
cnt.v_interrupt_free_min;
cnt.v_free_reserved = vm_pageout_page_count +
cnt.v_pageout_free_min + (count / 768) + PQ_L2_SIZE;
+ cnt.v_free_severe = cnt.v_free_min / 2;
cnt.v_free_min += cnt.v_free_reserved;
+ cnt.v_free_severe += cnt.v_free_reserved;
return 1;
}
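Worked through with invented numbers (the real values depend on the physical page count and on the initialization above this hunk): if the earlier code left cnt.v_free_min at 460 pages and cnt.v_free_reserved works out to 300, then v_free_severe becomes 460/2 + 300 = 530 while v_free_min becomes 460 + 300 = 760. In general the new severe threshold lands essentially halfway between v_free_reserved and the final v_free_min, which fits its use above as the harder backstop that vm_fork() blocks on.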
@@ -1326,8 +1323,17 @@ vm_pageout()
while (TRUE) {
int error;
int s = splvm();
- if (!vm_pages_needed ||
- ((cnt.v_free_count + cnt.v_cache_count) > cnt.v_free_min)) {
+
+ if (vm_pages_needed && vm_page_count_min()) {
+ /*
+ * Still not done, sleep a bit and go again
+ */
+ vm_pages_needed = 0;
+ tsleep(&vm_pages_needed, PVM, "psleep", hz/2);
+ } else {
+ /*
+ * Good enough, sleep & handle stats
+ */
vm_pages_needed = 0;
error = tsleep(&vm_pages_needed,
PVM, "psleep", vm_pageout_stats_interval * hz);
@@ -1336,9 +1342,6 @@ vm_pageout()
vm_pageout_page_stats();
continue;
}
- } else if (vm_pages_needed) {
- vm_pages_needed = 0;
- tsleep(&vm_pages_needed, PVM, "psleep", hz/2);
}
if (vm_pages_needed)