-rw-r--r--  sys/amd64/include/vmparam.h    |   5
-rw-r--r--  sys/arm/include/vmparam.h      |   5
-rw-r--r--  sys/i386/include/vmparam.h     |   5
-rw-r--r--  sys/ia64/include/vmparam.h     |   5
-rw-r--r--  sys/kern/kern_exec.c           |   2
-rw-r--r--  sys/kern/vfs_bio.c             |  14
-rw-r--r--  sys/powerpc/include/vmparam.h  |   5
-rw-r--r--  sys/sparc64/include/vmparam.h  |   5
-rw-r--r--  sys/sun4v/include/vmparam.h    |   5
-rw-r--r--  sys/sys/vmmeter.h              |   4
-rw-r--r--  sys/vm/vm_contig.c             |  11
-rw-r--r--  sys/vm/vm_fault.c              |  29
-rw-r--r--  sys/vm/vm_map.c                |  18
-rw-r--r--  sys/vm/vm_object.c             |  44
-rw-r--r--  sys/vm/vm_object.h             |   1
-rw-r--r--  sys/vm/vm_page.c               | 357
-rw-r--r--  sys/vm/vm_page.h               |  26
-rw-r--r--  sys/vm/vm_pageout.c            |  37
-rw-r--r--  sys/vm/vm_pageq.c              |  27
-rw-r--r--  sys/vm/vm_phys.c               | 148
-rw-r--r--  sys/vm/vm_phys.h               |   3
21 files changed, 477 insertions(+), 279 deletions(-)
diff --git a/sys/amd64/include/vmparam.h b/sys/amd64/include/vmparam.h
index 9bafade..50962e5 100644
--- a/sys/amd64/include/vmparam.h
+++ b/sys/amd64/include/vmparam.h
@@ -101,12 +101,13 @@
#define VM_PHYSSEG_MAX 31
/*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
* from which physical pages are allocated and VM_FREEPOOL_DIRECT is
* the pool from which physical pages for page tables and small UMA
* objects are allocated.
*/
-#define VM_NFREEPOOL 2
+#define VM_NFREEPOOL 3
+#define VM_FREEPOOL_CACHE 2
#define VM_FREEPOOL_DEFAULT 0
#define VM_FREEPOOL_DIRECT 1
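
The new VM_FREEPOOL_CACHE pool lets clean, immediately reusable pages sit on the buddy allocator's free lists while remaining distinguishable from ordinary free pages. A minimal sketch of how a page enters that pool, modeled on the vm_page_cache() changes later in this diff (a clean, unmapped, unwired page m and the free queue mutex are assumed):

    mtx_lock(&vm_page_queue_free_mtx);
    vm_phys_set_pool(VM_FREEPOOL_CACHE, m, 0);  /* retag the 0-order page */
    cnt.v_cache_count++;                        /* counted as cache, not free */
    vm_phys_free_pages(m, 0);                   /* back onto the buddy free lists */
    mtx_unlock(&vm_page_queue_free_mtx);
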
diff --git a/sys/arm/include/vmparam.h b/sys/arm/include/vmparam.h
index 5185a48..35b8d41 100644
--- a/sys/arm/include/vmparam.h
+++ b/sys/arm/include/vmparam.h
@@ -59,12 +59,13 @@
#define VM_PHYSSEG_DENSE
/*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
* from which physical pages are allocated and VM_FREEPOOL_DIRECT is
* the pool from which physical pages for small UMA objects are
* allocated.
*/
-#define VM_NFREEPOOL 2
+#define VM_NFREEPOOL 3
+#define VM_FREEPOOL_CACHE 2
#define VM_FREEPOOL_DEFAULT 0
#define VM_FREEPOOL_DIRECT 1
diff --git a/sys/i386/include/vmparam.h b/sys/i386/include/vmparam.h
index 8f97e1b..2aa2848 100644
--- a/sys/i386/include/vmparam.h
+++ b/sys/i386/include/vmparam.h
@@ -93,12 +93,13 @@
#define VM_PHYSSEG_MAX 17
/*
- * Create one free page pool. Since the i386 kernel virtual address
+ * Create two free page pools. Since the i386 kernel virtual address
* space does not include a mapping onto the machine's entire physical
* memory, VM_FREEPOOL_DIRECT is defined as an alias for the default
* pool, VM_FREEPOOL_DEFAULT.
*/
-#define VM_NFREEPOOL 1
+#define VM_NFREEPOOL 2
+#define VM_FREEPOOL_CACHE 1
#define VM_FREEPOOL_DEFAULT 0
#define VM_FREEPOOL_DIRECT 0
diff --git a/sys/ia64/include/vmparam.h b/sys/ia64/include/vmparam.h
index de047bf..c7dac2c 100644
--- a/sys/ia64/include/vmparam.h
+++ b/sys/ia64/include/vmparam.h
@@ -122,12 +122,13 @@
#define VM_PHYSSEG_MAX 49
/*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
* from which physical pages are allocated and VM_FREEPOOL_DIRECT is
* the pool from which physical pages for small UMA objects are
* allocated.
*/
-#define VM_NFREEPOOL 2
+#define VM_NFREEPOOL 3
+#define VM_FREEPOOL_CACHE 2
#define VM_FREEPOOL_DEFAULT 0
#define VM_FREEPOOL_DIRECT 1
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index b0c107c..b3884d0 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -832,7 +832,7 @@ exec_map_first_page(imgp)
vm_page_busy(ma[i]);
} else {
ma[i] = vm_page_alloc(object, i,
- VM_ALLOC_NORMAL);
+ VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
if (ma[i] == NULL)
break;
}
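
VM_ALLOC_IFNOTCACHED makes vm_page_alloc() fail instead of reactivating a cached page, which is what a caller like this wants when the newly allocated page is about to be overwritten by pager I/O anyway. A sketch of the caller pattern (object and pindex are placeholders):

    m = vm_page_alloc(object, pindex, VM_ALLOC_NORMAL | VM_ALLOC_IFNOTCACHED);
    if (m == NULL) {
        /* Either memory is short or a cached page already exists;
         * take the fallback path rather than clobber the cached copy. */
    }
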
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index d66b45d..bb457db 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -2898,7 +2898,8 @@ allocbuf(struct buf *bp, int size)
VM_WAIT;
VM_OBJECT_LOCK(obj);
} else {
- bp->b_flags &= ~B_CACHE;
+ if (m->valid == 0)
+ bp->b_flags &= ~B_CACHE;
bp->b_pages[bp->b_npages] = m;
++bp->b_npages;
}
@@ -2916,20 +2917,13 @@ allocbuf(struct buf *bp, int size)
* vm_fault->getpages->cluster_read->allocbuf
*
*/
- vm_page_lock_queues();
if (vm_page_sleep_if_busy(m, FALSE, "pgtblk"))
continue;
/*
- * We have a good page. Should we wakeup the
- * page daemon?
+ * We have a good page.
*/
- if ((curproc != pageproc) &&
- (VM_PAGE_INQUEUE1(m, PQ_CACHE)) &&
- ((cnt.v_free_count + cnt.v_cache_count) <
- (cnt.v_free_min + cnt.v_cache_min))) {
- pagedaemon_wakeup();
- }
+ vm_page_lock_queues();
vm_page_wire(m);
vm_page_unlock_queues();
bp->b_pages[bp->b_npages] = m;
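
Because the page obtained here may now be a reactivated cached page with valid contents, the buffer layer only clears B_CACHE when the page is genuinely invalid. A hedged sketch of the idea using a hypothetical caller (the exact allocation call in allocbuf() differs):

    m = vm_page_grab(obj, pindex, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
    if (m->valid == 0)
        bp->b_flags &= ~B_CACHE;    /* real I/O is still required for this page */
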
diff --git a/sys/powerpc/include/vmparam.h b/sys/powerpc/include/vmparam.h
index 60e240c..0f8298e 100644
--- a/sys/powerpc/include/vmparam.h
+++ b/sys/powerpc/include/vmparam.h
@@ -110,12 +110,13 @@ struct pmap_physseg {
#define VM_PHYSSEG_DENSE
/*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
* from which physical pages are allocated and VM_FREEPOOL_DIRECT is
* the pool from which physical pages for small UMA objects are
* allocated.
*/
-#define VM_NFREEPOOL 2
+#define VM_NFREEPOOL 3
+#define VM_FREEPOOL_CACHE 2
#define VM_FREEPOOL_DEFAULT 0
#define VM_FREEPOOL_DIRECT 1
diff --git a/sys/sparc64/include/vmparam.h b/sys/sparc64/include/vmparam.h
index 5d83f60..5609e8e 100644
--- a/sys/sparc64/include/vmparam.h
+++ b/sys/sparc64/include/vmparam.h
@@ -91,12 +91,13 @@
#define VM_PHYSSEG_MAX 64
/*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
* from which physical pages are allocated and VM_FREEPOOL_DIRECT is
* the pool from which physical pages for small UMA objects are
* allocated.
*/
-#define VM_NFREEPOOL 2
+#define VM_NFREEPOOL 3
+#define VM_FREEPOOL_CACHE 2
#define VM_FREEPOOL_DEFAULT 0
#define VM_FREEPOOL_DIRECT 1
diff --git a/sys/sun4v/include/vmparam.h b/sys/sun4v/include/vmparam.h
index 42af1df..a994401 100644
--- a/sys/sun4v/include/vmparam.h
+++ b/sys/sun4v/include/vmparam.h
@@ -91,12 +91,13 @@
#define VM_PHYSSEG_MAX 64
/*
- * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * Create three free page pools: VM_FREEPOOL_DEFAULT is the default pool
* from which physical pages are allocated and VM_FREEPOOL_DIRECT is
* the pool from which physical pages for small UMA objects are
* allocated.
*/
-#define VM_NFREEPOOL 2
+#define VM_NFREEPOOL 3
+#define VM_FREEPOOL_CACHE 2
#define VM_FREEPOOL_DEFAULT 0
#define VM_FREEPOOL_DIRECT 1
diff --git a/sys/sys/vmmeter.h b/sys/sys/vmmeter.h
index bdd1964..2b4794a 100644
--- a/sys/sys/vmmeter.h
+++ b/sys/sys/vmmeter.h
@@ -68,7 +68,7 @@ struct vmmeter {
u_int v_vnodepgsin; /* (p) vnode_pager pages paged in */
u_int v_vnodepgsout; /* (p) vnode pager pages paged out */
u_int v_intrans; /* (p) intransit blocking page faults */
- u_int v_reactivated; /* (q) pages reactivated from free list */
+ u_int v_reactivated; /* (f) pages reactivated from free list */
u_int v_pdwakeups; /* (f) times daemon has awakened from sleep */
u_int v_pdpages; /* (q) pages analyzed by daemon */
@@ -89,7 +89,7 @@ struct vmmeter {
u_int v_active_count; /* (q) pages active */
u_int v_inactive_target; /* (c) pages desired inactive */
u_int v_inactive_count; /* (q) pages inactive */
- u_int v_cache_count; /* (q) pages on buffer cache queue */
+ u_int v_cache_count; /* (f) pages on buffer cache queue */
u_int v_cache_min; /* (c) min pages desired on cache queue */
u_int v_cache_max; /* (c) max pages in cached obj */
u_int v_pageout_free_min; /* (c) min pages reserved for kernel */
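
With cached pages now held on the buddy free lists, v_free_count and v_cache_count together describe immediately allocatable memory, so availability tests become sums. A sketch matching the vm_page_alloc() change later in this diff:

    if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved) {
        /* Enough head room to satisfy a VM_ALLOC_NORMAL request. */
    }
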
diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c
index 955df30..b40a951 100644
--- a/sys/vm/vm_contig.c
+++ b/sys/vm/vm_contig.c
@@ -231,8 +231,7 @@ contigmalloc(
unsigned long boundary)
{
void * ret;
- vm_object_t object;
- vm_page_t m, m_next, pages;
+ vm_page_t pages;
unsigned long npgs;
int actl, actmax, inactl, inactmax, tries;
@@ -258,14 +257,6 @@ again:
actl++;
goto again;
}
- TAILQ_FOREACH_SAFE(m, &vm_page_queues[PQ_CACHE].pl,
- pageq, m_next) {
- if (m->hold_count == 0 &&
- VM_OBJECT_TRYLOCK(object = m->object)) {
- vm_page_free(m);
- VM_OBJECT_UNLOCK(object);
- }
- }
vm_page_unlock_queues();
tries++;
goto retry;
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index 8b843dc..b26972e 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -328,8 +328,6 @@ RetryFault:;
*/
fs.m = vm_page_lookup(fs.object, fs.pindex);
if (fs.m != NULL) {
- int queue;
-
/*
* check for page-based copy on write.
* We check fs.object == fs.first_object so
@@ -398,20 +396,7 @@ RetryFault:;
vm_object_deallocate(fs.first_object);
goto RetryFault;
}
- queue = fs.m->queue;
-
- vm_pageq_remove_nowakeup(fs.m);
-
- if (VM_PAGE_RESOLVEQUEUE(fs.m, queue) == PQ_CACHE) {
- cnt.v_reactivated++;
- if (vm_page_count_severe()) {
- vm_page_activate(fs.m);
- vm_page_unlock_queues();
- unlock_and_deallocate(&fs);
- VM_WAITPFAULT;
- goto RetryFault;
- }
- }
+ vm_pageq_remove(fs.m);
vm_page_unlock_queues();
/*
@@ -446,6 +431,8 @@ RetryFault:;
if (!vm_page_count_severe()) {
fs.m = vm_page_alloc(fs.object, fs.pindex,
(fs.vp || fs.object->backing_object)? VM_ALLOC_NORMAL: VM_ALLOC_ZERO);
+ if (fs.m != NULL && (fs.m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
+ break;
}
if (fs.m == NULL) {
unlock_and_deallocate(&fs);
@@ -993,9 +980,7 @@ vm_fault_prefault(pmap_t pmap, vm_offset_t addra, vm_map_entry_t entry)
(m->flags & PG_FICTITIOUS) == 0) {
vm_page_lock_queues();
- if (!VM_PAGE_INQUEUE1(m, PQ_CACHE))
- pmap_enter_quick(pmap, addr, m,
- entry->protection);
+ pmap_enter_quick(pmap, addr, m, entry->protection);
vm_page_unlock_queues();
}
VM_OBJECT_UNLOCK(lobject);
@@ -1273,7 +1258,8 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
for (i = 0, tpindex = pindex - 1; tpindex >= startpindex &&
tpindex < pindex; i++, tpindex--) {
- rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
+ rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
+ VM_ALLOC_IFNOTCACHED);
if (rtm == NULL) {
/*
* Shift the allocated pages to the
@@ -1311,7 +1297,8 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
for (; tpindex < endpindex; i++, tpindex++) {
- rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL);
+ rtm = vm_page_alloc(object, tpindex, VM_ALLOC_NORMAL |
+ VM_ALLOC_IFNOTCACHED);
if (rtm == NULL) {
break;
}
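
A page reactivated from the cache still carries its old, fully valid contents, so the fault handler can skip the pager round trip. A sketch of that fast path, assuming the fault handler's locking context (the NULL test covers the memory-shortage case):

    fs.m = vm_page_alloc(fs.object, fs.pindex, VM_ALLOC_NORMAL);
    if (fs.m != NULL && (fs.m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL) {
        /* Reuse the cached contents; no pager read is needed. */
    }
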
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index 905201e..cc6628b 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -1518,28 +1518,24 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot,
start = addr + ptoa(tmpidx);
p_start = p;
}
+ } else if (p_start != NULL) {
if (!are_queues_locked) {
are_queues_locked = TRUE;
vm_page_lock_queues();
}
- if (VM_PAGE_INQUEUE1(p, PQ_CACHE)) {
- if ((flags & MAP_PREFAULT_MADVISE) != 0)
- vm_page_deactivate(p);
- else if (p_start != NULL) {
- pmap_enter_object(map->pmap, start, addr +
- ptoa(tmpidx), p_start, prot);
- p_start = NULL;
- }
- }
- } else if (p_start != NULL) {
pmap_enter_object(map->pmap, start, addr +
ptoa(tmpidx), p_start, prot);
p_start = NULL;
}
}
- if (p_start != NULL)
+ if (p_start != NULL) {
+ if (!are_queues_locked) {
+ are_queues_locked = TRUE;
+ vm_page_lock_queues();
+ }
pmap_enter_object(map->pmap, start, addr + ptoa(psize),
p_start, prot);
+ }
if (are_queues_locked)
vm_page_unlock_queues();
unlock_return:
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 4741638..0d2d61c 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -170,6 +170,9 @@ vm_object_zdtor(void *mem, int size, void *arg)
KASSERT(TAILQ_EMPTY(&object->memq),
("object %p has resident pages",
object));
+ KASSERT(object->cache == NULL,
+ ("object %p has cached pages",
+ object));
KASSERT(object->paging_in_progress == 0,
("object %p paging_in_progress = %d",
object, object->paging_in_progress));
@@ -217,6 +220,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
object->handle = NULL;
object->backing_object = NULL;
object->backing_object_offset = (vm_ooffset_t) 0;
+ object->cache = NULL;
mtx_lock(&vm_object_list_mtx);
TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
@@ -648,6 +652,9 @@ vm_object_terminate(vm_object_t object)
}
vm_page_unlock_queues();
+ if (__predict_false(object->cache != NULL))
+ vm_page_cache_free(object);
+
/*
* Let the pager know object is dead.
*/
@@ -732,8 +739,7 @@ vm_object_page_clean(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int
while (tscan < tend) {
curgeneration = object->generation;
p = vm_page_lookup(object, tscan);
- if (p == NULL || p->valid == 0 ||
- VM_PAGE_INQUEUE1(p, PQ_CACHE)) {
+ if (p == NULL || p->valid == 0) {
if (--scanlimit == 0)
break;
++tscan;
@@ -821,8 +827,7 @@ again:
pi = p->pindex;
if ((p->oflags & VPO_CLEANCHK) == 0 ||
(pi < tstart) || (pi >= tend) ||
- (p->valid == 0) ||
- VM_PAGE_INQUEUE1(p, PQ_CACHE)) {
+ p->valid == 0) {
p->oflags &= ~VPO_CLEANCHK;
continue;
}
@@ -900,10 +905,6 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration,
(tp->oflags & VPO_CLEANCHK) == 0) ||
(tp->busy != 0))
break;
- if (VM_PAGE_INQUEUE1(tp, PQ_CACHE)) {
- tp->oflags &= ~VPO_CLEANCHK;
- break;
- }
vm_page_test_dirty(tp);
if ((tp->dirty & tp->valid) == 0) {
tp->oflags &= ~VPO_CLEANCHK;
@@ -928,10 +929,6 @@ vm_object_page_collect_flush(vm_object_t object, vm_page_t p, int curgeneration,
(tp->oflags & VPO_CLEANCHK) == 0) ||
(tp->busy != 0))
break;
- if (VM_PAGE_INQUEUE1(tp, PQ_CACHE)) {
- tp->oflags &= ~VPO_CLEANCHK;
- break;
- }
vm_page_test_dirty(tp);
if ((tp->dirty & tp->valid) == 0) {
tp->oflags &= ~VPO_CLEANCHK;
@@ -1104,6 +1101,12 @@ shadowlookup:
}
}
m = vm_page_lookup(tobject, tpindex);
+ if (m == NULL && advise == MADV_WILLNEED) {
+ /*
+ * If the page is cached, reactivate it.
+ */
+ m = vm_page_alloc(tobject, tpindex, VM_ALLOC_IFCACHED);
+ }
if (m == NULL) {
/*
* There may be swap even if there is no backing page
@@ -1356,6 +1359,13 @@ retry:
* and new_object's locks are released and reacquired.
*/
swap_pager_copy(orig_object, new_object, offidxstart, 0);
+
+ /*
+ * Transfer any cached pages from orig_object to new_object.
+ */
+ if (__predict_false(orig_object->cache != NULL))
+ vm_page_cache_transfer(orig_object, offidxstart,
+ new_object);
}
VM_OBJECT_UNLOCK(orig_object);
TAILQ_FOREACH(m, &new_object->memq, listq)
@@ -1390,8 +1400,8 @@ vm_object_backing_scan(vm_object_t object, int op)
*/
if (op & OBSC_TEST_ALL_SHADOWED) {
/*
- * We do not want to have to test for the existence of
- * swap pages in the backing object. XXX but with the
+ * We do not want to have to test for the existence of cache
+ * or swap pages in the backing object. XXX but with the
* new swapper this would be pretty easy to do.
*
* XXX what about anonymous MAP_SHARED memory that hasn't
@@ -1664,6 +1674,12 @@ vm_object_collapse(vm_object_t object)
backing_object,
object,
OFF_TO_IDX(object->backing_object_offset), TRUE);
+
+ /*
+ * Free any cached pages from backing_object.
+ */
+ if (__predict_false(backing_object->cache != NULL))
+ vm_page_cache_free(backing_object);
}
/*
* Object now shadows whatever backing_object did.
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 5a3f3c7..b0da10b 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -100,6 +100,7 @@ struct vm_object {
struct vm_object *backing_object; /* object that I'm a shadow of */
vm_ooffset_t backing_object_offset;/* Offset in backing object */
TAILQ_ENTRY(vm_object) pager_object_list; /* list of all objects of this pager type */
+ vm_page_t cache; /* root of the cache page splay tree */
void *handle;
union {
/*
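
The new cache field is the root of a splay tree of cached pages keyed by pindex; vm_page_splay() rotates the page closest to the requested index to the root, so a lookup is one splay plus one comparison. A sketch mirroring vm_page_cache_lookup() from vm_page.c below (free queue mutex assumed held):

    m = object->cache;
    if (m != NULL && m->pindex != pindex) {
        m = vm_page_splay(pindex, m);   /* closest pindex becomes the new root */
        object->cache = m;
        if (m->pindex != pindex)
            m = NULL;                   /* nothing cached at this offset */
    }
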
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 6504f60..36fee28 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -547,7 +547,7 @@ vm_page_sleep(vm_page_t m, const char *msg)
void
vm_page_dirty(vm_page_t m)
{
- KASSERT(VM_PAGE_GETKNOWNQUEUE1(m) != PQ_CACHE,
+ KASSERT((m->flags & PG_CACHED) == 0,
("vm_page_dirty: page in cache!"));
KASSERT(!VM_PAGE_IS_FREE(m),
("vm_page_dirty: page is free!"));
@@ -790,48 +790,163 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
vm_page_remove(m);
vm_page_insert(m, new_object, new_pindex);
- if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
- vm_page_deactivate(m);
vm_page_dirty(m);
}
/*
- * vm_page_select_cache:
+ * Convert all of the cached pages belonging to the given object
+ * into free pages. If the given object has cached pages and is
+ * backed by a vnode, reduce the vnode's hold count.
+ */
+void
+vm_page_cache_free(vm_object_t object)
+{
+ vm_page_t m, root;
+ boolean_t empty;
+
+ mtx_lock(&vm_page_queue_free_mtx);
+ empty = object->cache == NULL;
+ while ((m = object->cache) != NULL) {
+ if (m->left == NULL)
+ root = m->right;
+ else if (m->right == NULL)
+ root = m->left;
+ else {
+ root = vm_page_splay(m->pindex, m->left);
+ root->right = m->right;
+ }
+ m->object->cache = root;
+ m->object = NULL;
+ m->valid = 0;
+ /* Clear PG_CACHED and set PG_FREE. */
+ m->flags ^= PG_CACHED | PG_FREE;
+ KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE,
+ ("vm_page_cache_free: page %p has inconsistent flags", m));
+ cnt.v_cache_count--;
+ cnt.v_free_count++;
+ }
+ mtx_unlock(&vm_page_queue_free_mtx);
+ if (object->type == OBJT_VNODE && !empty)
+ vdrop(object->handle);
+}
+
+/*
+ * Returns the cached page that is associated with the given
+ * object and offset. If, however, none exists, returns NULL.
*
- * Move a page of the given color from the cache queue to the free
- * queue. As pages might be found, but are not applicable, they are
- * deactivated.
+ * The free page queue must be locked.
+ */
+static inline vm_page_t
+vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
+{
+ vm_page_t m;
+
+ mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ if ((m = object->cache) != NULL && m->pindex != pindex) {
+ m = vm_page_splay(pindex, m);
+ if ((object->cache = m)->pindex != pindex)
+ m = NULL;
+ }
+ return (m);
+}
+
+/*
+ * Remove the given cached page from its containing object's
+ * collection of cached pages.
*
- * This routine may not block.
+ * The free page queue must be locked.
*/
-vm_page_t
-vm_page_select_cache(void)
+void
+vm_page_cache_remove(vm_page_t m)
{
vm_object_t object;
- vm_page_t m;
- boolean_t was_trylocked;
+ vm_page_t root;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- while ((m = TAILQ_FIRST(&vm_page_queues[PQ_CACHE].pl)) != NULL) {
- KASSERT(m->dirty == 0, ("Found dirty cache page %p", m));
- KASSERT(!pmap_page_is_mapped(m),
- ("Found mapped cache page %p", m));
- KASSERT((m->flags & PG_UNMANAGED) == 0,
- ("Found unmanaged cache page %p", m));
- KASSERT(m->wire_count == 0, ("Found wired cache page %p", m));
- if (m->hold_count == 0 && (object = m->object,
- (was_trylocked = VM_OBJECT_TRYLOCK(object)) ||
- VM_OBJECT_LOCKED(object))) {
- KASSERT((m->oflags & VPO_BUSY) == 0 && m->busy == 0,
- ("Found busy cache page %p", m));
- vm_page_free(m);
- if (was_trylocked)
- VM_OBJECT_UNLOCK(object);
- break;
+ mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ KASSERT((m->flags & PG_CACHED) != 0,
+ ("vm_page_cache_remove: page %p is not cached", m));
+ object = m->object;
+ if (m != object->cache) {
+ root = vm_page_splay(m->pindex, object->cache);
+ KASSERT(root == m,
+ ("vm_page_cache_remove: page %p is not cached in object %p",
+ m, object));
+ }
+ if (m->left == NULL)
+ root = m->right;
+ else if (m->right == NULL)
+ root = m->left;
+ else {
+ root = vm_page_splay(m->pindex, m->left);
+ root->right = m->right;
+ }
+ object->cache = root;
+ m->object = NULL;
+ cnt.v_cache_count--;
+}
+
+/*
+ * Transfer all of the cached pages with offset greater than or
+ * equal to 'offidxstart' from the original object's cache to the
+ * new object's cache. Initially, the new object's cache must be
+ * empty. Offset 'offidxstart' in the original object must
+ * correspond to offset zero in the new object.
+ *
+ * The new object must be locked.
+ */
+void
+vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
+ vm_object_t new_object)
+{
+ vm_page_t m, m_next;
+
+ /*
+ * Insertion into an object's collection of cached pages
+ * requires the object to be locked. In contrast, removal does
+ * not.
+ */
+ VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
+ KASSERT(new_object->cache == NULL,
+ ("vm_page_cache_transfer: object %p has cached pages",
+ new_object));
+ mtx_lock(&vm_page_queue_free_mtx);
+ if ((m = orig_object->cache) != NULL) {
+ /*
+ * Transfer all of the pages with offset greater than or
+ * equal to 'offidxstart' from the original object's
+ * cache to the new object's cache.
+ */
+ m = vm_page_splay(offidxstart, m);
+ if (m->pindex < offidxstart) {
+ orig_object->cache = m;
+ new_object->cache = m->right;
+ m->right = NULL;
+ } else {
+ orig_object->cache = m->left;
+ new_object->cache = m;
+ m->left = NULL;
+ }
+ KASSERT(new_object->cache == NULL ||
+ new_object->type == OBJT_SWAP,
+ ("vm_page_cache_transfer: object %p's type is incompatible"
+ " with cached pages", new_object));
+
+ /*
+ * Update the object and offset of each page that was
+ * transferred to the new object's cache.
+ */
+ while ((m = new_object->cache) != NULL) {
+ m_next = vm_page_splay(m->pindex, m->right);
+ m->object = new_object;
+ m->pindex -= offidxstart;
+ if (m_next == NULL)
+ break;
+ m->right = NULL;
+ m_next->left = m;
+ new_object->cache = m_next;
}
- vm_page_deactivate(m);
}
- return (m);
+ mtx_unlock(&vm_page_queue_free_mtx);
}
/*
@@ -847,15 +962,13 @@ vm_page_select_cache(void)
* VM_ALLOC_ZERO zero page
*
* This routine may not block.
- *
- * Additional special handling is required when called from an
- * interrupt (VM_ALLOC_INTERRUPT). We are not allowed to mess with
- * the page cache in this case.
*/
vm_page_t
vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
{
- vm_page_t m = NULL;
+ struct vnode *vp = NULL;
+ vm_object_t m_object;
+ vm_page_t m;
int flags, page_req;
page_req = req & VM_ALLOC_CLASS_MASK;
@@ -876,52 +989,32 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
page_req = VM_ALLOC_SYSTEM;
};
-loop:
mtx_lock(&vm_page_queue_free_mtx);
- if (cnt.v_free_count > cnt.v_free_reserved ||
+ if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
(page_req == VM_ALLOC_SYSTEM &&
- cnt.v_cache_count == 0 &&
- cnt.v_free_count > cnt.v_interrupt_free_min) ||
- (page_req == VM_ALLOC_INTERRUPT && cnt.v_free_count > 0)) {
+ cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
+ (page_req == VM_ALLOC_INTERRUPT &&
+ cnt.v_free_count + cnt.v_cache_count > 0)) {
/*
* Allocate from the free queue if the number of free pages
* exceeds the minimum for the request class.
*/
- m = vm_phys_alloc_pages(object != NULL ?
- VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
- } else if (page_req != VM_ALLOC_INTERRUPT) {
- mtx_unlock(&vm_page_queue_free_mtx);
- /*
- * Allocatable from cache (non-interrupt only). On success,
- * we must free the page and try again, thus ensuring that
- * cnt.v_*_free_min counters are replenished.
- */
- vm_page_lock_queues();
- if ((m = vm_page_select_cache()) == NULL) {
- KASSERT(cnt.v_cache_count == 0,
- ("vm_page_alloc: cache queue is missing %d pages",
- cnt.v_cache_count));
- vm_page_unlock_queues();
- atomic_add_int(&vm_pageout_deficit, 1);
- pagedaemon_wakeup();
-
- if (page_req != VM_ALLOC_SYSTEM)
- return (NULL);
-
- mtx_lock(&vm_page_queue_free_mtx);
- if (cnt.v_free_count <= cnt.v_interrupt_free_min) {
+ if (object != NULL &&
+ (m = vm_page_cache_lookup(object, pindex)) != NULL) {
+ if ((req & VM_ALLOC_IFNOTCACHED) != 0) {
mtx_unlock(&vm_page_queue_free_mtx);
return (NULL);
}
+ vm_phys_unfree_page(m);
+ } else if ((req & VM_ALLOC_IFCACHED) != 0) {
+ mtx_unlock(&vm_page_queue_free_mtx);
+ return (NULL);
+ } else
m = vm_phys_alloc_pages(object != NULL ?
VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
- } else {
- vm_page_unlock_queues();
- goto loop;
- }
} else {
/*
- * Not allocatable from cache from interrupt, give up.
+ * Not allocatable, give up.
*/
mtx_unlock(&vm_page_queue_free_mtx);
atomic_add_int(&vm_pageout_deficit, 1);
@@ -937,8 +1030,24 @@ loop:
m != NULL,
("vm_page_alloc(): missing page on free queue")
);
- KASSERT(VM_PAGE_IS_FREE(m),
- ("vm_page_alloc: page %p is not free", m));
+ if ((m->flags & PG_CACHED) != 0) {
+ KASSERT(m->valid != 0,
+ ("vm_page_alloc: cached page %p is invalid", m));
+ if (m->object == object && m->pindex == pindex)
+ cnt.v_reactivated++;
+ else
+ m->valid = 0;
+ m_object = m->object;
+ vm_page_cache_remove(m);
+ if (m_object->type == OBJT_VNODE && m_object->cache == NULL)
+ vp = m_object->handle;
+ } else {
+ KASSERT(VM_PAGE_IS_FREE(m),
+ ("vm_page_alloc: page %p is not free", m));
+ KASSERT(m->valid == 0,
+ ("vm_page_alloc: free page %p is valid", m));
+ cnt.v_free_count--;
+ }
/*
* Initialize structure. Only the PG_ZERO flag is inherited.
@@ -964,7 +1073,6 @@ loop:
m->hold_count = 0;
m->act_count = 0;
m->busy = 0;
- m->valid = 0;
KASSERT(m->dirty == 0, ("vm_page_alloc: free/cache page %p was dirty", m));
mtx_unlock(&vm_page_queue_free_mtx);
@@ -974,6 +1082,15 @@ loop:
m->pindex = pindex;
/*
+ * The following call to vdrop() must come after the above call
+ * to vm_page_insert() in case both affect the same object and
+ * vnode. Otherwise, the affected vnode's hold count could
+ * temporarily become zero.
+ */
+ if (vp != NULL)
+ vdrop(vp);
+
+ /*
* Don't wakeup too often - wakeup the pageout daemon when
* we would be nearly out of memory.
*/
@@ -1047,8 +1164,6 @@ vm_page_activate(vm_page_t m)
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
if (VM_PAGE_GETKNOWNQUEUE2(m) != PQ_ACTIVE) {
- if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
- cnt.v_reactivated++;
vm_pageq_remove(m);
if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
if (m->act_count < ACT_INIT)
@@ -1133,7 +1248,7 @@ vm_page_free_toq(vm_page_t m)
* callback routine until after we've put the page on the
* appropriate free queue.
*/
- vm_pageq_remove_nowakeup(m);
+ vm_pageq_remove(m);
vm_page_remove(m);
/*
@@ -1160,6 +1275,7 @@ vm_page_free_toq(vm_page_t m)
} else {
m->flags |= PG_FREE;
mtx_lock(&vm_page_queue_free_mtx);
+ cnt.v_free_count++;
if ((m->flags & PG_ZERO) != 0) {
vm_phys_free_pages(m, 0);
++vm_page_zero_count;
@@ -1279,8 +1395,6 @@ _vm_page_deactivate(vm_page_t m, int athead)
if (VM_PAGE_INQUEUE2(m, PQ_INACTIVE))
return;
if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
- if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
- cnt.v_reactivated++;
vm_page_flag_clear(m, PG_WINATCFLS);
vm_pageq_remove(m);
if (athead)
@@ -1354,15 +1468,26 @@ vm_page_try_to_free(vm_page_t m)
void
vm_page_cache(vm_page_t m)
{
+ vm_object_t object;
+ vm_page_t root;
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
+ object = m->object;
+ VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
if ((m->flags & PG_UNMANAGED) || (m->oflags & VPO_BUSY) || m->busy ||
m->hold_count || m->wire_count) {
panic("vm_page_cache: attempting to cache busy page");
}
- if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
+ if (m->valid == 0 || object->type == OBJT_DEFAULT) {
+ /*
+ * Hypothesis: A cache-eligible page belonging to a
+ * default object must be zero filled.
+ */
+ vm_page_free(m);
return;
+ }
+ KASSERT((m->flags & PG_CACHED) == 0,
+ ("vm_page_cache: page %p is already cached", m));
cnt.v_tcached++;
/*
@@ -1374,11 +1499,72 @@ vm_page_cache(vm_page_t m)
panic("vm_page_cache: caching a dirty page, pindex: %ld",
(long)m->pindex);
}
- vm_pageq_remove_nowakeup(m);
- vm_pageq_enqueue(PQ_CACHE, m);
+
+ /*
+ * Remove the page from the paging queues.
+ */
+ vm_pageq_remove(m);
+
+ /*
+ * Remove the page from the object's collection of resident
+ * pages.
+ */
+ if (m != object->root)
+ vm_page_splay(m->pindex, object->root);
+ if (m->left == NULL)
+ root = m->right;
+ else {
+ root = vm_page_splay(m->pindex, m->left);
+ root->right = m->right;
+ }
+ object->root = root;
+ TAILQ_REMOVE(&object->memq, m, listq);
+ object->resident_page_count--;
+ object->generation++;
+
+ /*
+ * Insert the page into the object's collection of cached pages
+ * and the physical memory allocator's cache/free page queues.
+ */
+ vm_page_flag_set(m, PG_CACHED);
+ vm_page_flag_clear(m, PG_ZERO);
mtx_lock(&vm_page_queue_free_mtx);
+ vm_phys_set_pool(VM_FREEPOOL_CACHE, m, 0);
+ cnt.v_cache_count++;
+ root = object->cache;
+ if (root == NULL) {
+ m->left = NULL;
+ m->right = NULL;
+ } else {
+ root = vm_page_splay(m->pindex, root);
+ if (m->pindex < root->pindex) {
+ m->left = root->left;
+ m->right = root;
+ root->left = NULL;
+ } else if (__predict_false(m->pindex == root->pindex))
+ panic("vm_page_cache: offset already cached");
+ else {
+ m->right = root->right;
+ m->left = root;
+ root->right = NULL;
+ }
+ }
+ object->cache = m;
+ vm_phys_free_pages(m, 0);
vm_page_free_wakeup();
mtx_unlock(&vm_page_queue_free_mtx);
+
+ /*
+ * Increment the vnode's hold count if this is the object's only
+ * cached page. Decrement the vnode's hold count if this was
+ * the object's only resident page.
+ */
+ if (object->type == OBJT_VNODE) {
+ if (root == NULL && object->resident_page_count != 0)
+ vhold(object->handle);
+ else if (root != NULL && object->resident_page_count == 0)
+ vdrop(object->handle);
+ }
}
/*
@@ -1416,9 +1602,7 @@ vm_page_dontneed(vm_page_t m)
* occasionally leave the page alone
*/
if ((dnw & 0x01F0) == 0 ||
- VM_PAGE_INQUEUE2(m, PQ_INACTIVE) ||
- VM_PAGE_INQUEUE1(m, PQ_CACHE)
- ) {
+ VM_PAGE_INQUEUE2(m, PQ_INACTIVE)) {
if (m->act_count >= ACT_INIT)
--m->act_count;
return;
@@ -1482,7 +1666,8 @@ retrylookup:
if ((allocflags & VM_ALLOC_RETRY) == 0)
return (NULL);
goto retrylookup;
- }
+ } else if (m->valid != 0)
+ return (m);
if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
return (m);
@@ -1813,7 +1998,7 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
db_printf("\n");
db_printf("PQ_CACHE:");
- db_printf(" %d", *vm_page_queues[PQ_CACHE].cnt);
+ db_printf(" %d", cnt.v_cache_count);
db_printf("\n");
db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index b06a19e..3ed2f75 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -160,27 +160,20 @@ CTASSERT(sizeof(u_long) >= 8);
#define PQ_NONE 0
#define PQ_INACTIVE 1
#define PQ_ACTIVE 2
-#define PQ_CACHE 3
-#define PQ_HOLD 4
-#define PQ_COUNT 5
-#define PQ_MAXCOUNT 5
+#define PQ_HOLD 3
+#define PQ_COUNT 4
+#define PQ_MAXCOUNT 4
/* Returns the real queue a page is on. */
#define VM_PAGE_GETQUEUE(m) ((m)->queue)
/* Returns the well known queue a page is on. */
-#define VM_PAGE_GETKNOWNQUEUE1(m) VM_PAGE_GETQUEUE(m)
#define VM_PAGE_GETKNOWNQUEUE2(m) VM_PAGE_GETQUEUE(m)
-/* Given the real queue number and a page color return the well know queue. */
-#define VM_PAGE_RESOLVEQUEUE(m, q) (q)
-
/* Returns true if the page is in the named well known queue. */
-#define VM_PAGE_INQUEUE1(m, q) (VM_PAGE_GETKNOWNQUEUE1(m) == (q))
#define VM_PAGE_INQUEUE2(m, q) (VM_PAGE_GETKNOWNQUEUE2(m) == (q))
/* Sets the queue a page is on. */
-#define VM_PAGE_SETQUEUE1(m, q) (VM_PAGE_GETQUEUE(m) = (q))
#define VM_PAGE_SETQUEUE2(m, q) (VM_PAGE_GETQUEUE(m) = (q))
struct vpgqueues {
@@ -201,6 +194,7 @@ extern struct mtx vm_page_queue_free_mtx;
* pte mappings, nor can they be removed from their objects via
* the object, and such pages are also not on any PQ queue.
*/
+#define PG_CACHED 0x0001 /* page is cached */
#define PG_FREE 0x0002 /* page is free */
#define PG_WINATCFLS 0x0004 /* flush dirty page on inactive q */
#define PG_FICTITIOUS 0x0008 /* physical page doesn't exist (O) */
@@ -230,9 +224,8 @@ extern struct mtx vm_page_queue_free_mtx;
* Available for allocation now.
*
* cache
- * Almost available for allocation. Still in an
- * object, but clean and immediately freeable at
- * non-interrupt times.
+ * Almost available for allocation. Still associated with
+ * an object, but clean and immediately freeable.
*
* hold
* Will become free after a pending I/O operation
@@ -302,6 +295,8 @@ extern struct mtx vm_page_queue_mtx;
#define VM_ALLOC_RETRY 0x0080 /* vm_page_grab() only */
#define VM_ALLOC_NOOBJ 0x0100 /* No associated object */
#define VM_ALLOC_NOBUSY 0x0200 /* Do not busy the page */
+#define VM_ALLOC_IFCACHED 0x0400 /* Fail if the page is not cached */
+#define VM_ALLOC_IFNOTCACHED 0x0800 /* Fail if the page is cached */
void vm_page_flag_set(vm_page_t m, unsigned short bits);
void vm_page_flag_clear(vm_page_t m, unsigned short bits);
@@ -318,7 +313,6 @@ void vm_page_wakeup(vm_page_t m);
void vm_pageq_init(void);
void vm_pageq_enqueue(int queue, vm_page_t m);
-void vm_pageq_remove_nowakeup(vm_page_t m);
void vm_pageq_remove(vm_page_t m);
void vm_pageq_requeue(vm_page_t m);
@@ -326,6 +320,9 @@ void vm_page_activate (vm_page_t);
vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
void vm_page_cache (register vm_page_t);
+void vm_page_cache_free(vm_object_t);
+void vm_page_cache_remove(vm_page_t);
+void vm_page_cache_transfer(vm_object_t, vm_pindex_t, vm_object_t);
int vm_page_try_to_cache (vm_page_t);
int vm_page_try_to_free (vm_page_t);
void vm_page_dontneed (register vm_page_t);
@@ -334,7 +331,6 @@ void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
void vm_page_remove (vm_page_t);
void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
-vm_page_t vm_page_select_cache(void);
void vm_page_sleep(vm_page_t m, const char *msg);
vm_page_t vm_page_splay(vm_pindex_t, vm_page_t);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 9478809..2c26727 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -342,8 +342,7 @@ more:
ib = 0;
break;
}
- if (VM_PAGE_INQUEUE1(p, PQ_CACHE) ||
- (p->oflags & VPO_BUSY) || p->busy) {
+ if ((p->oflags & VPO_BUSY) || p->busy) {
ib = 0;
break;
}
@@ -372,8 +371,7 @@ more:
if ((p = vm_page_lookup(object, pindex + is)) == NULL)
break;
- if (VM_PAGE_INQUEUE1(p, PQ_CACHE) ||
- (p->oflags & VPO_BUSY) || p->busy) {
+ if ((p->oflags & VPO_BUSY) || p->busy) {
break;
}
vm_page_test_dirty(p);
@@ -1139,37 +1137,6 @@ unlock_and_continue:
VM_OBJECT_UNLOCK(object);
m = next;
}
-
- /*
- * We try to maintain some *really* free pages, this allows interrupt
- * code to be guaranteed space. Since both cache and free queues
- * are considered basically 'free', moving pages from cache to free
- * does not effect other calculations.
- */
- while (cnt.v_free_count < cnt.v_free_reserved) {
- TAILQ_FOREACH(m, &vm_page_queues[PQ_CACHE].pl, pageq) {
- KASSERT(m->dirty == 0,
- ("Found dirty cache page %p", m));
- KASSERT(!pmap_page_is_mapped(m),
- ("Found mapped cache page %p", m));
- KASSERT((m->flags & PG_UNMANAGED) == 0,
- ("Found unmanaged cache page %p", m));
- KASSERT(m->wire_count == 0,
- ("Found wired cache page %p", m));
- if (m->hold_count == 0 && VM_OBJECT_TRYLOCK(object =
- m->object)) {
- KASSERT((m->oflags & VPO_BUSY) == 0 &&
- m->busy == 0, ("Found busy cache page %p",
- m));
- vm_page_free(m);
- VM_OBJECT_UNLOCK(object);
- cnt.v_dfree++;
- break;
- }
- }
- if (m == NULL)
- break;
- }
vm_page_unlock_queues();
#if !defined(NO_SWAPPING)
/*
diff --git a/sys/vm/vm_pageq.c b/sys/vm/vm_pageq.c
index 5c10f62..055bac5 100644
--- a/sys/vm/vm_pageq.c
+++ b/sys/vm/vm_pageq.c
@@ -56,7 +56,6 @@ vm_pageq_init(void)
{
int i;
- vm_page_queues[PQ_CACHE].cnt = &cnt.v_cache_count;
vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
vm_page_queues[PQ_HOLD].cnt = &cnt.v_active_count;
@@ -94,28 +93,6 @@ vm_pageq_enqueue(int queue, vm_page_t m)
}
/*
- * vm_pageq_remove_nowakeup:
- *
- * vm_page_unqueue() without any wakeup
- *
- * The queue containing the given page must be locked.
- * This routine may not block.
- */
-void
-vm_pageq_remove_nowakeup(vm_page_t m)
-{
- int queue = VM_PAGE_GETQUEUE(m);
- struct vpgqueues *pq;
-
- if (queue != PQ_NONE) {
- pq = &vm_page_queues[queue];
- VM_PAGE_SETQUEUE2(m, PQ_NONE);
- TAILQ_REMOVE(&pq->pl, m, pageq);
- (*pq->cnt)--;
- }
-}
-
-/*
* vm_pageq_remove:
*
* Remove a page from its queue.
@@ -134,9 +111,5 @@ vm_pageq_remove(vm_page_t m)
pq = &vm_page_queues[queue];
TAILQ_REMOVE(&pq->pl, m, pageq);
(*pq->cnt)--;
- if (VM_PAGE_RESOLVEQUEUE(m, queue) == PQ_CACHE) {
- if (vm_paging_needed())
- pagedaemon_wakeup();
- }
}
}
diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c
index 81d597c..8efdf3d 100644
--- a/sys/vm/vm_phys.c
+++ b/sys/vm/vm_phys.c
@@ -44,6 +44,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sbuf.h>
#include <sys/sysctl.h>
#include <sys/vmmeter.h>
+#include <sys/vnode.h>
#include <ddb/ddb.h>
@@ -89,7 +90,6 @@ SYSCTL_OID(_vm, OID_AUTO, phys_segs, CTLTYPE_STRING | CTLFLAG_RD,
static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int flind);
static int vm_phys_paddr_to_segind(vm_paddr_t pa);
-static void vm_phys_set_pool(int pool, vm_page_t m, int order);
static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl,
int order);
@@ -286,6 +286,7 @@ vm_phys_add_page(vm_paddr_t pa)
m->pool = VM_FREEPOOL_DEFAULT;
pmap_page_init(m);
mtx_lock(&vm_page_queue_free_mtx);
+ cnt.v_free_count++;
vm_phys_free_pages(m, 0);
mtx_unlock(&vm_page_queue_free_mtx);
}
@@ -318,7 +319,6 @@ vm_phys_alloc_pages(int pool, int order)
fl[oind].lcnt--;
m->order = VM_NFREEORDER;
vm_phys_split_pages(m, oind, fl, order);
- cnt.v_free_count -= 1 << order;
return (m);
}
}
@@ -339,7 +339,6 @@ vm_phys_alloc_pages(int pool, int order)
m->order = VM_NFREEORDER;
vm_phys_set_pool(pool, m, oind);
vm_phys_split_pages(m, oind, fl, order);
- cnt.v_free_count -= 1 << order;
return (m);
}
}
@@ -428,7 +427,6 @@ vm_phys_free_pages(vm_page_t m, int order)
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
pa = VM_PAGE_TO_PHYS(m);
seg = &vm_phys_segs[m->segind];
- cnt.v_free_count += 1 << order;
while (order < VM_NFREEORDER - 1) {
pa_buddy = pa ^ (1 << (PAGE_SHIFT + order));
if (pa_buddy < seg->start ||
@@ -456,7 +454,7 @@ vm_phys_free_pages(vm_page_t m, int order)
/*
* Set the pool for a contiguous, power of two-sized set of physical pages.
*/
-static void
+void
vm_phys_set_pool(int pool, vm_page_t m, int order)
{
vm_page_t m_tmp;
@@ -466,44 +464,113 @@ vm_phys_set_pool(int pool, vm_page_t m, int order)
}
/*
- * Try to zero one or more physical pages. Used by an idle priority thread.
+ * Remove the given physical page "m" from the free lists.
+ *
+ * The free page queues must be locked.
+ */
+void
+vm_phys_unfree_page(vm_page_t m)
+{
+ struct vm_freelist *fl;
+ struct vm_phys_seg *seg;
+ vm_paddr_t pa, pa_half;
+ vm_page_t m_set, m_tmp;
+ int order;
+
+ mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+
+ /*
+ * First, find the contiguous, power of two-sized set of free
+ * physical pages containing the given physical page "m" and
+ * assign it to "m_set".
+ */
+ seg = &vm_phys_segs[m->segind];
+ for (m_set = m, order = 0; m_set->order == VM_NFREEORDER &&
+ order < VM_NFREEORDER; ) {
+ order++;
+ pa = m->phys_addr & (~(vm_paddr_t)0 << (PAGE_SHIFT + order));
+ KASSERT(pa >= seg->start && pa < seg->end,
+ ("vm_phys_unfree_page: paddr %#jx is not within segment %p",
+ (uintmax_t)pa, seg));
+ m_set = &seg->first_page[atop(pa - seg->start)];
+ }
+ KASSERT(m_set->order >= order, ("vm_phys_unfree_page: page %p's order"
+ " (%d) is less than expected (%d)", m_set, m_set->order, order));
+ KASSERT(m_set->order < VM_NFREEORDER,
+ ("vm_phys_unfree_page: page %p has unexpected order %d",
+ m_set, m_set->order));
+ KASSERT(order < VM_NFREEORDER,
+ ("vm_phys_unfree_page: order %d is out of range", order));
+
+ /*
+ * Next, remove "m_set" from the free lists. Finally, extract
+ * "m" from "m_set" using an iterative algorithm: While "m_set"
+ * is larger than a page, shrink "m_set" by returning the half
+ * of "m_set" that does not contain "m" to the free lists.
+ */
+ fl = (*seg->free_queues)[m_set->pool];
+ order = m_set->order;
+ TAILQ_REMOVE(&fl[order].pl, m_set, pageq);
+ fl[order].lcnt--;
+ m_set->order = VM_NFREEORDER;
+ while (order > 0) {
+ order--;
+ pa_half = m_set->phys_addr ^ (1 << (PAGE_SHIFT + order));
+ if (m->phys_addr < pa_half)
+ m_tmp = &seg->first_page[atop(pa_half - seg->start)];
+ else {
+ m_tmp = m_set;
+ m_set = &seg->first_page[atop(pa_half - seg->start)];
+ }
+ m_tmp->order = order;
+ TAILQ_INSERT_HEAD(&fl[order].pl, m_tmp, pageq);
+ fl[order].lcnt++;
+ }
+ KASSERT(m_set == m, ("vm_phys_unfree_page: fatal inconsistency"));
+}
+
+/*
+ * Try to zero one physical page. Used by an idle priority thread.
*/
boolean_t
vm_phys_zero_pages_idle(void)
{
- struct vm_freelist *fl;
+ static struct vm_freelist *fl = vm_phys_free_queues[0][0];
+ static int flind, oind, pind;
vm_page_t m, m_tmp;
- int flind, pind, q, zeroed;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
- for (flind = 0; flind < vm_nfreelists; flind++) {
- pind = VM_FREEPOOL_DEFAULT;
- fl = vm_phys_free_queues[flind][pind];
- for (q = 0; q < VM_NFREEORDER; q++) {
- m = TAILQ_FIRST(&fl[q].pl);
- if (m != NULL && (m->flags & PG_ZERO) == 0) {
- TAILQ_REMOVE(&fl[q].pl, m, pageq);
- fl[q].lcnt--;
- m->order = VM_NFREEORDER;
- cnt.v_free_count -= 1 << q;
- mtx_unlock(&vm_page_queue_free_mtx);
- zeroed = 0;
- for (m_tmp = m; m_tmp < &m[1 << q]; m_tmp++) {
- if ((m_tmp->flags & PG_ZERO) == 0) {
- pmap_zero_page_idle(m_tmp);
- m_tmp->flags |= PG_ZERO;
- zeroed++;
- }
+ for (;;) {
+ TAILQ_FOREACH_REVERSE(m, &fl[oind].pl, pglist, pageq) {
+ for (m_tmp = m; m_tmp < &m[1 << oind]; m_tmp++) {
+ if ((m_tmp->flags & (PG_CACHED | PG_ZERO)) == 0) {
+ vm_phys_unfree_page(m_tmp);
+ cnt.v_free_count--;
+ mtx_unlock(&vm_page_queue_free_mtx);
+ pmap_zero_page_idle(m_tmp);
+ m_tmp->flags |= PG_ZERO;
+ mtx_lock(&vm_page_queue_free_mtx);
+ cnt.v_free_count++;
+ vm_phys_free_pages(m_tmp, 0);
+ vm_page_zero_count++;
+ cnt_prezero++;
+ return (TRUE);
}
- cnt_prezero += zeroed;
- mtx_lock(&vm_page_queue_free_mtx);
- vm_phys_free_pages(m, q);
- vm_page_zero_count += zeroed;
- return (TRUE);
}
}
+ oind++;
+ if (oind == VM_NFREEORDER) {
+ oind = 0;
+ pind++;
+ if (pind == VM_NFREEPOOL) {
+ pind = 0;
+ flind++;
+ if (flind == vm_nfreelists)
+ flind = 0;
+ }
+ fl = vm_phys_free_queues[flind][pind];
+ }
}
- return (FALSE);
}
/*
@@ -522,6 +589,7 @@ vm_phys_alloc_contig(unsigned long npages, vm_paddr_t low, vm_paddr_t high,
{
struct vm_freelist *fl;
struct vm_phys_seg *seg;
+ vm_object_t m_object;
vm_paddr_t pa, pa_last, size;
vm_page_t m, m_ret;
int flind, i, oind, order, pind;
@@ -606,12 +674,19 @@ done:
vm_phys_set_pool(VM_FREEPOOL_DEFAULT, m_ret, oind);
fl = (*seg->free_queues)[m_ret->pool];
vm_phys_split_pages(m_ret, oind, fl, order);
- cnt.v_free_count -= roundup2(npages, 1 << imin(oind, order));
for (i = 0; i < npages; i++) {
m = &m_ret[i];
KASSERT(m->queue == PQ_NONE,
("vm_phys_alloc_contig: page %p has unexpected queue %d",
m, m->queue));
+ m_object = m->object;
+ if ((m->flags & PG_CACHED) != 0)
+ vm_page_cache_remove(m);
+ else {
+ KASSERT(VM_PAGE_IS_FREE(m),
+ ("vm_phys_alloc_contig: page %p is not free", m));
+ cnt.v_free_count--;
+ }
m->valid = VM_PAGE_BITS_ALL;
if (m->flags & PG_ZERO)
vm_page_zero_count--;
@@ -622,6 +697,13 @@ done:
("vm_phys_alloc_contig: page %p was dirty", m));
m->wire_count = 0;
m->busy = 0;
+ if (m_object != NULL &&
+ m_object->type == OBJT_VNODE &&
+ m_object->cache == NULL) {
+ mtx_unlock(&vm_page_queue_free_mtx);
+ vdrop(m_object->handle);
+ mtx_lock(&vm_page_queue_free_mtx);
+ }
}
for (; i < roundup2(npages, 1 << imin(oind, order)); i++) {
m = &m_ret[i];
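
vm_phys_unfree_page() is what lets vm_page_alloc() reactivate one specific cached page: the page is carved out of whatever buddy block currently contains it, splitting the block as needed. A sketch of that use, assuming the free queue mutex is held (vm_page_cache_lookup() is the static helper added to vm_page.c above):

    m = vm_page_cache_lookup(object, pindex);   /* splay lookup in object->cache */
    if (m != NULL)
        vm_phys_unfree_page(m);                 /* pull just this page off the free lists */
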
diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h
index 0debc01..3e35f9b 100644
--- a/sys/vm/vm_phys.h
+++ b/sys/vm/vm_phys.h
@@ -1,5 +1,6 @@
/*-
* Copyright (c) 2002-2006 Rice University
+ * Copyright (c) 2007 Alan L. Cox <alc@cs.rice.edu>
* All rights reserved.
*
* This software was developed for the FreeBSD Project by Alan L. Cox,
@@ -45,6 +46,8 @@ vm_page_t vm_phys_alloc_pages(int pool, int order);
vm_paddr_t vm_phys_bootstrap_alloc(vm_size_t size, unsigned long alignment);
void vm_phys_free_pages(vm_page_t m, int order);
void vm_phys_init(void);
+void vm_phys_set_pool(int pool, vm_page_t m, int order);
+void vm_phys_unfree_page(vm_page_t m);
boolean_t vm_phys_zero_pages_idle(void);
#endif /* !_VM_PHYS_H_ */