Diffstat (limited to 'sys/vm/vm_page.c')
-rw-r--r--  sys/vm/vm_page.c  357
1 file changed, 271 insertions(+), 86 deletions(-)
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 6504f60..36fee28 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -547,7 +547,7 @@ vm_page_sleep(vm_page_t m, const char *msg)
void
vm_page_dirty(vm_page_t m)
{
- KASSERT(VM_PAGE_GETKNOWNQUEUE1(m) != PQ_CACHE,
+ KASSERT((m->flags & PG_CACHED) == 0,
("vm_page_dirty: page in cache!"));
KASSERT(!VM_PAGE_IS_FREE(m),
("vm_page_dirty: page is free!"));
@@ -790,48 +790,163 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
vm_page_remove(m);
vm_page_insert(m, new_object, new_pindex);
- if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
- vm_page_deactivate(m);
vm_page_dirty(m);
}
/*
- * vm_page_select_cache:
+ * Convert all of the cached pages belonging to the given object
+ * into free pages. If the given object has cached pages and is
+ * backed by a vnode, reduce the vnode's hold count.
+ */
+void
+vm_page_cache_free(vm_object_t object)
+{
+ vm_page_t m, root;
+ boolean_t empty;
+
+ mtx_lock(&vm_page_queue_free_mtx);
+ empty = object->cache == NULL;
+ while ((m = object->cache) != NULL) {
+ if (m->left == NULL)
+ root = m->right;
+ else if (m->right == NULL)
+ root = m->left;
+ else {
+ root = vm_page_splay(m->pindex, m->left);
+ root->right = m->right;
+ }
+ m->object->cache = root;
+ m->object = NULL;
+ m->valid = 0;
+ /* Clear PG_CACHED and set PG_FREE. */
+ m->flags ^= PG_CACHED | PG_FREE;
+ KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE,
+ ("vm_page_cache_free: page %p has inconsistent flags", m));
+ cnt.v_cache_count--;
+ cnt.v_free_count++;
+ }
+ mtx_unlock(&vm_page_queue_free_mtx);
+ if (object->type == OBJT_VNODE && !empty)
+ vdrop(object->handle);
+}
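
The flag update m->flags ^= PG_CACHED | PG_FREE above relies on exactly one of the two bits being set on entry: XOR-ing with both bits clears the one that was set and sets the other in a single store, which is what the KASSERT that follows checks. A minimal userland sketch of the same trick (the flag values below are made up for illustration and are not the definitions from vm/vm_page.h):

#include <assert.h>
#include <stdio.h>

#define PG_CACHED	0x0004		/* illustrative value only */
#define PG_FREE		0x0008		/* illustrative value only */

int
main(void)
{
	unsigned flags = PG_CACHED;	/* exactly one of the two bits is set */

	flags ^= PG_CACHED | PG_FREE;	/* clears PG_CACHED, sets PG_FREE */
	assert((flags & (PG_CACHED | PG_FREE)) == PG_FREE);
	printf("flags = %#x\n", flags);
	return (0);
}
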
+
+/*
+ * Returns the cached page that is associated with the given
+ * object and offset. If, however, none exists, returns NULL.
*
- * Move a page of the given color from the cache queue to the free
- * queue. As pages might be found, but are not applicable, they are
- * deactivated.
+ * The free page queue must be locked.
+ */
+static inline vm_page_t
+vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
+{
+ vm_page_t m;
+
+ mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ if ((m = object->cache) != NULL && m->pindex != pindex) {
+ m = vm_page_splay(pindex, m);
+ if ((object->cache = m)->pindex != pindex)
+ m = NULL;
+ }
+ return (m);
+}
+
+/*
+ * Remove the given cached page from its containing object's
+ * collection of cached pages.
*
- * This routine may not block.
+ * The free page queue must be locked.
*/
-vm_page_t
-vm_page_select_cache(void)
+void
+vm_page_cache_remove(vm_page_t m)
{
vm_object_t object;
- vm_page_t m;
- boolean_t was_trylocked;
+ vm_page_t root;
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- while ((m = TAILQ_FIRST(&vm_page_queues[PQ_CACHE].pl)) != NULL) {
- KASSERT(m->dirty == 0, ("Found dirty cache page %p", m));
- KASSERT(!pmap_page_is_mapped(m),
- ("Found mapped cache page %p", m));
- KASSERT((m->flags & PG_UNMANAGED) == 0,
- ("Found unmanaged cache page %p", m));
- KASSERT(m->wire_count == 0, ("Found wired cache page %p", m));
- if (m->hold_count == 0 && (object = m->object,
- (was_trylocked = VM_OBJECT_TRYLOCK(object)) ||
- VM_OBJECT_LOCKED(object))) {
- KASSERT((m->oflags & VPO_BUSY) == 0 && m->busy == 0,
- ("Found busy cache page %p", m));
- vm_page_free(m);
- if (was_trylocked)
- VM_OBJECT_UNLOCK(object);
- break;
+ mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+ KASSERT((m->flags & PG_CACHED) != 0,
+ ("vm_page_cache_remove: page %p is not cached", m));
+ object = m->object;
+ if (m != object->cache) {
+ root = vm_page_splay(m->pindex, object->cache);
+ KASSERT(root == m,
+ ("vm_page_cache_remove: page %p is not cached in object %p",
+ m, object));
+ }
+ if (m->left == NULL)
+ root = m->right;
+ else if (m->right == NULL)
+ root = m->left;
+ else {
+ root = vm_page_splay(m->pindex, m->left);
+ root->right = m->right;
+ }
+ object->cache = root;
+ m->object = NULL;
+ cnt.v_cache_count--;
+}
+
+/*
+ * Transfer all of the cached pages with offset greater than or
+ * equal to 'offidxstart' from the original object's cache to the
+ * new object's cache. Initially, the new object's cache must be
+ * empty. Offset 'offidxstart' in the original object must
+ * correspond to offset zero in the new object.
+ *
+ * The new object must be locked.
+ */
+void
+vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
+ vm_object_t new_object)
+{
+ vm_page_t m, m_next;
+
+ /*
+ * Insertion into an object's collection of cached pages
+ * requires the object to be locked. In contrast, removal does
+ * not.
+ */
+ VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
+ KASSERT(new_object->cache == NULL,
+ ("vm_page_cache_transfer: object %p has cached pages",
+ new_object));
+ mtx_lock(&vm_page_queue_free_mtx);
+ if ((m = orig_object->cache) != NULL) {
+ /*
+ * Transfer all of the pages with offset greater than or
+ * equal to 'offidxstart' from the original object's
+ * cache to the new object's cache.
+ */
+ m = vm_page_splay(offidxstart, m);
+ if (m->pindex < offidxstart) {
+ orig_object->cache = m;
+ new_object->cache = m->right;
+ m->right = NULL;
+ } else {
+ orig_object->cache = m->left;
+ new_object->cache = m;
+ m->left = NULL;
+ }
+ KASSERT(new_object->cache == NULL ||
+ new_object->type == OBJT_SWAP,
+ ("vm_page_cache_transfer: object %p's type is incompatible"
+ " with cached pages", new_object));
+
+ /*
+ * Update the object and offset of each page that was
+ * transferred to the new object's cache.
+ */
+ while ((m = new_object->cache) != NULL) {
+ m_next = vm_page_splay(m->pindex, m->right);
+ m->object = new_object;
+ m->pindex -= offidxstart;
+ if (m_next == NULL)
+ break;
+ m->right = NULL;
+ m_next->left = m;
+ new_object->cache = m_next;
}
- vm_page_deactivate(m);
}
- return (m);
+ mtx_unlock(&vm_page_queue_free_mtx);
}
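
The new routines above — vm_page_cache_free(), vm_page_cache_lookup(), vm_page_cache_remove(), and vm_page_cache_transfer() — all lean on the same primitive: splay the object's tree of cached pages around a pindex so that the page of interest (or its nearest neighbor) becomes the root, then work only on the root and its children. The following self-contained userland sketch shows the pattern with toy nodes in place of vm_page_t; the splay() here follows Sleator's classic top-down formulation, which is assumed to be what vm_page_splay() implements, and lookup()/split() mirror the splay-then-inspect-the-root and splay-then-detach-a-subtree idioms used above.

#include <stdio.h>

struct node {
	unsigned long	 key;	/* stands in for a page's pindex */
	struct node	*left;
	struct node	*right;
};

/*
 * Simple top-down splay around 'key' (Sleator's formulation, assumed here
 * to match vm_page_splay()): the node closest to 'key' becomes the root.
 */
static struct node *
splay(unsigned long key, struct node *t)
{
	struct node n, *l, *r, *y;

	if (t == NULL)
		return (t);
	n.left = n.right = NULL;
	l = r = &n;
	for (;;) {
		if (key < t->key) {
			if (t->left == NULL)
				break;
			if (key < t->left->key) {
				y = t->left;		/* rotate right */
				t->left = y->right;
				y->right = t;
				t = y;
				if (t->left == NULL)
					break;
			}
			r->left = t;			/* link right */
			r = t;
			t = t->left;
		} else if (key > t->key) {
			if (t->right == NULL)
				break;
			if (key > t->right->key) {
				y = t->right;		/* rotate left */
				t->right = y->left;
				y->left = t;
				t = y;
				if (t->right == NULL)
					break;
			}
			l->right = t;			/* link left */
			l = t;
			t = t->right;
		} else
			break;
	}
	l->right = t->left;				/* reassemble */
	r->left = t->right;
	t->left = n.right;
	t->right = n.left;
	return (t);
}

/* Splay, then compare the root: the pattern of vm_page_cache_lookup(). */
static struct node *
lookup(unsigned long key, struct node **rootp)
{
	if (*rootp == NULL || (*rootp)->key == key)
		return (*rootp);
	*rootp = splay(key, *rootp);
	return ((*rootp)->key == key ? *rootp : NULL);
}

/*
 * Splay at the boundary, then detach one subtree, as vm_page_cache_transfer()
 * does: keys < start end up in *lo, keys >= start in *hi (tree non-empty).
 */
static void
split(unsigned long start, struct node *t, struct node **lo, struct node **hi)
{
	t = splay(start, t);
	if (t->key < start) {
		*lo = t;
		*hi = t->right;
		t->right = NULL;
	} else {
		*hi = t;
		*lo = t->left;
		t->left = NULL;
	}
}

int
main(void)
{
	struct node n1 = { .key = 1 }, n3 = { .key = 3 }, n5 = { .key = 5 },
	    n7 = { .key = 7 }, n9 = { .key = 9 };
	struct node *t = &n3, *lo, *hi;

	n3.left = &n1; n3.right = &n7; n7.left = &n5; n7.right = &n9;
	printf("lookup(5): %s\n", lookup(5, &t) != NULL ? "hit" : "miss");
	split(6, t, &lo, &hi);
	printf("split at 6: lo root %lu, hi root %lu\n", lo->key, hi->key);
	return (0);
}

Root removal in vm_page_cache_free() and vm_page_cache_remove() is the same idea once more: splaying the removed page's left subtree around its pindex brings that subtree's largest key to the top with an empty right child, so the removed page's right subtree can simply be hung there. Insertion in vm_page_cache() further down works in reverse: splay around the new page's pindex and hang the old root, minus one subtree, beneath the new page, which then becomes the root.
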
/*
@@ -847,15 +962,13 @@ vm_page_select_cache(void)
* VM_ALLOC_ZERO zero page
*
* This routine may not block.
- *
- * Additional special handling is required when called from an
- * interrupt (VM_ALLOC_INTERRUPT). We are not allowed to mess with
- * the page cache in this case.
*/
vm_page_t
vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
{
- vm_page_t m = NULL;
+ struct vnode *vp = NULL;
+ vm_object_t m_object;
+ vm_page_t m;
int flags, page_req;
page_req = req & VM_ALLOC_CLASS_MASK;
@@ -876,52 +989,32 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
page_req = VM_ALLOC_SYSTEM;
};
-loop:
mtx_lock(&vm_page_queue_free_mtx);
- if (cnt.v_free_count > cnt.v_free_reserved ||
+ if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
(page_req == VM_ALLOC_SYSTEM &&
- cnt.v_cache_count == 0 &&
- cnt.v_free_count > cnt.v_interrupt_free_min) ||
- (page_req == VM_ALLOC_INTERRUPT && cnt.v_free_count > 0)) {
+ cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
+ (page_req == VM_ALLOC_INTERRUPT &&
+ cnt.v_free_count + cnt.v_cache_count > 0)) {
/*
* Allocate from the free queue if the number of free pages
* exceeds the minimum for the request class.
*/
- m = vm_phys_alloc_pages(object != NULL ?
- VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
- } else if (page_req != VM_ALLOC_INTERRUPT) {
- mtx_unlock(&vm_page_queue_free_mtx);
- /*
- * Allocatable from cache (non-interrupt only). On success,
- * we must free the page and try again, thus ensuring that
- * cnt.v_*_free_min counters are replenished.
- */
- vm_page_lock_queues();
- if ((m = vm_page_select_cache()) == NULL) {
- KASSERT(cnt.v_cache_count == 0,
- ("vm_page_alloc: cache queue is missing %d pages",
- cnt.v_cache_count));
- vm_page_unlock_queues();
- atomic_add_int(&vm_pageout_deficit, 1);
- pagedaemon_wakeup();
-
- if (page_req != VM_ALLOC_SYSTEM)
- return (NULL);
-
- mtx_lock(&vm_page_queue_free_mtx);
- if (cnt.v_free_count <= cnt.v_interrupt_free_min) {
+ if (object != NULL &&
+ (m = vm_page_cache_lookup(object, pindex)) != NULL) {
+ if ((req & VM_ALLOC_IFNOTCACHED) != 0) {
mtx_unlock(&vm_page_queue_free_mtx);
return (NULL);
}
+ vm_phys_unfree_page(m);
+ } else if ((req & VM_ALLOC_IFCACHED) != 0) {
+ mtx_unlock(&vm_page_queue_free_mtx);
+ return (NULL);
+ } else
m = vm_phys_alloc_pages(object != NULL ?
VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
- } else {
- vm_page_unlock_queues();
- goto loop;
- }
} else {
/*
- * Not allocatable from cache from interrupt, give up.
+ * Not allocatable, give up.
*/
mtx_unlock(&vm_page_queue_free_mtx);
atomic_add_int(&vm_pageout_deficit, 1);
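
The rewritten availability test pools free and cached pages and lets each request class dig progressively deeper into the reserve: a normal request must leave v_free_reserved pages untouched, a VM_ALLOC_SYSTEM request may go down to v_interrupt_free_min, and a VM_ALLOC_INTERRUPT request may take the last page. A standalone restatement of that predicate (a sketch with the counters passed as plain parameters, not kernel code):

#include <stdbool.h>
#include <stdio.h>

enum alloc_class { ALLOC_NORMAL, ALLOC_SYSTEM, ALLOC_INTERRUPT };

/*
 * Free and cached pages are pooled; each request class may dig deeper
 * into the reserve than the one before it.
 */
static bool
pages_available(enum alloc_class class, unsigned free_cnt, unsigned cache_cnt,
    unsigned free_reserved, unsigned interrupt_free_min)
{
	unsigned avail = free_cnt + cache_cnt;

	if (avail > free_reserved)
		return (true);		/* any request class succeeds */
	if (class == ALLOC_SYSTEM && avail > interrupt_free_min)
		return (true);		/* system requests dip into the reserve */
	if (class == ALLOC_INTERRUPT && avail > 0)
		return (true);		/* interrupt requests may take the last page */
	return (false);
}

int
main(void)
{
	/* 10 free + 5 cached pages, reserve of 20, interrupt floor of 8. */
	printf("normal: %d\n", pages_available(ALLOC_NORMAL, 10, 5, 20, 8));
	printf("system: %d\n", pages_available(ALLOC_SYSTEM, 10, 5, 20, 8));
	printf("interrupt: %d\n", pages_available(ALLOC_INTERRUPT, 10, 5, 20, 8));
	return (0);
}
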
@@ -937,8 +1030,24 @@ loop:
m != NULL,
("vm_page_alloc(): missing page on free queue")
);
- KASSERT(VM_PAGE_IS_FREE(m),
- ("vm_page_alloc: page %p is not free", m));
+ if ((m->flags & PG_CACHED) != 0) {
+ KASSERT(m->valid != 0,
+ ("vm_page_alloc: cached page %p is invalid", m));
+ if (m->object == object && m->pindex == pindex)
+ cnt.v_reactivated++;
+ else
+ m->valid = 0;
+ m_object = m->object;
+ vm_page_cache_remove(m);
+ if (m_object->type == OBJT_VNODE && m_object->cache == NULL)
+ vp = m_object->handle;
+ } else {
+ KASSERT(VM_PAGE_IS_FREE(m),
+ ("vm_page_alloc: page %p is not free", m));
+ KASSERT(m->valid == 0,
+ ("vm_page_alloc: free page %p is valid", m));
+ cnt.v_free_count--;
+ }
/*
* Initialize structure. Only the PG_ZERO flag is inherited.
@@ -964,7 +1073,6 @@ loop:
m->hold_count = 0;
m->act_count = 0;
m->busy = 0;
- m->valid = 0;
KASSERT(m->dirty == 0, ("vm_page_alloc: free/cache page %p was dirty", m));
mtx_unlock(&vm_page_queue_free_mtx);
@@ -974,6 +1082,15 @@ loop:
m->pindex = pindex;
/*
+ * The following call to vdrop() must come after the above call
+ * to vm_page_insert() in case both affect the same object and
+ * vnode. Otherwise, the affected vnode's hold count could
+ * temporarily become zero.
+ */
+ if (vp != NULL)
+ vdrop(vp);
+
+ /*
* Don't wakeup too often - wakeup the pageout daemon when
* we would be nearly out of memory.
*/
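
The ordering constraint described in the new comment is the usual acquire-before-release rule for reference counts: the hold that the object's now-empty cache collection had on the vnode may be dropped only after vm_page_insert() has had its chance to take a hold on the same vnode; otherwise the count could touch zero in between and the vnode could be reclaimed prematurely. A toy illustration of why the order matters (the struct and functions are invented for this sketch and are not the kernel's vnode interfaces):

#include <assert.h>
#include <stdio.h>

/* Invented for this sketch; not the kernel's vnode interfaces. */
struct handle {
	int hold_count;
	int reclaimed;
};

static void
handle_hold(struct handle *h)
{
	assert(!h->reclaimed);			/* too late once it hit zero */
	h->hold_count++;
}

static void
handle_drop(struct handle *h)
{
	if (--h->hold_count == 0)
		h->reclaimed = 1;		/* count reached zero: handle goes away */
}

int
main(void)
{
	struct handle h = { .hold_count = 1 };	/* hold inherited from the cached page */

	handle_hold(&h);	/* new hold taken first, as the insert path does */
	handle_drop(&h);	/* old hold released afterwards: never touches zero */
	printf("hold_count = %d, reclaimed = %d\n", h.hold_count, h.reclaimed);
	/* Releasing first and holding second would reclaim the handle in between. */
	return (0);
}
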
@@ -1047,8 +1164,6 @@ vm_page_activate(vm_page_t m)
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
if (VM_PAGE_GETKNOWNQUEUE2(m) != PQ_ACTIVE) {
- if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
- cnt.v_reactivated++;
vm_pageq_remove(m);
if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
if (m->act_count < ACT_INIT)
@@ -1133,7 +1248,7 @@ vm_page_free_toq(vm_page_t m)
* callback routine until after we've put the page on the
* appropriate free queue.
*/
- vm_pageq_remove_nowakeup(m);
+ vm_pageq_remove(m);
vm_page_remove(m);
/*
@@ -1160,6 +1275,7 @@ vm_page_free_toq(vm_page_t m)
} else {
m->flags |= PG_FREE;
mtx_lock(&vm_page_queue_free_mtx);
+ cnt.v_free_count++;
if ((m->flags & PG_ZERO) != 0) {
vm_phys_free_pages(m, 0);
++vm_page_zero_count;
@@ -1279,8 +1395,6 @@ _vm_page_deactivate(vm_page_t m, int athead)
if (VM_PAGE_INQUEUE2(m, PQ_INACTIVE))
return;
if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
- if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
- cnt.v_reactivated++;
vm_page_flag_clear(m, PG_WINATCFLS);
vm_pageq_remove(m);
if (athead)
@@ -1354,15 +1468,26 @@ vm_page_try_to_free(vm_page_t m)
void
vm_page_cache(vm_page_t m)
{
+ vm_object_t object;
+ vm_page_t root;
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
+ object = m->object;
+ VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
if ((m->flags & PG_UNMANAGED) || (m->oflags & VPO_BUSY) || m->busy ||
m->hold_count || m->wire_count) {
panic("vm_page_cache: attempting to cache busy page");
}
- if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
+ if (m->valid == 0 || object->type == OBJT_DEFAULT) {
+ /*
+	 * Hypothesis: A cache-eligible page belonging to a
+ * default object must be zero filled.
+ */
+ vm_page_free(m);
return;
+ }
+ KASSERT((m->flags & PG_CACHED) == 0,
+ ("vm_page_cache: page %p is already cached", m));
cnt.v_tcached++;
/*
@@ -1374,11 +1499,72 @@ vm_page_cache(vm_page_t m)
panic("vm_page_cache: caching a dirty page, pindex: %ld",
(long)m->pindex);
}
- vm_pageq_remove_nowakeup(m);
- vm_pageq_enqueue(PQ_CACHE, m);
+
+ /*
+ * Remove the page from the paging queues.
+ */
+ vm_pageq_remove(m);
+
+ /*
+ * Remove the page from the object's collection of resident
+ * pages.
+ */
+ if (m != object->root)
+ vm_page_splay(m->pindex, object->root);
+ if (m->left == NULL)
+ root = m->right;
+ else {
+ root = vm_page_splay(m->pindex, m->left);
+ root->right = m->right;
+ }
+ object->root = root;
+ TAILQ_REMOVE(&object->memq, m, listq);
+ object->resident_page_count--;
+ object->generation++;
+
+ /*
+ * Insert the page into the object's collection of cached pages
+ * and the physical memory allocator's cache/free page queues.
+ */
+ vm_page_flag_set(m, PG_CACHED);
+ vm_page_flag_clear(m, PG_ZERO);
mtx_lock(&vm_page_queue_free_mtx);
+ vm_phys_set_pool(VM_FREEPOOL_CACHE, m, 0);
+ cnt.v_cache_count++;
+ root = object->cache;
+ if (root == NULL) {
+ m->left = NULL;
+ m->right = NULL;
+ } else {
+ root = vm_page_splay(m->pindex, root);
+ if (m->pindex < root->pindex) {
+ m->left = root->left;
+ m->right = root;
+ root->left = NULL;
+ } else if (__predict_false(m->pindex == root->pindex))
+ panic("vm_page_cache: offset already cached");
+ else {
+ m->right = root->right;
+ m->left = root;
+ root->right = NULL;
+ }
+ }
+ object->cache = m;
+ vm_phys_free_pages(m, 0);
vm_page_free_wakeup();
mtx_unlock(&vm_page_queue_free_mtx);
+
+ /*
+ * Increment the vnode's hold count if this is the object's only
+ * cached page. Decrement the vnode's hold count if this was
+ * the object's only resident page.
+ */
+ if (object->type == OBJT_VNODE) {
+ if (root == NULL && object->resident_page_count != 0)
+ vhold(object->handle);
+ else if (root != NULL && object->resident_page_count == 0)
+ vdrop(object->handle);
+ }
}
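
The hold-count adjustment at the end of vm_page_cache() keeps one hold on the vnode while the object has any resident pages and another while it has any cached pages, adjusting only on empty/non-empty transitions; when the page being cached is both the last resident page and the first cached page, the two transitions cancel and no adjustment is made. A small standalone sketch of that bookkeeping (toy types, not the kernel's vnode API), asserting the intended invariant after each move:

#include <assert.h>
#include <stdio.h>

/* Toy model of the two holds described above; not the kernel's types. */
struct obj {
	int resident;	/* resident page count */
	int cached;	/* cached page count */
	int hold;	/* holds on the backing handle */
};

/*
 * Move one page from the resident set to the cached set, adjusting the
 * hold count only on empty/non-empty transitions, as vm_page_cache() does.
 */
static void
cache_one_page(struct obj *o)
{
	o->resident--;
	o->cached++;
	if (o->cached == 1 && o->resident != 0)
		o->hold++;	/* first cached page; the resident set keeps its hold */
	else if (o->cached != 1 && o->resident == 0)
		o->hold--;	/* last resident page left; the cached set already holds */
	assert(o->hold == (o->resident > 0) + (o->cached > 0));
}

int
main(void)
{
	struct obj o = { .resident = 2, .cached = 0, .hold = 1 };

	cache_one_page(&o);	/* first cached page: hold 1 -> 2 */
	cache_one_page(&o);	/* last resident page: hold 2 -> 1 */
	printf("resident=%d cached=%d hold=%d\n", o.resident, o.cached, o.hold);
	return (0);
}
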
/*
@@ -1416,9 +1602,7 @@ vm_page_dontneed(vm_page_t m)
* occasionally leave the page alone
*/
if ((dnw & 0x01F0) == 0 ||
- VM_PAGE_INQUEUE2(m, PQ_INACTIVE) ||
- VM_PAGE_INQUEUE1(m, PQ_CACHE)
- ) {
+ VM_PAGE_INQUEUE2(m, PQ_INACTIVE)) {
if (m->act_count >= ACT_INIT)
--m->act_count;
return;
@@ -1482,7 +1666,8 @@ retrylookup:
if ((allocflags & VM_ALLOC_RETRY) == 0)
return (NULL);
goto retrylookup;
- }
+ } else if (m->valid != 0)
+ return (m);
if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
pmap_zero_page(m);
return (m);
@@ -1813,7 +1998,7 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
db_printf("\n");
db_printf("PQ_CACHE:");
- db_printf(" %d", *vm_page_queues[PQ_CACHE].cnt);
+ db_printf(" %d", cnt.v_cache_count);
db_printf("\n");
db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",