Diffstat (limited to 'sys/vm/vm_page.c')
-rw-r--r--  sys/vm/vm_page.c  357
1 file changed, 271 insertions, 86 deletions
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 6504f60..36fee28 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -547,7 +547,7 @@ vm_page_sleep(vm_page_t m, const char *msg)
 void
 vm_page_dirty(vm_page_t m)
 {
-        KASSERT(VM_PAGE_GETKNOWNQUEUE1(m) != PQ_CACHE,
+        KASSERT((m->flags & PG_CACHED) == 0,
             ("vm_page_dirty: page in cache!"));
         KASSERT(!VM_PAGE_IS_FREE(m),
             ("vm_page_dirty: page is free!"));
@@ -790,48 +790,163 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
 
         vm_page_remove(m);
         vm_page_insert(m, new_object, new_pindex);
-        if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
-                vm_page_deactivate(m);
         vm_page_dirty(m);
 }
 
 /*
- * vm_page_select_cache:
+ * Convert all of the cached pages belonging to the given object
+ * into free pages.  If the given object has cached pages and is
+ * backed by a vnode, reduce the vnode's hold count.
+ */
+void
+vm_page_cache_free(vm_object_t object)
+{
+        vm_page_t m, root;
+        boolean_t empty;
+
+        mtx_lock(&vm_page_queue_free_mtx);
+        empty = object->cache == NULL;
+        while ((m = object->cache) != NULL) {
+                if (m->left == NULL)
+                        root = m->right;
+                else if (m->right == NULL)
+                        root = m->left;
+                else {
+                        root = vm_page_splay(m->pindex, m->left);
+                        root->right = m->right;
+                }
+                m->object->cache = root;
+                m->object = NULL;
+                m->valid = 0;
+                /* Clear PG_CACHED and set PG_FREE. */
+                m->flags ^= PG_CACHED | PG_FREE;
+                KASSERT((m->flags & (PG_CACHED | PG_FREE)) == PG_FREE,
+                    ("vm_page_cache_free: page %p has inconsistent flags", m));
+                cnt.v_cache_count--;
+                cnt.v_free_count++;
+        }
+        mtx_unlock(&vm_page_queue_free_mtx);
+        if (object->type == OBJT_VNODE && !empty)
+                vdrop(object->handle);
+}
+
+/*
+ * Returns the cached page that is associated with the given
+ * object and offset.  If, however, none exists, returns NULL.
  *
- * Move a page of the given color from the cache queue to the free
- * queue.  As pages might be found, but are not applicable, they are
- * deactivated.
+ * The free page queue must be locked.
+ */
+static inline vm_page_t
+vm_page_cache_lookup(vm_object_t object, vm_pindex_t pindex)
+{
+        vm_page_t m;
+
+        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+        if ((m = object->cache) != NULL && m->pindex != pindex) {
+                m = vm_page_splay(pindex, m);
+                if ((object->cache = m)->pindex != pindex)
+                        m = NULL;
+        }
+        return (m);
+}
+
+/*
+ * Remove the given cached page from its containing object's
+ * collection of cached pages.
  *
- * This routine may not block.
+ * The free page queue must be locked.
  */
-vm_page_t
-vm_page_select_cache(void)
+void
+vm_page_cache_remove(vm_page_t m)
 {
         vm_object_t object;
-        vm_page_t m;
-        boolean_t was_trylocked;
+        vm_page_t root;
 
-        mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-        while ((m = TAILQ_FIRST(&vm_page_queues[PQ_CACHE].pl)) != NULL) {
-                KASSERT(m->dirty == 0, ("Found dirty cache page %p", m));
-                KASSERT(!pmap_page_is_mapped(m),
-                    ("Found mapped cache page %p", m));
-                KASSERT((m->flags & PG_UNMANAGED) == 0,
-                    ("Found unmanaged cache page %p", m));
-                KASSERT(m->wire_count == 0, ("Found wired cache page %p", m));
-                if (m->hold_count == 0 && (object = m->object,
-                    (was_trylocked = VM_OBJECT_TRYLOCK(object)) ||
-                    VM_OBJECT_LOCKED(object))) {
-                        KASSERT((m->oflags & VPO_BUSY) == 0 && m->busy == 0,
-                            ("Found busy cache page %p", m));
-                        vm_page_free(m);
-                        if (was_trylocked)
-                                VM_OBJECT_UNLOCK(object);
-                        break;
+        mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
+        KASSERT((m->flags & PG_CACHED) != 0,
+            ("vm_page_cache_remove: page %p is not cached", m));
+        object = m->object;
+        if (m != object->cache) {
+                root = vm_page_splay(m->pindex, object->cache);
+                KASSERT(root == m,
+                    ("vm_page_cache_remove: page %p is not cached in object %p",
+                    m, object));
+        }
+        if (m->left == NULL)
+                root = m->right;
+        else if (m->right == NULL)
+                root = m->left;
+        else {
+                root = vm_page_splay(m->pindex, m->left);
+                root->right = m->right;
+        }
+        object->cache = root;
+        m->object = NULL;
+        cnt.v_cache_count--;
+}
+
+/*
+ * Transfer all of the cached pages with offset greater than or
+ * equal to 'offidxstart' from the original object's cache to the
+ * new object's cache.  Initially, the new object's cache must be
+ * empty.  Offset 'offidxstart' in the original object must
+ * correspond to offset zero in the new object.
+ *
+ * The new object must be locked.
+ */
+void
+vm_page_cache_transfer(vm_object_t orig_object, vm_pindex_t offidxstart,
+    vm_object_t new_object)
+{
+        vm_page_t m, m_next;
+
+        /*
+         * Insertion into an object's collection of cached pages
+         * requires the object to be locked.  In contrast, removal does
+         * not.
+         */
+        VM_OBJECT_LOCK_ASSERT(new_object, MA_OWNED);
+        KASSERT(new_object->cache == NULL,
+            ("vm_page_cache_transfer: object %p has cached pages",
+            new_object));
+        mtx_lock(&vm_page_queue_free_mtx);
+        if ((m = orig_object->cache) != NULL) {
+                /*
+                 * Transfer all of the pages with offset greater than or
+                 * equal to 'offidxstart' from the original object's
+                 * cache to the new object's cache.
+                 */
+                m = vm_page_splay(offidxstart, m);
+                if (m->pindex < offidxstart) {
+                        orig_object->cache = m;
+                        new_object->cache = m->right;
+                        m->right = NULL;
+                } else {
+                        orig_object->cache = m->left;
+                        new_object->cache = m;
+                        m->left = NULL;
+                }
+                KASSERT(new_object->cache == NULL ||
+                    new_object->type == OBJT_SWAP,
+                    ("vm_page_cache_transfer: object %p's type is incompatible"
+                    " with cached pages", new_object));
+
+                /*
+                 * Update the object and offset of each page that was
+                 * transferred to the new object's cache.
+                 */
+                while ((m = new_object->cache) != NULL) {
+                        m_next = vm_page_splay(m->pindex, m->right);
+                        m->object = new_object;
+                        m->pindex -= offidxstart;
+                        if (m_next == NULL)
+                                break;
+                        m->right = NULL;
+                        m_next->left = m;
+                        new_object->cache = m_next;
                 }
-                vm_page_deactivate(m);
         }
-        return (m);
+        mtx_unlock(&vm_page_queue_free_mtx);
 }
 
 /*
@@ -847,15 +962,13 @@ vm_page_select_cache(void)
  *      VM_ALLOC_ZERO           zero page
  *
  * This routine may not block.
- *
- * Additional special handling is required when called from an
- * interrupt (VM_ALLOC_INTERRUPT).  We are not allowed to mess with
- * the page cache in this case.
  */
 vm_page_t
 vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
 {
-        vm_page_t m = NULL;
+        struct vnode *vp = NULL;
+        vm_object_t m_object;
+        vm_page_t m;
         int flags, page_req;
 
         page_req = req & VM_ALLOC_CLASS_MASK;
@@ -876,52 +989,32 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
                 page_req = VM_ALLOC_SYSTEM;
         };
 
-loop:
         mtx_lock(&vm_page_queue_free_mtx);
-        if (cnt.v_free_count > cnt.v_free_reserved ||
+        if (cnt.v_free_count + cnt.v_cache_count > cnt.v_free_reserved ||
             (page_req == VM_ALLOC_SYSTEM &&
-            cnt.v_cache_count == 0 &&
-            cnt.v_free_count > cnt.v_interrupt_free_min) ||
-            (page_req == VM_ALLOC_INTERRUPT && cnt.v_free_count > 0)) {
+            cnt.v_free_count + cnt.v_cache_count > cnt.v_interrupt_free_min) ||
+            (page_req == VM_ALLOC_INTERRUPT &&
+            cnt.v_free_count + cnt.v_cache_count > 0)) {
                 /*
                  * Allocate from the free queue if the number of free pages
                  * exceeds the minimum for the request class.
                  */
-                m = vm_phys_alloc_pages(object != NULL ?
-                    VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
-        } else if (page_req != VM_ALLOC_INTERRUPT) {
-                mtx_unlock(&vm_page_queue_free_mtx);
-                /*
-                 * Allocatable from cache (non-interrupt only).  On success,
-                 * we must free the page and try again, thus ensuring that
-                 * cnt.v_*_free_min counters are replenished.
-                 */
-                vm_page_lock_queues();
-                if ((m = vm_page_select_cache()) == NULL) {
-                        KASSERT(cnt.v_cache_count == 0,
-                            ("vm_page_alloc: cache queue is missing %d pages",
-                            cnt.v_cache_count));
-                        vm_page_unlock_queues();
-                        atomic_add_int(&vm_pageout_deficit, 1);
-                        pagedaemon_wakeup();
-
-                        if (page_req != VM_ALLOC_SYSTEM)
-                                return (NULL);
-
-                        mtx_lock(&vm_page_queue_free_mtx);
-                        if (cnt.v_free_count <= cnt.v_interrupt_free_min) {
+                if (object != NULL &&
+                    (m = vm_page_cache_lookup(object, pindex)) != NULL) {
+                        if ((req & VM_ALLOC_IFNOTCACHED) != 0) {
                                 mtx_unlock(&vm_page_queue_free_mtx);
                                 return (NULL);
                         }
+                        vm_phys_unfree_page(m);
+                } else if ((req & VM_ALLOC_IFCACHED) != 0) {
+                        mtx_unlock(&vm_page_queue_free_mtx);
+                        return (NULL);
+                } else
                         m = vm_phys_alloc_pages(object != NULL ?
                             VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
-                } else {
-                        vm_page_unlock_queues();
-                        goto loop;
-                }
         } else {
                 /*
-                 * Not allocatable from cache from interrupt, give up.
+                 * Not allocatable, give up.
                  */
                 mtx_unlock(&vm_page_queue_free_mtx);
                 atomic_add_int(&vm_pageout_deficit, 1);
@@ -937,8 +1030,24 @@ loop:
             m != NULL,
             ("vm_page_alloc(): missing page on free queue")
         );
-        KASSERT(VM_PAGE_IS_FREE(m),
-            ("vm_page_alloc: page %p is not free", m));
+        if ((m->flags & PG_CACHED) != 0) {
+                KASSERT(m->valid != 0,
+                    ("vm_page_alloc: cached page %p is invalid", m));
+                if (m->object == object && m->pindex == pindex)
+                        cnt.v_reactivated++;
+                else
+                        m->valid = 0;
+                m_object = m->object;
+                vm_page_cache_remove(m);
+                if (m_object->type == OBJT_VNODE && m_object->cache == NULL)
+                        vp = m_object->handle;
+        } else {
+                KASSERT(VM_PAGE_IS_FREE(m),
+                    ("vm_page_alloc: page %p is not free", m));
+                KASSERT(m->valid == 0,
+                    ("vm_page_alloc: free page %p is valid", m));
+                cnt.v_free_count--;
+        }
 
         /*
          * Initialize structure.  Only the PG_ZERO flag is inherited.
@@ -964,7 +1073,6 @@ loop:
         m->hold_count = 0;
         m->act_count = 0;
         m->busy = 0;
-        m->valid = 0;
         KASSERT(m->dirty == 0,
             ("vm_page_alloc: free/cache page %p was dirty", m));
         mtx_unlock(&vm_page_queue_free_mtx);
@@ -974,6 +1082,15 @@ loop:
         m->pindex = pindex;
 
         /*
+         * The following call to vdrop() must come after the above call
+         * to vm_page_insert() in case both affect the same object and
+         * vnode.  Otherwise, the affected vnode's hold count could
+         * temporarily become zero.
+         */
+        if (vp != NULL)
+                vdrop(vp);
+
+        /*
          * Don't wakeup too often - wakeup the pageout daemon when
          * we would be nearly out of memory.
          */
@@ -1047,8 +1164,6 @@ vm_page_activate(vm_page_t m)
 
         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
         if (VM_PAGE_GETKNOWNQUEUE2(m) != PQ_ACTIVE) {
-                if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
-                        cnt.v_reactivated++;
                 vm_pageq_remove(m);
                 if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
                         if (m->act_count < ACT_INIT)
@@ -1133,7 +1248,7 @@ vm_page_free_toq(vm_page_t m)
          * callback routine until after we've put the page on the
          * appropriate free queue.
          */
-        vm_pageq_remove_nowakeup(m);
+        vm_pageq_remove(m);
         vm_page_remove(m);
 
         /*
@@ -1160,6 +1275,7 @@ vm_page_free_toq(vm_page_t m)
         } else {
                 m->flags |= PG_FREE;
                 mtx_lock(&vm_page_queue_free_mtx);
+                cnt.v_free_count++;
                 if ((m->flags & PG_ZERO) != 0) {
                         vm_phys_free_pages(m, 0);
                         ++vm_page_zero_count;
@@ -1279,8 +1395,6 @@ _vm_page_deactivate(vm_page_t m, int athead)
         if (VM_PAGE_INQUEUE2(m, PQ_INACTIVE))
                 return;
         if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
-                if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
-                        cnt.v_reactivated++;
                 vm_page_flag_clear(m, PG_WINATCFLS);
                 vm_pageq_remove(m);
                 if (athead)
@@ -1354,15 +1468,26 @@ vm_page_try_to_free(vm_page_t m)
 void
 vm_page_cache(vm_page_t m)
 {
+        vm_object_t object;
+        vm_page_t root;
 
         mtx_assert(&vm_page_queue_mtx, MA_OWNED);
-        VM_OBJECT_LOCK_ASSERT(m->object, MA_OWNED);
+        object = m->object;
+        VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
         if ((m->flags & PG_UNMANAGED) || (m->oflags & VPO_BUSY) || m->busy ||
             m->hold_count || m->wire_count) {
                 panic("vm_page_cache: attempting to cache busy page");
         }
-        if (VM_PAGE_INQUEUE1(m, PQ_CACHE))
+        if (m->valid == 0 || object->type == OBJT_DEFAULT) {
+                /*
+                 * Hypothesis: A cache-elgible page belonging to a
+                 * default object must be zero filled.
+                 */
+                vm_page_free(m);
                 return;
+        }
+        KASSERT((m->flags & PG_CACHED) == 0,
+            ("vm_page_cache: page %p is already cached", m));
         cnt.v_tcached++;
 
         /*
@@ -1374,11 +1499,72 @@ vm_page_cache(vm_page_t m)
                 panic("vm_page_cache: caching a dirty page, pindex: %ld",
                         (long)m->pindex);
         }
-        vm_pageq_remove_nowakeup(m);
-        vm_pageq_enqueue(PQ_CACHE, m);
+
+        /*
+         * Remove the page from the paging queues.
+         */
+        vm_pageq_remove(m);
+
+        /*
+         * Remove the page from the object's collection of resident
+         * pages.
+         */
+        if (m != object->root)
+                vm_page_splay(m->pindex, object->root);
+        if (m->left == NULL)
+                root = m->right;
+        else {
+                root = vm_page_splay(m->pindex, m->left);
+                root->right = m->right;
+        }
+        object->root = root;
+        TAILQ_REMOVE(&object->memq, m, listq);
+        object->resident_page_count--;
+        object->generation++;
+
+        /*
+         * Insert the page into the object's collection of cached pages
+         * and the physical memory allocator's cache/free page queues.
+         */
+        vm_page_flag_set(m, PG_CACHED);
+        vm_page_flag_clear(m, PG_ZERO);
         mtx_lock(&vm_page_queue_free_mtx);
+        vm_phys_set_pool(VM_FREEPOOL_CACHE, m, 0);
+        cnt.v_cache_count++;
+        root = object->cache;
+        if (root == NULL) {
+                m->left = NULL;
+                m->right = NULL;
+        } else {
+                root = vm_page_splay(m->pindex, root);
+                if (m->pindex < root->pindex) {
+                        m->left = root->left;
+                        m->right = root;
+                        root->left = NULL;
+                } else if (__predict_false(m->pindex == root->pindex))
+                        panic("vm_page_cache: offset already cached");
+                else {
+                        m->right = root->right;
+                        m->left = root;
+                        root->right = NULL;
+                }
+        }
+        object->cache = m;
+        vm_phys_free_pages(m, 0);
         vm_page_free_wakeup();
         mtx_unlock(&vm_page_queue_free_mtx);
+
+        /*
+         * Increment the vnode's hold count if this is the object's only
+         * cached page.  Decrement the vnode's hold count if this was
+         * the object's only resident page.
+         */
+        if (object->type == OBJT_VNODE) {
+                if (root == NULL && object->resident_page_count != 0)
+                        vhold(object->handle);
+                else if (root != NULL && object->resident_page_count == 0)
+                        vdrop(object->handle);
+        }
 }
 
 /*
@@ -1416,9 +1602,7 @@ vm_page_dontneed(vm_page_t m)
          * occassionally leave the page alone
          */
         if ((dnw & 0x01F0) == 0 ||
-            VM_PAGE_INQUEUE2(m, PQ_INACTIVE) ||
-            VM_PAGE_INQUEUE1(m, PQ_CACHE)
-        ) {
+            VM_PAGE_INQUEUE2(m, PQ_INACTIVE)) {
                 if (m->act_count >= ACT_INIT)
                         --m->act_count;
                 return;
@@ -1482,7 +1666,8 @@ retrylookup:
                 if ((allocflags & VM_ALLOC_RETRY) == 0)
                         return (NULL);
                 goto retrylookup;
-        }
+        } else if (m->valid != 0)
+                return (m);
         if (allocflags & VM_ALLOC_ZERO && (m->flags & PG_ZERO) == 0)
                 pmap_zero_page(m);
         return (m);
@@ -1813,7 +1998,7 @@ DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
         db_printf("\n");
 
         db_printf("PQ_CACHE:");
-        db_printf(" %d", *vm_page_queues[PQ_CACHE].cnt);
+        db_printf(" %d", cnt.v_cache_count);
         db_printf("\n");
 
         db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
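Note on the data structure used above: object->cache, like object->root for resident pages, is an unbalanced binary search tree keyed on pindex and maintained with vm_page_splay(). The sketch below is a minimal standalone userland illustration of that top-down splay pattern, not the kernel code itself; the struct cpage type, the cpage_splay() and cpage_insert() names, and the main() driver are invented for the example. The assumed contract is the one the diff relies on: splaying by an index brings the matching node to the root if it exists, otherwise the last node visited becomes the root.

/*
 * Illustrative userland sketch only -- not FreeBSD kernel code.  The
 * names cpage, cpage_splay, and cpage_insert are invented here; they
 * mimic the splay pattern used for object->cache above.
 */
#include <stdio.h>

struct cpage {
        unsigned long pindex;           /* offset within the object */
        struct cpage *left, *right;
};

/*
 * Top-down splay: the node with the given pindex becomes the root if
 * present; otherwise the last node examined on the search path does.
 */
static struct cpage *
cpage_splay(unsigned long pindex, struct cpage *root)
{
        struct cpage dummy, *ltree, *rtree, *y;

        if (root == NULL)
                return (NULL);
        dummy.left = dummy.right = NULL;
        ltree = rtree = &dummy;
        for (;;) {
                if (pindex < root->pindex) {
                        if ((y = root->left) == NULL)
                                break;
                        if (pindex < y->pindex) {       /* rotate right */
                                root->left = y->right;
                                y->right = root;
                                root = y;
                                if (root->left == NULL)
                                        break;
                        }
                        rtree->left = root;             /* link right */
                        rtree = root;
                        root = root->left;
                } else if (pindex > root->pindex) {
                        if ((y = root->right) == NULL)
                                break;
                        if (pindex > y->pindex) {       /* rotate left */
                                root->right = y->left;
                                y->left = root;
                                root = y;
                                if (root->right == NULL)
                                        break;
                        }
                        ltree->right = root;            /* link left */
                        ltree = root;
                        root = root->right;
                } else
                        break;
        }
        ltree->right = root->left;      /* reassemble the three pieces */
        rtree->left = root->right;
        root->left = dummy.right;
        root->right = dummy.left;
        return (root);
}

/*
 * Insert the way vm_page_cache() does above: splay by the new key, then
 * hang the two halves of the old tree off the new node.  Assumes the
 * key is not already present.
 */
static struct cpage *
cpage_insert(struct cpage *m, struct cpage *root)
{
        if (root == NULL)
                m->left = m->right = NULL;
        else {
                root = cpage_splay(m->pindex, root);
                if (m->pindex < root->pindex) {
                        m->left = root->left;
                        m->right = root;
                        root->left = NULL;
                } else {
                        m->right = root->right;
                        m->left = root;
                        root->right = NULL;
                }
        }
        return (m);
}

int
main(void)
{
        struct cpage nodes[5] = { { 7 }, { 2 }, { 9 }, { 4 }, { 11 } };
        struct cpage *cache = NULL;

        for (int i = 0; i < 5; i++)
                cache = cpage_insert(&nodes[i], cache);
        /* Lookup the way vm_page_cache_lookup() does: splay, test the root. */
        cache = cpage_splay(4, cache);
        printf("root pindex after splay(4): %lu\n", cache->pindex);
        return (0);
}

With that contract, vm_page_cache_lookup() only has to splay and compare the root's pindex, and vm_page_cache_remove() can join the root's two subtrees with one additional splay, which is what the new functions in the diff do.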