Diffstat (limited to 'sys/vm/vm_object.c')
-rw-r--r-- | sys/vm/vm_object.c | 431 |
1 file changed, 261 insertions, 170 deletions
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 32b0779..570befc 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -164,6 +164,9 @@ vm_object_zdtor(void *mem, int size, void *arg)
 	vm_object_t object;
 
 	object = (vm_object_t)mem;
+	KASSERT(object->resident_page_count == 0,
+	    ("object %p resident_page_count = %d",
+	    object, object->resident_page_count));
 	KASSERT(TAILQ_EMPTY(&object->memq),
 	    ("object %p has resident pages",
 	    object));
@@ -172,15 +175,12 @@ vm_object_zdtor(void *mem, int size, void *arg)
 	    ("object %p has reservations",
 	    object));
 #endif
-	KASSERT(object->cache == NULL,
+	KASSERT(object->cached_page_count == 0,
 	    ("object %p has cached pages",
 	    object));
 	KASSERT(object->paging_in_progress == 0,
 	    ("object %p paging_in_progress = %d",
 	    object, object->paging_in_progress));
-	KASSERT(object->resident_page_count == 0,
-	    ("object %p resident_page_count = %d",
-	    object, object->resident_page_count));
 	KASSERT(object->shadow_count == 0,
 	    ("object %p shadow_count = %d",
 	    object, object->shadow_count));
@@ -210,7 +210,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
 	TAILQ_INIT(&object->memq);
 	LIST_INIT(&object->shadow_head);
 
-	object->root = NULL;
+	object->rtree.rt_root = 0;
 	object->type = type;
 	switch (type) {
 	case OBJT_DEAD:
@@ -248,7 +248,6 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
 #if VM_NRESERVLEVEL > 0
 	LIST_INIT(&object->rvq);
 #endif
-	object->cache = NULL;
 
 	mtx_lock(&vm_object_list_mtx);
 	TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
@@ -326,7 +325,7 @@ vm_object_set_memattr(vm_object_t object, vm_memattr_t memattr)
 	case OBJT_SG:
 	case OBJT_SWAP:
 	case OBJT_VNODE:
-		if (!TAILQ_EMPTY(&object->memq))
+		if (object->resident_page_count == 0)
 			return (KERN_FAILURE);
 		break;
 	case OBJT_DEAD:
@@ -673,7 +672,11 @@ vm_object_destroy(vm_object_t object)
 void
 vm_object_terminate(vm_object_t object)
 {
-	vm_page_t p, p_next;
+	vm_page_t pa[VM_RADIX_STACK];
+	vm_page_t p;
+	vm_pindex_t start;
+	u_int exhausted;
+	int n, i;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 
@@ -718,43 +721,78 @@ vm_object_terminate(vm_object_t object)
 	 * from the object.  Rather than incrementally removing each page from
 	 * the object, the page and object are reset to any empty state.
 	 */
-	TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) {
-		KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0,
-		    ("vm_object_terminate: freeing busy page %p", p));
-		vm_page_lock(p);
-		/*
-		 * Optimize the page's removal from the object by resetting
-		 * its "object" field.  Specifically, if the page is not
-		 * wired, then the effect of this assignment is that
-		 * vm_page_free()'s call to vm_page_remove() will return
-		 * immediately without modifying the page or the object.
-		 */
-		p->object = NULL;
-		if (p->wire_count == 0) {
-			vm_page_free(p);
-			PCPU_INC(cnt.v_pfree);
+	start = 0;
+	exhausted = 0;
+	while (exhausted == 0 && (n = vm_radix_lookupn(&object->rtree, start,
+	    0, VM_RADIX_ANY, (void **)pa, VM_RADIX_STACK, &start,
+	    &exhausted)) != 0) {
+		for (i = 0; i < n; i++) {
+			p = pa[i];
+			/*
+			 * Another thread may allocate this cached page from
+			 * the queue before we acquire the page queue free
+			 * mtx.
+			 */
+			if (p->flags & PG_CACHED) {
+				mtx_lock(&vm_page_queue_free_mtx);
+				if (p->object == object) {
+					p->object = NULL;
+					p->valid = 0;
+					/* Clear PG_CACHED and set PG_FREE. */
+					p->flags ^= PG_CACHED | PG_FREE;
+					cnt.v_cache_count--;
+					cnt.v_free_count++;
+				}
+				mtx_unlock(&vm_page_queue_free_mtx);
+				continue;
+			} else if (p->object != object)
+				continue;
+			KASSERT(!p->busy && (p->oflags & VPO_BUSY) == 0,
+			    ("vm_object_terminate: freeing busy page %p", p));
+			vm_page_lock(p);
+			/*
+			 * Optimize the page's removal from the object by
+			 * resetting its "object" field.  Specifically, if
+			 * the page is not wired, then the effect of this
+			 * assignment is that vm_page_free()'s call to
+			 * vm_page_remove() will return immediately without
+			 * modifying the page or the object.
			 * Anyway, the radix tree cannot be accessed anymore
+			 * from within the object, thus all the nodes need
+			 * to be reclaimed later on.
+			 */
+			p->object = NULL;
+			if (p->wire_count == 0) {
+				vm_page_free(p);
+				PCPU_INC(cnt.v_pfree);
+			}
+			vm_page_unlock(p);
 		}
-		vm_page_unlock(p);
+		if (n < VM_RADIX_STACK)
+			break;
 	}
+	vm_radix_reclaim_allnodes(&object->rtree);
 
 	/*
 	 * If the object contained any pages, then reset it to an empty state.
 	 * None of the object's fields, including "resident_page_count", were
 	 * modified by the preceding loop.
 	 */
 	if (object->resident_page_count != 0) {
-		object->root = NULL;
 		TAILQ_INIT(&object->memq);
 		object->resident_page_count = 0;
 		if (object->type == OBJT_VNODE)
 			vdrop(object->handle);
 	}
+	if (object->cached_page_count != 0) {
+		object->cached_page_count = 0;
+		if (object->type == OBJT_VNODE)
+			vdrop(object->handle);
+	}
 #if VM_NRESERVLEVEL > 0
 	if (__predict_false(!LIST_EMPTY(&object->rvq)))
 		vm_reserv_break_all(object);
 #endif
-	if (__predict_false(object->cache != NULL))
-		vm_page_cache_free(object, 0, 0);
 
 	/*
 	 * Let the pager know object is dead.
@@ -1266,10 +1304,13 @@ vm_object_shadow(
 void
 vm_object_split(vm_map_entry_t entry)
 {
-	vm_page_t m, m_next;
+	vm_page_t ma[VM_RADIX_STACK];
+	vm_page_t m;
 	vm_object_t orig_object, new_object, source;
-	vm_pindex_t idx, offidxstart;
+	vm_pindex_t idx, offidxstart, start;
 	vm_size_t size;
+	u_int exhausted;
+	int i, n;
 
 	orig_object = entry->object.vm_object;
 	if (orig_object->type != OBJT_DEFAULT && orig_object->type != OBJT_SWAP)
@@ -1322,46 +1363,66 @@ vm_object_split(vm_map_entry_t entry)
 		    ("orig_object->charge < 0"));
 		orig_object->charge -= ptoa(size);
 	}
+	start = offidxstart;
 retry:
-	m = vm_page_find_least(orig_object, offidxstart);
-	for (; m != NULL && (idx = m->pindex - offidxstart) < size;
-	    m = m_next) {
-		m_next = TAILQ_NEXT(m, listq);
-
-		/*
-		 * We must wait for pending I/O to complete before we can
-		 * rename the page.
-		 *
-		 * We do not have to VM_PROT_NONE the page as mappings should
-		 * not be changed by this operation.
-		 */
-		if ((m->oflags & VPO_BUSY) || m->busy) {
-			VM_OBJECT_UNLOCK(new_object);
-			m->oflags |= VPO_WANTED;
-			msleep(m, VM_OBJECT_MTX(orig_object), PVM, "spltwt", 0);
-			VM_OBJECT_LOCK(new_object);
-			goto retry;
-		}
+	exhausted = 0;
+	while (exhausted == 0 && (n = vm_radix_lookupn(&orig_object->rtree,
+	    start, offidxstart + size, VM_RADIX_ANY, (void **)ma,
+	    VM_RADIX_STACK, &start, &exhausted)) != 0) {
+		for (i = 0; i < n; i++) {
+			m = ma[i];
+			idx = m->pindex - offidxstart;
+			if (m->flags & PG_CACHED) {
+				mtx_lock(&vm_page_queue_free_mtx);
+				if (m->object == orig_object)
+					vm_page_cache_rename(m, new_object,
+					    idx);
+				mtx_unlock(&vm_page_queue_free_mtx);
+				continue;
+			} else if (m->object != orig_object)
+				continue;
+			/*
+			 * We must wait for pending I/O to complete before
+			 * we can rename the page.
+			 *
+			 * We do not have to VM_PROT_NONE the page as mappings
+			 * should not be changed by this operation.
+			 */
+			if ((m->oflags & VPO_BUSY) || m->busy) {
+				start = m->pindex;
+				VM_OBJECT_UNLOCK(new_object);
+				m->oflags |= VPO_WANTED;
+				msleep(m, VM_OBJECT_MTX(orig_object), PVM,
+				    "spltwt", 0);
+				VM_OBJECT_LOCK(new_object);
+				goto retry;
+			}
 #if VM_NRESERVLEVEL > 0
-		/*
-		 * If some of the reservation's allocated pages remain with
-		 * the original object, then transferring the reservation to
-		 * the new object is neither particularly beneficial nor
-		 * particularly harmful as compared to leaving the reservation
-		 * with the original object.  If, however, all of the
-		 * reservation's allocated pages are transferred to the new
-		 * object, then transferring the reservation is typically
-		 * beneficial.  Determining which of these two cases applies
-		 * would be more costly than unconditionally renaming the
-		 * reservation.
-		 */
-		vm_reserv_rename(m, new_object, orig_object, offidxstart);
+			/*
+			 * If some of the reservation's allocated pages remain
+			 * with the original object, then transferring the
+			 * reservation to the new object is neither
+			 * particularly beneficial nor particularly harmful as
+			 * compared to leaving the reservation with the
+			 * original object.  If, however, all of the
+			 * reservation's allocated pages are transferred to
+			 * the new object, then transferring the reservation
+			 * is typically beneficial.  Determining which of
+			 * these two cases applies would be more costly than
+			 * unconditionally renaming the reservation.
+			 */
+			vm_reserv_rename(m, new_object, orig_object,
+			    offidxstart);
 #endif
-		vm_page_lock(m);
-		vm_page_rename(m, new_object, idx);
-		vm_page_unlock(m);
-		/* page automatically made dirty by rename and cache handled */
-		vm_page_busy(m);
+			vm_page_rename(m, new_object, idx);
+			/*
+			 * page automatically made dirty by rename and
+			 * cache handled
+			 */
+			vm_page_busy(m);
+		}
+		if (n < VM_RADIX_STACK)
+			break;
 	}
 	if (orig_object->type == OBJT_SWAP) {
 		/*
@@ -1369,19 +1430,6 @@ retry:
 		 * and new_object's locks are released and reacquired.
 		 */
 		swap_pager_copy(orig_object, new_object, offidxstart, 0);
-
-		/*
-		 * Transfer any cached pages from orig_object to new_object.
-		 * If swap_pager_copy() found swapped out pages within the
-		 * specified range of orig_object, then it changed
-		 * new_object's type to OBJT_SWAP when it transferred those
-		 * pages to new_object.  Otherwise, new_object's type
-		 * should still be OBJT_DEFAULT and orig_object should not
-		 * contain any cached pages within the specified range.
-		 */
-		if (__predict_false(orig_object->cache != NULL))
-			vm_page_cache_transfer(orig_object, offidxstart,
-			    new_object);
 	}
 	VM_OBJECT_UNLOCK(orig_object);
 	TAILQ_FOREACH(m, &new_object->memq, listq)
@@ -1400,10 +1448,14 @@ retry:
 static int
 vm_object_backing_scan(vm_object_t object, int op)
 {
-	int r = 1;
+	vm_page_t pa[VM_RADIX_STACK];
 	vm_page_t p;
 	vm_object_t backing_object;
-	vm_pindex_t backing_offset_index;
+	vm_pindex_t backing_offset_index, new_pindex;
+	vm_pindex_t start;
+	u_int exhausted;
+	int color, i, n;
+	int r = 1;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	VM_OBJECT_LOCK_ASSERT(object->backing_object, MA_OWNED);
@@ -1431,15 +1483,41 @@ vm_object_backing_scan(vm_object_t object, int op)
 	if (op & OBSC_COLLAPSE_WAIT) {
 		vm_object_set_flag(backing_object, OBJ_DEAD);
 	}
-
+	color = VM_RADIX_BLACK;
+	if (op & OBSC_COLLAPSE_WAIT)
+		color |= VM_RADIX_RED;
 	/*
 	 * Our scan
 	 */
-	p = TAILQ_FIRST(&backing_object->memq);
-	while (p) {
-		vm_page_t next = TAILQ_NEXT(p, listq);
-		vm_pindex_t new_pindex = p->pindex - backing_offset_index;
+restart:
+	start = 0;
+	i = n = VM_RADIX_STACK;
+	exhausted = 0;
+	for (;;) {
+		if (i == n) {
+			if (n < VM_RADIX_STACK)
+				break;
+			if (exhausted != 0 ||
+			    (n = vm_radix_lookupn(&backing_object->rtree,
+			    start, 0, color, (void **)pa, VM_RADIX_STACK,
+			    &start, &exhausted)) == 0)
+				break;
+			i = 0;
+		}
+		p = pa[i++];
+		/*
+		 * Free cached pages.  XXX Why?  Emulating old behavior here.
+		 */
+		if (p->flags & PG_CACHED) {
+			mtx_lock(&vm_page_queue_free_mtx);
+			if (p->object == backing_object)
+				vm_page_cache_free(p);
+			mtx_unlock(&vm_page_queue_free_mtx);
+			continue;
+		} else if (p->object != backing_object)
+			continue;
+		new_pindex = p->pindex - backing_offset_index;
 
 		if (op & OBSC_TEST_ALL_SHADOWED) {
 			vm_page_t pp;
 
@@ -1451,13 +1529,9 @@ vm_object_backing_scan(vm_object_t object, int op)
 			 * note that we do not busy the backing object's
 			 * page.
 			 */
-			if (
-			    p->pindex < backing_offset_index ||
-			    new_pindex >= object->size
-			) {
-				p = next;
+			if (p->pindex < backing_offset_index ||
+			    new_pindex >= object->size)
 				continue;
-			}
 
 			/*
 			 * See if the parent has the page or if the parent's
@@ -1486,12 +1560,9 @@ vm_object_backing_scan(vm_object_t object, int op)
 			vm_page_t pp;
 
 			if (op & OBSC_COLLAPSE_NOWAIT) {
-				if ((p->oflags & VPO_BUSY) ||
-				    !p->valid ||
-				    p->busy) {
-					p = next;
+				if ((p->oflags & VPO_BUSY) || !p->valid ||
+				    p->busy)
 					continue;
-				}
 			} else if (op & OBSC_COLLAPSE_WAIT) {
 				if ((p->oflags & VPO_BUSY) || p->busy) {
 					VM_OBJECT_UNLOCK(object);
@@ -1507,8 +1578,7 @@ vm_object_backing_scan(vm_object_t object, int op)
 					 * should not have changed so we
 					 * just restart our scan.
 					 */
-					p = TAILQ_FIRST(&backing_object->memq);
-					continue;
+					goto restart;
 				}
 			}
 
@@ -1544,7 +1614,6 @@ vm_object_backing_scan(vm_object_t object, int op)
 				else
 					vm_page_remove(p);
 				vm_page_unlock(p);
-				p = next;
 				continue;
 			}
 
@@ -1564,7 +1633,6 @@ vm_object_backing_scan(vm_object_t object, int op)
 				 * page before we can (re)lock the parent.
 				 * Hence we can get here.
 				 */
-				p = next;
 				continue;
 			}
 			if (
@@ -1586,7 +1654,6 @@ vm_object_backing_scan(vm_object_t object, int op)
 				else
 					vm_page_remove(p);
 				vm_page_unlock(p);
-				p = next;
 				continue;
 			}
 
@@ -1605,12 +1672,9 @@ vm_object_backing_scan(vm_object_t object, int op)
 			 * If the page was mapped to a process, it can remain
 			 * mapped through the rename.
 			 */
-			vm_page_lock(p);
 			vm_page_rename(p, object, new_pindex);
-			vm_page_unlock(p);
 			/* page automatically made dirty by rename */
 		}
-		p = next;
 	}
 	return (r);
 }
@@ -1724,12 +1788,6 @@ vm_object_collapse(vm_object_t object)
 				    backing_object,
 				    object,
 				    OFF_TO_IDX(object->backing_object_offset), TRUE);
-
-				/*
-				 * Free any cached pages from backing_object.
-				 */
-				if (__predict_false(backing_object->cache != NULL))
-					vm_page_cache_free(backing_object, 0, 0);
 			}
 			/*
 			 * Object now shadows whatever backing_object did.
@@ -1850,80 +1908,112 @@ void
 vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end,
     int options)
 {
-	vm_page_t p, next;
+	struct vnode *vp;
+	vm_page_t pa[VM_RADIX_STACK];
+	vm_page_t p;
+	u_int exhausted;
+	int i, n;
 	int wirings;
 
 	VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
 	KASSERT((object->flags & OBJ_UNMANAGED) == 0 ||
 	    (options & (OBJPR_CLEANONLY | OBJPR_NOTMAPPED)) == OBJPR_NOTMAPPED,
 	    ("vm_object_page_remove: illegal options for object %p", object));
-	if (object->resident_page_count == 0)
-		goto skipmemq;
+	if (object->resident_page_count == 0 && object->cached_page_count == 0)
+		return;
+	vp = NULL;
 	vm_object_pip_add(object, 1);
-again:
-	p = vm_page_find_least(object, start);
-
-	/*
-	 * Here, the variable "p" is either (1) the page with the least pindex
-	 * greater than or equal to the parameter "start" or (2) NULL.
-	 */
-	for (; p != NULL && (p->pindex < end || end == 0); p = next) {
-		next = TAILQ_NEXT(p, listq);
-
-		/*
-		 * If the page is wired for any reason besides the existence
-		 * of managed, wired mappings, then it cannot be freed.  For
-		 * example, fictitious pages, which represent device memory,
-		 * are inherently wired and cannot be freed.  They can,
-		 * however, be invalidated if the option OBJPR_CLEANONLY is
-		 * not specified.
-		 */
-		vm_page_lock(p);
-		if ((wirings = p->wire_count) != 0 &&
-		    (wirings = pmap_page_wired_mappings(p)) != p->wire_count) {
+restart:
+	exhausted = 0;
+	while (exhausted == 0 && (n = vm_radix_lookupn(&object->rtree, start,
+	    end, VM_RADIX_ANY, (void **)pa, VM_RADIX_STACK, &start,
+	    &exhausted)) != 0) {
+		for (i = 0; i < n; i++) {
+			p = pa[i];
+			/*
+			 * Another thread may allocate this cached page from
+			 * the queue before we acquire the page queue free
+			 * mtx.
+			 */
+			if (p->flags & PG_CACHED) {
+				mtx_lock(&vm_page_queue_free_mtx);
+				if (p->object == object) {
+					vm_page_cache_free(p);
+					if (object->type == OBJT_VNODE &&
+					    object->cached_page_count == 0)
+						vp = object->handle;
+				}
+				mtx_unlock(&vm_page_queue_free_mtx);
+				continue;
+			} else if (p->object != object)
+				continue;
+			/*
+			 * If the page is wired for any reason besides
+			 * the existence of managed, wired mappings, then
+			 * it cannot be freed.  For example, fictitious
+			 * pages, which represent device memory, are
+			 * inherently wired and cannot be freed.  They can,
+			 * however, be invalidated if the option
+			 * OBJPR_CLEANONLY is not specified.
+			 */
+			vm_page_lock(p);
+			if ((wirings = p->wire_count) != 0 &&
+			    (wirings = pmap_page_wired_mappings(p)) !=
+			    p->wire_count) {
+				if ((options & OBJPR_NOTMAPPED) == 0) {
+					pmap_remove_all(p);
+					/*
+					 * Account for removal of wired
+					 * mappings.
+					 */
+					if (wirings != 0)
+						p->wire_count -= wirings;
+				}
+				if ((options & OBJPR_CLEANONLY) == 0) {
+					p->valid = 0;
+					vm_page_undirty(p);
+				}
+				vm_page_unlock(p);
+				continue;
+			}
+			if (vm_page_sleep_if_busy(p, TRUE, "vmopar")) {
+				start = p->pindex;
+				goto restart;
+			}
+			KASSERT((p->flags & PG_FICTITIOUS) == 0,
+			    ("vm_object_page_remove: page %p is fictitious",
+			    p));
+			if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) {
+				if ((options & OBJPR_NOTMAPPED) == 0)
+					pmap_remove_write(p);
+				if (p->dirty) {
+					vm_page_unlock(p);
+					continue;
+				}
+			}
 			if ((options & OBJPR_NOTMAPPED) == 0) {
 				pmap_remove_all(p);
 				/* Account for removal of wired mappings. */
+				if (wirings != 0) {
+					KASSERT(p->wire_count == wirings,
+					    ("inconsistent wire count %d %d %p",
+					    p->wire_count, wirings, p));
+					p->wire_count = 0;
+					atomic_subtract_int(&cnt.v_wire_count,
+					    1);
+				}
 				if (wirings != 0)
 					p->wire_count -= wirings;
 			}
-			if ((options & OBJPR_CLEANONLY) == 0) {
-				p->valid = 0;
-				vm_page_undirty(p);
-			}
+			vm_page_free(p);
 			vm_page_unlock(p);
-			continue;
-		}
-		if (vm_page_sleep_if_busy(p, TRUE, "vmopar"))
-			goto again;
-		KASSERT((p->flags & PG_FICTITIOUS) == 0,
-		    ("vm_object_page_remove: page %p is fictitious", p));
-		if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) {
-			if ((options & OBJPR_NOTMAPPED) == 0)
-				pmap_remove_write(p);
-			if (p->dirty) {
-				vm_page_unlock(p);
-				continue;
-			}
 		}
-		if ((options & OBJPR_NOTMAPPED) == 0) {
-			pmap_remove_all(p);
-			/* Account for removal of wired mappings. */
-			if (wirings != 0) {
-				KASSERT(p->wire_count == wirings,
-				    ("inconsistent wire count %d %d %p",
-				    p->wire_count, wirings, p));
-				p->wire_count = 0;
-				atomic_subtract_int(&cnt.v_wire_count, 1);
-			}
-		}
-		vm_page_free(p);
-		vm_page_unlock(p);
+		if (n < VM_RADIX_STACK)
+			break;
 	}
 	vm_object_pip_wakeup(object);
-skipmemq:
-	if (__predict_false(object->cache != NULL))
-		vm_page_cache_free(object, start, end);
+	if (vp)
+		vdrop(vp);
 }
 
 /*
@@ -2301,8 +2391,9 @@ DB_SHOW_COMMAND(object, vm_object_print_static)
 			db_printf(",");
 		count++;
 
-		db_printf("(off=0x%jx,page=0x%jx)",
-		    (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p));
+		db_printf("(off=0x%jx,page=0x%jx,obj=%p,flags=0x%X)",
+		    (uintmax_t)p->pindex, (uintmax_t)VM_PAGE_TO_PHYS(p),
+		    p->object, p->flags);
 	}
 	if (count != 0)
 		db_printf("\n");
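
The converted functions above all repeat one idiom: instead of walking object->memq with TAILQ_FOREACH, pages are pulled from the object's radix tree in batches of at most VM_RADIX_STACK entries. Below is a minimal sketch of that pattern in isolation, assuming the vm_radix_lookupn() behavior implied by the calls in this diff (fill the array from the requested range, advance "start" past the last page returned, and set "exhausted" once the range is fully scanned); the example_scan() wrapper itself is hypothetical and not part of the commit.

/*
 * Illustrative sketch only: the batched radix-tree iteration used above in
 * vm_object_terminate(), vm_object_split() and vm_object_page_remove().
 */
static void
example_scan(vm_object_t object, vm_pindex_t start, vm_pindex_t end)
{
	vm_page_t pa[VM_RADIX_STACK];
	vm_page_t p;
	u_int exhausted;
	int i, n;

	exhausted = 0;
	while (exhausted == 0 && (n = vm_radix_lookupn(&object->rtree, start,
	    end, VM_RADIX_ANY, (void **)pa, VM_RADIX_STACK, &start,
	    &exhausted)) != 0) {
		for (i = 0; i < n; i++) {
			p = pa[i];
			/*
			 * A page may have left the object between the lookup
			 * and this point, so each caller revalidates
			 * p->object (and handles PG_CACHED pages under the
			 * page queue free mutex).
			 */
			if (p->object != object)
				continue;
			/* Per-page work goes here. */
		}
		/* A short batch means the requested range is finished. */
		if (n < VM_RADIX_STACK)
			break;
	}
}

When a caller has to sleep (busy page), it records the page's pindex in "start" and jumps back to its restart:/retry: label, as the hunks above do, since pages can be removed or renamed while the object lock is dropped.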