Diffstat (limited to 'sys/vm')
-rw-r--r--   sys/vm/vm_kern.c    |  43
-rw-r--r--   sys/vm/vm_object.c  | 149
-rw-r--r--   sys/vm/vm_object.h  |   1
-rw-r--r--   sys/vm/vm_page.c    |   4
-rw-r--r--   sys/vm/vm_page.h    |   1
-rw-r--r--   sys/vm/vm_pageout.c |  75
6 files changed, 213 insertions, 60 deletions
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index b97b954..291d0dd 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -159,11 +159,10 @@ kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
     vm_paddr_t high, vm_memattr_t memattr)
 {
 	vm_object_t object = vmem == kmem_arena ? kmem_object : kernel_object;
-	vm_offset_t addr;
+	vm_offset_t addr, i;
 	vm_ooffset_t offset;
 	vm_page_t m;
 	int pflags, tries;
-	int i;
 
 	size = round_page(size);
 	if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
@@ -184,18 +183,7 @@ retry:
 			tries++;
 			goto retry;
 		}
-		/*
-		 * Unmap and free the pages.
-		 */
-		if (i != 0)
-			pmap_remove(kernel_pmap, addr, addr + i);
-		while (i != 0) {
-			i -= PAGE_SIZE;
-			m = vm_page_lookup(object,
-			    OFF_TO_IDX(offset + i));
-			vm_page_unwire(m, 0);
-			vm_page_free(m);
-		}
+		kmem_unback(object, addr, i);
 		vmem_free(vmem, addr, size);
 		return (0);
 	}
@@ -353,25 +341,13 @@ retry:
 		 * aren't on any queues.
 		 */
 		if (m == NULL) {
+			VM_OBJECT_WUNLOCK(object);
 			if ((flags & M_NOWAIT) == 0) {
-				VM_OBJECT_WUNLOCK(object);
 				VM_WAIT;
 				VM_OBJECT_WLOCK(object);
 				goto retry;
 			}
-			/*
-			 * Unmap and free the pages.
-			 */
-			if (i != 0)
-				pmap_remove(kernel_pmap, addr, addr + i);
-			while (i != 0) {
-				i -= PAGE_SIZE;
-				m = vm_page_lookup(object,
-				    OFF_TO_IDX(offset + i));
-				vm_page_unwire(m, 0);
-				vm_page_free(m);
-			}
-			VM_OBJECT_WUNLOCK(object);
+			kmem_unback(object, addr, i);
 			return (KERN_NO_SPACE);
 		}
 		if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
@@ -387,6 +363,15 @@ retry:
 	return (KERN_SUCCESS);
 }
 
+/*
+ * kmem_unback:
+ *
+ *	Unmap and free the physical pages underlying the specified virtual
+ *	address range.
+ *
+ *	A physical page must exist within the specified object at each index
+ *	that is being unmapped.
+ */
 void
 kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 {
@@ -396,9 +381,9 @@ kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
 	KASSERT(object == kmem_object || object == kernel_object,
 	    ("kmem_unback: only supports kernel objects."));
 
+	pmap_remove(kernel_pmap, addr, addr + size);
 	offset = addr - VM_MIN_KERNEL_ADDRESS;
 	VM_OBJECT_WLOCK(object);
-	pmap_remove(kernel_pmap, addr, addr + size);
 	for (i = 0; i < size; i += PAGE_SIZE) {
 		m = vm_page_lookup(object, OFF_TO_IDX(offset + i));
 		vm_page_unwire(m, 0);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 36a2ead..9d08714 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -79,6 +79,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/socket.h>
 #include <sys/resourcevar.h>
 #include <sys/rwlock.h>
+#include <sys/user.h>
 #include <sys/vnode.h>
 #include <sys/vmmeter.h>
 #include <sys/sx.h>
@@ -2269,6 +2270,154 @@ next_page:
 	}
 }
 
+struct vnode *
+vm_object_vnode(vm_object_t object)
+{
+
+	VM_OBJECT_ASSERT_LOCKED(object);
+	if (object->type == OBJT_VNODE)
+		return (object->handle);
+	if (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS) != 0)
+		return (object->un_pager.swp.swp_tmpfs);
+	return (NULL);
+}
+
+static int
+sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
+{
+	struct kinfo_vmobject kvo;
+	char *fullpath, *freepath;
+	struct vnode *vp;
+	struct vattr va;
+	vm_object_t obj;
+	vm_page_t m;
+	int count, error;
+
+	if (req->oldptr == NULL) {
+		/*
+		 * If an old buffer has not been provided, generate an
+		 * estimate of the space needed for a subsequent call.
+		 */
+		mtx_lock(&vm_object_list_mtx);
+		count = 0;
+		TAILQ_FOREACH(obj, &vm_object_list, object_list) {
+			if (obj->type == OBJT_DEAD)
+				continue;
+			count++;
+		}
+		mtx_unlock(&vm_object_list_mtx);
+		return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) *
+		    count * 11 / 10));
+	}
+	error = 0;
+
+	/*
+	 * VM objects are type stable and are never removed from the
+	 * list once added.  This allows us to safely read obj->object_list
+	 * after reacquiring the VM object lock.
+	 */
+	mtx_lock(&vm_object_list_mtx);
+	TAILQ_FOREACH(obj, &vm_object_list, object_list) {
+		if (obj->type == OBJT_DEAD)
+			continue;
+		VM_OBJECT_RLOCK(obj);
+		if (obj->type == OBJT_DEAD) {
+			VM_OBJECT_RUNLOCK(obj);
+			continue;
+		}
+		mtx_unlock(&vm_object_list_mtx);
+		kvo.kvo_size = ptoa(obj->size);
+		kvo.kvo_resident = obj->resident_page_count;
+		kvo.kvo_ref_count = obj->ref_count;
+		kvo.kvo_shadow_count = obj->shadow_count;
+		kvo.kvo_memattr = obj->memattr;
+		kvo.kvo_active = 0;
+		kvo.kvo_inactive = 0;
+		TAILQ_FOREACH(m, &obj->memq, listq) {
+			/*
+			 * A page may belong to the object but be
+			 * dequeued and set to PQ_NONE while the
+			 * object lock is not held.  This makes the
+			 * reads of m->queue below racy, and we do not
+			 * count pages set to PQ_NONE.  However, this
+			 * sysctl is only meant to give an
+			 * approximation of the system anyway.
+			 */
+			if (m->queue == PQ_ACTIVE)
+				kvo.kvo_active++;
+			else if (m->queue == PQ_INACTIVE)
+				kvo.kvo_inactive++;
+		}
+
+		kvo.kvo_vn_fileid = 0;
+		kvo.kvo_vn_fsid = 0;
+		freepath = NULL;
+		fullpath = "";
+		vp = NULL;
+		switch (obj->type) {
+		case OBJT_DEFAULT:
+			kvo.kvo_type = KVME_TYPE_DEFAULT;
+			break;
+		case OBJT_VNODE:
+			kvo.kvo_type = KVME_TYPE_VNODE;
+			vp = obj->handle;
+			vref(vp);
+			break;
+		case OBJT_SWAP:
+			kvo.kvo_type = KVME_TYPE_SWAP;
+			break;
+		case OBJT_DEVICE:
+			kvo.kvo_type = KVME_TYPE_DEVICE;
+			break;
+		case OBJT_PHYS:
+			kvo.kvo_type = KVME_TYPE_PHYS;
+			break;
+		case OBJT_DEAD:
+			kvo.kvo_type = KVME_TYPE_DEAD;
+			break;
+		case OBJT_SG:
+			kvo.kvo_type = KVME_TYPE_SG;
+			break;
+		case OBJT_MGTDEVICE:
+			kvo.kvo_type = KVME_TYPE_MGTDEVICE;
+			break;
+		default:
+			kvo.kvo_type = KVME_TYPE_UNKNOWN;
+			break;
+		}
+		VM_OBJECT_RUNLOCK(obj);
+		if (vp != NULL) {
+			vn_fullpath(curthread, vp, &fullpath, &freepath);
+			vn_lock(vp, LK_SHARED | LK_RETRY);
+			if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) {
+				kvo.kvo_vn_fileid = va.va_fileid;
+				kvo.kvo_vn_fsid = va.va_fsid;
+			}
+			vput(vp);
+		}
+
+		strlcpy(kvo.kvo_path, fullpath, sizeof(kvo.kvo_path));
+		if (freepath != NULL)
+			free(freepath, M_TEMP);
+
+		/* Pack record size down */
+		kvo.kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path) +
+		    strlen(kvo.kvo_path) + 1;
+		kvo.kvo_structsize = roundup(kvo.kvo_structsize,
+		    sizeof(uint64_t));
+		error = SYSCTL_OUT(req, &kvo, kvo.kvo_structsize);
+		mtx_lock(&vm_object_list_mtx);
+		if (error)
+			break;
+	}
+	mtx_unlock(&vm_object_list_mtx);
+	return (error);
+}
+
+SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP |
+    CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject",
+    "List of VM objects");
+
 #include "opt_ddb.h"
 #ifdef DDB
 #include <sys/kernel.h>
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 06111a1..ac8feae 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -297,6 +297,7 @@ boolean_t vm_object_sync(vm_object_t, vm_ooffset_t, vm_size_t, boolean_t,
 	    boolean_t);
 void	vm_object_unwire(vm_object_t object, vm_ooffset_t offset,
 	    vm_size_t length, uint8_t queue);
+struct vnode *vm_object_vnode(vm_object_t object);
 #endif				/* _KERNEL */
 
 #endif				/* _VM_OBJECT_ */
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 8024a7c..95bf6ca 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1710,6 +1710,7 @@ vm_page_alloc_contig_vdrop(struct spglist *lst)
  *
  *	optional allocation flags:
  *	VM_ALLOC_NOBUSY		do not exclusive busy the page
+ *	VM_ALLOC_NODUMP		do not include the page in a kernel core dump
  *	VM_ALLOC_NOOBJ		page is not associated with an object and
  *				should not be exclusive busy
  *	VM_ALLOC_SBUSY		shared busy the allocated page
@@ -3009,7 +3010,8 @@ vm_page_set_invalid(vm_page_t m, int base, int size)
 		bits = VM_PAGE_BITS_ALL;
 	else
 		bits = vm_page_bits(base, size);
-	if (m->valid == VM_PAGE_BITS_ALL && bits != 0)
+	if (object->ref_count != 0 && m->valid == VM_PAGE_BITS_ALL &&
+	    bits != 0)
 		pmap_remove_all(m);
 	KASSERT((bits == 0 && m->valid == VM_PAGE_BITS_ALL) ||
 	    !pmap_page_is_mapped(m),
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 7a1f944..accf517 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -227,6 +227,7 @@ struct vm_domain {
 	long vmd_segs;	/* bitmask of the segments */
 	boolean_t vmd_oom;
 	int vmd_pass;	/* local pagedaemon pass */
+	int vmd_last_active_scan;
 	struct vm_page vmd_marker; /* marker for pagedaemon private use */
 };
 
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 6a56fd7..ed80b1b 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -408,10 +408,13 @@ more:
 			ib = 0;
 			break;
 		}
-		vm_page_lock(p);
 		vm_page_test_dirty(p);
-		if (p->dirty == 0 ||
-		    p->queue != PQ_INACTIVE ||
+		if (p->dirty == 0) {
+			ib = 0;
+			break;
+		}
+		vm_page_lock(p);
+		if (p->queue != PQ_INACTIVE ||
 		    p->hold_count != 0) {	/* may be undergoing I/O */
 			vm_page_unlock(p);
 			ib = 0;
@@ -435,10 +438,11 @@ more:
 
 		if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
 			break;
-		vm_page_lock(p);
 		vm_page_test_dirty(p);
-		if (p->dirty == 0 ||
-		    p->queue != PQ_INACTIVE ||
+		if (p->dirty == 0)
+			break;
+		vm_page_lock(p);
+		if (p->queue != PQ_INACTIVE ||
 		    p->hold_count != 0) {	/* may be undergoing I/O */
 			vm_page_unlock(p);
 			break;
@@ -922,9 +926,10 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
 	vm_page_t m, next;
 	struct vm_pagequeue *pq;
 	vm_object_t object;
+	long min_scan;
 	int act_delta, addl_page_shortage, deficit, maxscan, page_shortage;
 	int vnodes_skipped = 0;
-	int maxlaunder;
+	int maxlaunder, scan_tick, scanned;
 	int lockmode;
 	boolean_t queues_locked;
 
@@ -1115,9 +1120,11 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
 		 * then the page may still be modified until the last of those
 		 * mappings are removed.
 		 */
-		vm_page_test_dirty(m);
-		if (m->dirty == 0 && object->ref_count != 0)
-			pmap_remove_all(m);
+		if (object->ref_count != 0) {
+			vm_page_test_dirty(m);
+			if (m->dirty == 0)
+				pmap_remove_all(m);
+		}
 
 		if (m->valid == 0) {
 			/*
@@ -1353,34 +1360,37 @@ relock_queues:
 	 * If we're just idle polling attempt to visit every
 	 * active page within 'update_period' seconds.
 	 */
-	if (pass == 0 && vm_pageout_update_period != 0) {
-		maxscan /= vm_pageout_update_period;
-		page_shortage = maxscan;
-	}
+	scan_tick = ticks;
+	if (vm_pageout_update_period != 0) {
+		min_scan = pq->pq_cnt;
+		min_scan *= scan_tick - vmd->vmd_last_active_scan;
+		min_scan /= hz * vm_pageout_update_period;
+	} else
+		min_scan = 0;
+	if (min_scan > 0 || (page_shortage > 0 && maxscan > 0))
+		vmd->vmd_last_active_scan = scan_tick;
 
 	/*
-	 * Scan the active queue for things we can deactivate. We nominally
-	 * track the per-page activity counter and use it to locate
-	 * deactivation candidates.
+	 * Scan the active queue for pages that can be deactivated.  Update
+	 * the per-page activity counter and use it to identify deactivation
+	 * candidates.
 	 */
-	m = TAILQ_FIRST(&pq->pq_pl);
-	while (m != NULL && maxscan-- > 0 && page_shortage > 0) {
+	for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <
+	    min_scan || (page_shortage > 0 && scanned < maxscan)); m = next,
+	    scanned++) {
+
 		KASSERT(m->queue == PQ_ACTIVE,
 		    ("vm_pageout_scan: page %p isn't active", m));
 
 		next = TAILQ_NEXT(m, plinks.q);
-		if ((m->flags & PG_MARKER) != 0) {
-			m = next;
+		if ((m->flags & PG_MARKER) != 0)
 			continue;
-		}
 		KASSERT((m->flags & PG_FICTITIOUS) == 0,
 		    ("Fictitious page %p cannot be in active queue", m));
 		KASSERT((m->oflags & VPO_UNMANAGED) == 0,
 		    ("Unmanaged page %p cannot be in active queue", m));
 		if (!vm_pageout_page_lock(m, &next)) {
 			vm_page_unlock(m);
 			continue;
 		}
 
@@ -1433,7 +1443,6 @@ relock_queues:
 		} else
 			vm_page_requeue_locked(m);
 		vm_page_unlock(m);
-		m = next;
 	}
 	vm_pagequeue_unlock(pq);
 #if !defined(NO_SWAPPING)
@@ -1621,6 +1630,7 @@ vm_pageout_worker(void *arg)
 	 */
 	KASSERT(domain->vmd_segs != 0, ("domain without segments"));
 
+	domain->vmd_last_active_scan = ticks;
 	vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE);
 
 	/*
@@ -1641,9 +1651,15 @@ vm_pageout_worker(void *arg)
 		}
 		if (vm_pages_needed) {
 			/*
-			 * Still not done, take a second pass without waiting
-			 * (unlimited dirty cleaning), otherwise sleep a bit
-			 * and try again.
+			 * We're still not done.  Either vm_pages_needed was
+			 * set by another thread during the previous scan
+			 * (typically, this happens during a level 0 scan) or
+			 * vm_pages_needed was already set and the scan failed
+			 * to free enough pages.  If we haven't yet performed
+			 * a level >= 2 scan (unlimited dirty cleaning), then
+			 * upgrade the level and scan again now.  Otherwise,
+			 * sleep a bit and try again later.  While sleeping,
+			 * vm_pages_needed can be cleared.
 			 */
 			if (domain->vmd_pass > 1)
 				msleep(&vm_pages_needed,
@@ -1654,15 +1670,14 @@ vm_pageout_worker(void *arg)
 			 * Good enough, sleep until required to refresh
 			 * stats.
 			 */
-			domain->vmd_pass = 0;
 			msleep(&vm_pages_needed, &vm_page_queue_free_mtx,
 			    PVM, "psleep", hz);
-
 		}
 		if (vm_pages_needed) {
 			cnt.v_pdwakeups++;
 			domain->vmd_pass++;
-		}
+		} else
+			domain->vmd_pass = 0;
 		mtx_unlock(&vm_page_queue_free_mtx);
 		vm_pageout_scan(domain, domain->vmd_pass);
 	}
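
A note on the vm_kern.c refactoring: both error paths previously open-coded the same unwind loop, and both now funnel through the new kmem_unback() helper, which additionally issues the pmap_remove() over the whole range before taking the object write lock rather than under it. A minimal sketch of the intended kmem_back()/kmem_unback() pairing (hypothetical caller, assuming addr names size bytes of kernel virtual address space already allocated from the arena):

	/* Back the range with wired physical pages. */
	if (kmem_back(kernel_object, addr, size, M_WAITOK) != KERN_SUCCESS)
		return (ENOMEM);
	/* ... use the mapping ... */
	kmem_unback(kernel_object, addr, size);	/* unmap, unwire, free */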
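
The new vm_object_vnode() helper hides the difference between a plain OBJT_VNODE object and a tmpfs-backed OBJT_SWAP object with OBJ_TMPFS set. A hypothetical caller sketch, mirroring what sysctl_vm_object_list() does for the vnode case:

	VM_OBJECT_RLOCK(obj);
	vp = vm_object_vnode(obj);
	if (vp != NULL)
		vref(vp);	/* keep the vnode alive across the unlock */
	VM_OBJECT_RUNLOCK(obj);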
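
Records returned by the new vm.objects sysctl are variable length: kvo_structsize is trimmed down to the actual path length and rounded up to an 8-byte boundary, so a consumer must step through the buffer record by record rather than indexing an array. A minimal userland sketch (hypothetical program, not part of this commit; error handling abbreviated). Because the OID is marked CTLFLAG_SKIP it is hidden from sysctl -a but still reachable by name:

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <sys/user.h>

	#include <stdio.h>
	#include <stdlib.h>

	int
	main(void)
	{
		struct kinfo_vmobject *kvo;
		char *buf, *bp;
		size_t len;

		/* Sizing call; the handler pads its estimate by 10%. */
		if (sysctlbyname("vm.objects", NULL, &len, NULL, 0) != 0)
			return (1);
		if ((buf = malloc(len)) == NULL ||
		    sysctlbyname("vm.objects", buf, &len, NULL, 0) != 0)
			return (1);
		/* kvo_structsize gives the length of each packed record. */
		for (bp = buf; bp < buf + len; bp += kvo->kvo_structsize) {
			kvo = (struct kinfo_vmobject *)(void *)bp;
			printf("type %d resident %d path \"%s\"\n",
			    kvo->kvo_type, kvo->kvo_resident, kvo->kvo_path);
		}
		free(buf);
		return (0);
	}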
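
Finally, the replacement pacing logic in vm_pageout_scan() converts the ticks elapsed since the last active-queue scan into a minimum number of pages to visit, so the whole queue is covered roughly once per vm_pageout_update_period no matter how often the pagedaemon wakes up. Worked numbers (hypothetical, assuming a stock hz of 1000, an update period of 600 seconds, 100,000 active pages, and a 5-second gap since the last scan):

	min_scan = pq->pq_cnt;				    /* 100000 pages   */
	min_scan *= scan_tick - vmd->vmd_last_active_scan;  /* x 5000 ticks   */
	min_scan /= hz * vm_pageout_update_period;	    /* / 600000 = 833 */

The intermediate product (pages times ticks) can overflow an int once the queue is large and the gap is long, which is presumably why min_scan is declared long while the other scan counters remain int.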