Diffstat (limited to 'sys/vm')
-rw-r--r--  sys/vm/vm_kern.c    |  43
-rw-r--r--  sys/vm/vm_object.c  | 149
-rw-r--r--  sys/vm/vm_object.h  |   1
-rw-r--r--  sys/vm/vm_page.c    |   4
-rw-r--r--  sys/vm/vm_page.h    |   1
-rw-r--r--  sys/vm/vm_pageout.c |  75
6 files changed, 213 insertions(+), 60 deletions(-)
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index b97b954..291d0dd 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -159,11 +159,10 @@ kmem_alloc_attr(vmem_t *vmem, vm_size_t size, int flags, vm_paddr_t low,
vm_paddr_t high, vm_memattr_t memattr)
{
vm_object_t object = vmem == kmem_arena ? kmem_object : kernel_object;
- vm_offset_t addr;
+ vm_offset_t addr, i;
vm_ooffset_t offset;
vm_page_t m;
int pflags, tries;
- int i;
size = round_page(size);
if (vmem_alloc(vmem, size, M_BESTFIT | flags, &addr))
@@ -184,18 +183,7 @@ retry:
tries++;
goto retry;
}
- /*
- * Unmap and free the pages.
- */
- if (i != 0)
- pmap_remove(kernel_pmap, addr, addr + i);
- while (i != 0) {
- i -= PAGE_SIZE;
- m = vm_page_lookup(object,
- OFF_TO_IDX(offset + i));
- vm_page_unwire(m, 0);
- vm_page_free(m);
- }
+ kmem_unback(object, addr, i);
vmem_free(vmem, addr, size);
return (0);
}
@@ -353,25 +341,13 @@ retry:
* aren't on any queues.
*/
if (m == NULL) {
+ VM_OBJECT_WUNLOCK(object);
if ((flags & M_NOWAIT) == 0) {
- VM_OBJECT_WUNLOCK(object);
VM_WAIT;
VM_OBJECT_WLOCK(object);
goto retry;
}
- /*
- * Unmap and free the pages.
- */
- if (i != 0)
- pmap_remove(kernel_pmap, addr, addr + i);
- while (i != 0) {
- i -= PAGE_SIZE;
- m = vm_page_lookup(object,
- OFF_TO_IDX(offset + i));
- vm_page_unwire(m, 0);
- vm_page_free(m);
- }
- VM_OBJECT_WUNLOCK(object);
+ kmem_unback(object, addr, i);
return (KERN_NO_SPACE);
}
if (flags & M_ZERO && (m->flags & PG_ZERO) == 0)
@@ -387,6 +363,15 @@ retry:
return (KERN_SUCCESS);
}
+/*
+ * kmem_unback:
+ *
+ * Unmap and free the physical pages underlying the specified virtual
+ * address range.
+ *
+ * A physical page must exist within the specified object at each index
+ * that is being unmapped.
+ */
void
kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
{
@@ -396,9 +381,9 @@ kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
KASSERT(object == kmem_object || object == kernel_object,
("kmem_unback: only supports kernel objects."));
+ pmap_remove(kernel_pmap, addr, addr + size);
offset = addr - VM_MIN_KERNEL_ADDRESS;
VM_OBJECT_WLOCK(object);
- pmap_remove(kernel_pmap, addr, addr + size);
for (i = 0; i < size; i += PAGE_SIZE) {
m = vm_page_lookup(object, OFF_TO_IDX(offset + i));
vm_page_unwire(m, 0);
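For reference, a condensed sketch of kmem_unback() as it reads once the hunks
above are applied. The local declarations are reconstructed here and should be
treated as an assumption rather than a verbatim excerpt; note that the patch
deliberately hoists pmap_remove() out from under the object lock.

    void
    kmem_unback(vm_object_t object, vm_offset_t addr, vm_size_t size)
    {
            vm_page_t m;
            vm_offset_t i, offset;

            KASSERT(object == kmem_object || object == kernel_object,
                ("kmem_unback: only supports kernel objects."));

            /* Remove the kernel mappings before taking the object lock. */
            pmap_remove(kernel_pmap, addr, addr + size);
            offset = addr - VM_MIN_KERNEL_ADDRESS;
            VM_OBJECT_WLOCK(object);
            for (i = 0; i < size; i += PAGE_SIZE) {
                    /* Each index in [addr, addr + size) must be populated. */
                    m = vm_page_lookup(object, OFF_TO_IDX(offset + i));
                    vm_page_unwire(m, 0);
                    vm_page_free(m);
            }
            VM_OBJECT_WUNLOCK(object);
    }

Both error paths in vm_kern.c that previously open-coded this unwind loop now
call the helper with the number of bytes already backed.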
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 36a2ead..9d08714 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -79,6 +79,7 @@ __FBSDID("$FreeBSD$");
#include <sys/socket.h>
#include <sys/resourcevar.h>
#include <sys/rwlock.h>
+#include <sys/user.h>
#include <sys/vnode.h>
#include <sys/vmmeter.h>
#include <sys/sx.h>
@@ -2269,6 +2270,154 @@ next_page:
}
}
+struct vnode *
+vm_object_vnode(vm_object_t object)
+{
+
+ VM_OBJECT_ASSERT_LOCKED(object);
+ if (object->type == OBJT_VNODE)
+ return (object->handle);
+ if (object->type == OBJT_SWAP && (object->flags & OBJ_TMPFS) != 0)
+ return (object->un_pager.swp.swp_tmpfs);
+ return (NULL);
+}
+
+static int
+sysctl_vm_object_list(SYSCTL_HANDLER_ARGS)
+{
+ struct kinfo_vmobject kvo;
+ char *fullpath, *freepath;
+ struct vnode *vp;
+ struct vattr va;
+ vm_object_t obj;
+ vm_page_t m;
+ int count, error;
+
+ if (req->oldptr == NULL) {
+ /*
+ * If an old buffer has not been provided, generate an
+ * estimate of the space needed for a subsequent call.
+ */
+ mtx_lock(&vm_object_list_mtx);
+ count = 0;
+ TAILQ_FOREACH(obj, &vm_object_list, object_list) {
+ if (obj->type == OBJT_DEAD)
+ continue;
+ count++;
+ }
+ mtx_unlock(&vm_object_list_mtx);
+ return (SYSCTL_OUT(req, NULL, sizeof(struct kinfo_vmobject) *
+ count * 11 / 10));
+ }
+
+ error = 0;
+
+ /*
+ * VM objects are type stable and are never removed from the
+ * list once added. This allows us to safely read obj->object_list
+ * after reacquiring the VM object lock.
+ */
+ mtx_lock(&vm_object_list_mtx);
+ TAILQ_FOREACH(obj, &vm_object_list, object_list) {
+ if (obj->type == OBJT_DEAD)
+ continue;
+ VM_OBJECT_RLOCK(obj);
+ if (obj->type == OBJT_DEAD) {
+ VM_OBJECT_RUNLOCK(obj);
+ continue;
+ }
+ mtx_unlock(&vm_object_list_mtx);
+ kvo.kvo_size = ptoa(obj->size);
+ kvo.kvo_resident = obj->resident_page_count;
+ kvo.kvo_ref_count = obj->ref_count;
+ kvo.kvo_shadow_count = obj->shadow_count;
+ kvo.kvo_memattr = obj->memattr;
+ kvo.kvo_active = 0;
+ kvo.kvo_inactive = 0;
+ TAILQ_FOREACH(m, &obj->memq, listq) {
+ /*
+ * A page may belong to the object but be
+ * dequeued and set to PQ_NONE while the
+ * object lock is not held. This makes the
+ * reads of m->queue below racy, and we do not
+ * count pages set to PQ_NONE. However, this
+ * sysctl is only meant to give an
+ * approximation of the system anyway.
+ */
+ if (m->queue == PQ_ACTIVE)
+ kvo.kvo_active++;
+ else if (m->queue == PQ_INACTIVE)
+ kvo.kvo_inactive++;
+ }
+
+ kvo.kvo_vn_fileid = 0;
+ kvo.kvo_vn_fsid = 0;
+ freepath = NULL;
+ fullpath = "";
+ vp = NULL;
+ switch (obj->type) {
+ case OBJT_DEFAULT:
+ kvo.kvo_type = KVME_TYPE_DEFAULT;
+ break;
+ case OBJT_VNODE:
+ kvo.kvo_type = KVME_TYPE_VNODE;
+ vp = obj->handle;
+ vref(vp);
+ break;
+ case OBJT_SWAP:
+ kvo.kvo_type = KVME_TYPE_SWAP;
+ break;
+ case OBJT_DEVICE:
+ kvo.kvo_type = KVME_TYPE_DEVICE;
+ break;
+ case OBJT_PHYS:
+ kvo.kvo_type = KVME_TYPE_PHYS;
+ break;
+ case OBJT_DEAD:
+ kvo.kvo_type = KVME_TYPE_DEAD;
+ break;
+ case OBJT_SG:
+ kvo.kvo_type = KVME_TYPE_SG;
+ break;
+ case OBJT_MGTDEVICE:
+ kvo.kvo_type = KVME_TYPE_MGTDEVICE;
+ break;
+ default:
+ kvo.kvo_type = KVME_TYPE_UNKNOWN;
+ break;
+ }
+ VM_OBJECT_RUNLOCK(obj);
+ if (vp != NULL) {
+ vn_fullpath(curthread, vp, &fullpath, &freepath);
+ vn_lock(vp, LK_SHARED | LK_RETRY);
+ if (VOP_GETATTR(vp, &va, curthread->td_ucred) == 0) {
+ kvo.kvo_vn_fileid = va.va_fileid;
+ kvo.kvo_vn_fsid = va.va_fsid;
+ }
+ vput(vp);
+ }
+
+ strlcpy(kvo.kvo_path, fullpath, sizeof(kvo.kvo_path));
+ if (freepath != NULL)
+ free(freepath, M_TEMP);
+
+ /* Pack record size down */
+ kvo.kvo_structsize = offsetof(struct kinfo_vmobject, kvo_path) +
+ strlen(kvo.kvo_path) + 1;
+ kvo.kvo_structsize = roundup(kvo.kvo_structsize,
+ sizeof(uint64_t));
+ error = SYSCTL_OUT(req, &kvo, kvo.kvo_structsize);
+ mtx_lock(&vm_object_list_mtx);
+ if (error)
+ break;
+ }
+ mtx_unlock(&vm_object_list_mtx);
+ return (error);
+}
+SYSCTL_PROC(_vm, OID_AUTO, objects, CTLTYPE_STRUCT | CTLFLAG_RW | CTLFLAG_SKIP |
+ CTLFLAG_MPSAFE, NULL, 0, sysctl_vm_object_list, "S,kinfo_vmobject",
+ "List of VM objects");
+
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>
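The new vm.objects sysctl exports one variable-length struct kinfo_vmobject
record per object (the structure lives in <sys/user.h>, hence the added
include; CTLFLAG_SKIP keeps it out of the default sysctl listing, but it is
reachable by name). Below is a minimal userland sketch of walking the records,
assuming only the fields visible in the hunk above; real code should retry the
second sysctlbyname() call if the object list outgrows the 10%-padded estimate
in the meantime.

    #include <sys/param.h>
    #include <sys/sysctl.h>
    #include <sys/user.h>

    #include <stdint.h>
    #include <stdio.h>
    #include <stdlib.h>

    int
    main(void)
    {
            struct kinfo_vmobject *kvo;
            char *buf, *bp;
            size_t len;

            /* First call sizes the buffer, second call fills it. */
            if (sysctlbyname("vm.objects", NULL, &len, NULL, 0) != 0)
                    return (1);
            if ((buf = malloc(len)) == NULL ||
                sysctlbyname("vm.objects", buf, &len, NULL, 0) != 0)
                    return (1);

            /* Records are packed; kvo_structsize is the stride of each one. */
            for (bp = buf; bp < buf + len;) {
                    kvo = (struct kinfo_vmobject *)(void *)bp;
                    printf("type %d resident %ju %s\n", (int)kvo->kvo_type,
                        (uintmax_t)kvo->kvo_resident, kvo->kvo_path);
                    bp += kvo->kvo_structsize;
            }
            free(buf);
            return (0);
    }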
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index 06111a1..ac8feae 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -297,6 +297,7 @@ boolean_t vm_object_sync(vm_object_t, vm_ooffset_t, vm_size_t, boolean_t,
boolean_t);
void vm_object_unwire(vm_object_t object, vm_ooffset_t offset,
vm_size_t length, uint8_t queue);
+struct vnode *vm_object_vnode(vm_object_t object);
#endif /* _KERNEL */
#endif /* _VM_OBJECT_ */
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 8024a7c..95bf6ca 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -1710,6 +1710,7 @@ vm_page_alloc_contig_vdrop(struct spglist *lst)
*
* optional allocation flags:
* VM_ALLOC_NOBUSY do not exclusive busy the page
+ * VM_ALLOC_NODUMP do not include the page in a kernel core dump
* VM_ALLOC_NOOBJ page is not associated with an object and
* should not be exclusive busy
* VM_ALLOC_SBUSY shared busy the allocated page
@@ -3009,7 +3010,8 @@ vm_page_set_invalid(vm_page_t m, int base, int size)
bits = VM_PAGE_BITS_ALL;
else
bits = vm_page_bits(base, size);
- if (m->valid == VM_PAGE_BITS_ALL && bits != 0)
+ if (object->ref_count != 0 && m->valid == VM_PAGE_BITS_ALL &&
+ bits != 0)
pmap_remove_all(m);
KASSERT((bits == 0 && m->valid == VM_PAGE_BITS_ALL) ||
!pmap_page_is_mapped(m),
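The comment hunk above also documents VM_ALLOC_NODUMP for
vm_page_alloc_contig(). As a hedged illustration (not part of the patch), a
caller that wants a wired, physically contiguous run excluded from kernel core
dumps might request it as follows; "npages" is a placeholder, and
VM_ALLOC_NOOBJ is used so that no object lock is required:

    vm_page_t m;

    /* Any physical address, page-aligned, no boundary constraint. */
    m = vm_page_alloc_contig(NULL, 0,
        VM_ALLOC_NORMAL | VM_ALLOC_NOOBJ | VM_ALLOC_WIRED | VM_ALLOC_NODUMP,
        npages, 0, ~(vm_paddr_t)0, PAGE_SIZE, 0, VM_MEMATTR_DEFAULT);
    if (m == NULL)
            return (ENOMEM);    /* no suitable contiguous run was free */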
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 7a1f944..accf517 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -227,6 +227,7 @@ struct vm_domain {
long vmd_segs; /* bitmask of the segments */
boolean_t vmd_oom;
int vmd_pass; /* local pagedaemon pass */
+ int vmd_last_active_scan;
struct vm_page vmd_marker; /* marker for pagedaemon private use */
};
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 6a56fd7..ed80b1b 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -408,10 +408,13 @@ more:
ib = 0;
break;
}
- vm_page_lock(p);
vm_page_test_dirty(p);
- if (p->dirty == 0 ||
- p->queue != PQ_INACTIVE ||
+ if (p->dirty == 0) {
+ ib = 0;
+ break;
+ }
+ vm_page_lock(p);
+ if (p->queue != PQ_INACTIVE ||
p->hold_count != 0) { /* may be undergoing I/O */
vm_page_unlock(p);
ib = 0;
@@ -435,10 +438,11 @@ more:
if ((p = vm_page_next(ps)) == NULL || vm_page_busied(p))
break;
- vm_page_lock(p);
vm_page_test_dirty(p);
- if (p->dirty == 0 ||
- p->queue != PQ_INACTIVE ||
+ if (p->dirty == 0)
+ break;
+ vm_page_lock(p);
+ if (p->queue != PQ_INACTIVE ||
p->hold_count != 0) { /* may be undergoing I/O */
vm_page_unlock(p);
break;
@@ -922,9 +926,10 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
vm_page_t m, next;
struct vm_pagequeue *pq;
vm_object_t object;
+ long min_scan;
int act_delta, addl_page_shortage, deficit, maxscan, page_shortage;
int vnodes_skipped = 0;
- int maxlaunder;
+ int maxlaunder, scan_tick, scanned;
int lockmode;
boolean_t queues_locked;
@@ -1115,9 +1120,11 @@ vm_pageout_scan(struct vm_domain *vmd, int pass)
* then the page may still be modified until the last of those
* mappings are removed.
*/
- vm_page_test_dirty(m);
- if (m->dirty == 0 && object->ref_count != 0)
- pmap_remove_all(m);
+ if (object->ref_count != 0) {
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
+ }
if (m->valid == 0) {
/*
@@ -1353,34 +1360,37 @@ relock_queues:
* If we're just idle polling attempt to visit every
* active page within 'update_period' seconds.
*/
- if (pass == 0 && vm_pageout_update_period != 0) {
- maxscan /= vm_pageout_update_period;
- page_shortage = maxscan;
- }
+ scan_tick = ticks;
+ if (vm_pageout_update_period != 0) {
+ min_scan = pq->pq_cnt;
+ min_scan *= scan_tick - vmd->vmd_last_active_scan;
+ min_scan /= hz * vm_pageout_update_period;
+ } else
+ min_scan = 0;
+ if (min_scan > 0 || (page_shortage > 0 && maxscan > 0))
+ vmd->vmd_last_active_scan = scan_tick;
/*
- * Scan the active queue for things we can deactivate. We nominally
- * track the per-page activity counter and use it to locate
- * deactivation candidates.
+ * Scan the active queue for pages that can be deactivated. Update
+ * the per-page activity counter and use it to identify deactivation
+ * candidates.
*/
- m = TAILQ_FIRST(&pq->pq_pl);
- while (m != NULL && maxscan-- > 0 && page_shortage > 0) {
+ for (m = TAILQ_FIRST(&pq->pq_pl), scanned = 0; m != NULL && (scanned <
+ min_scan || (page_shortage > 0 && scanned < maxscan)); m = next,
+ scanned++) {
KASSERT(m->queue == PQ_ACTIVE,
("vm_pageout_scan: page %p isn't active", m));
next = TAILQ_NEXT(m, plinks.q);
- if ((m->flags & PG_MARKER) != 0) {
- m = next;
+ if ((m->flags & PG_MARKER) != 0)
continue;
- }
KASSERT((m->flags & PG_FICTITIOUS) == 0,
("Fictitious page %p cannot be in active queue", m));
KASSERT((m->oflags & VPO_UNMANAGED) == 0,
("Unmanaged page %p cannot be in active queue", m));
if (!vm_pageout_page_lock(m, &next)) {
vm_page_unlock(m);
- m = next;
continue;
}
@@ -1433,7 +1443,6 @@ relock_queues:
} else
vm_page_requeue_locked(m);
vm_page_unlock(m);
- m = next;
}
vm_pagequeue_unlock(pq);
#if !defined(NO_SWAPPING)
@@ -1621,6 +1630,7 @@ vm_pageout_worker(void *arg)
*/
KASSERT(domain->vmd_segs != 0, ("domain without segments"));
+ domain->vmd_last_active_scan = ticks;
vm_pageout_init_marker(&domain->vmd_marker, PQ_INACTIVE);
/*
@@ -1641,9 +1651,15 @@ vm_pageout_worker(void *arg)
}
if (vm_pages_needed) {
/*
- * Still not done, take a second pass without waiting
- * (unlimited dirty cleaning), otherwise sleep a bit
- * and try again.
+ * We're still not done. Either vm_pages_needed was
+ * set by another thread during the previous scan
+ * (typically, this happens during a level 0 scan) or
+ * vm_pages_needed was already set and the scan failed
+ * to free enough pages. If we haven't yet performed
+ * a level >= 2 scan (unlimited dirty cleaning), then
+ * upgrade the level and scan again now. Otherwise,
+ * sleep a bit and try again later. While sleeping,
+ * vm_pages_needed can be cleared.
*/
if (domain->vmd_pass > 1)
msleep(&vm_pages_needed,
@@ -1654,15 +1670,14 @@ vm_pageout_worker(void *arg)
* Good enough, sleep until required to refresh
* stats.
*/
- domain->vmd_pass = 0;
msleep(&vm_pages_needed, &vm_page_queue_free_mtx,
PVM, "psleep", hz);
-
}
if (vm_pages_needed) {
cnt.v_pdwakeups++;
domain->vmd_pass++;
- }
+ } else
+ domain->vmd_pass = 0;
mtx_unlock(&vm_page_queue_free_mtx);
vm_pageout_scan(domain, domain->vmd_pass);
}