author		attilio <attilio@FreeBSD.org>	2012-08-03 15:58:05 +0000
committer	attilio <attilio@FreeBSD.org>	2012-08-03 15:58:05 +0000
commit		c52a057b1917918a0faad18ccf6666c4cdcb5c8c (patch)
tree		21862df57c9f0eef84636c11cb5c3ecf3d578893 /sys/vm
parent		675a214708d3e1fb79cbcf6bd8a46e0f1cc4823d (diff)
parent		8916b8f9033faa7d822c84a2bdd638979f11fab1 (diff)
download	FreeBSD-src-c52a057b1917918a0faad18ccf6666c4cdcb5c8c.zip
		FreeBSD-src-c52a057b1917918a0faad18ccf6666c4cdcb5c8c.tar.gz
MFC
Diffstat (limited to 'sys/vm')
-rw-r--r--	sys/vm/memguard.c	|  26
-rw-r--r--	sys/vm/memguard.h	|   2
-rw-r--r--	sys/vm/vm_contig.c	| 351
-rw-r--r--	sys/vm/vm_kern.c	| 142
-rw-r--r--	sys/vm/vm_map.h	|   4
-rw-r--r--	sys/vm/vm_page.c	|  59
-rw-r--r--	sys/vm/vm_page.h	|  87
-rw-r--r--	sys/vm/vm_pageout.c	| 208
-rw-r--r--	sys/vm/vm_pageout.h	|   4
-rw-r--r--	sys/vm/vm_reserv.c	|   2
10 files changed, 416 insertions, 469 deletions
diff --git a/sys/vm/memguard.c b/sys/vm/memguard.c
index 5a690e7..b1740c3 100644
--- a/sys/vm/memguard.c
+++ b/sys/vm/memguard.c
@@ -159,16 +159,18 @@ SYSCTL_ULONG(_vm_memguard, OID_AUTO, frequency_hits, CTLFLAG_RD,
* the kmem_map. The memguard memory will be a submap.
*/
unsigned long
-memguard_fudge(unsigned long km_size, unsigned long km_max)
+memguard_fudge(unsigned long km_size, const struct vm_map *parent_map)
{
- u_long mem_pgs = cnt.v_page_count;
+ u_long mem_pgs, parent_size;
vm_memguard_divisor = 10;
TUNABLE_INT_FETCH("vm.memguard.divisor", &vm_memguard_divisor);
+ parent_size = vm_map_max(parent_map) - vm_map_min(parent_map) +
+ PAGE_SIZE;
/* Pick a conservative value if provided value sucks. */
if ((vm_memguard_divisor <= 0) ||
- ((km_size / vm_memguard_divisor) == 0))
+ ((parent_size / vm_memguard_divisor) == 0))
vm_memguard_divisor = 10;
/*
* Limit consumption of physical pages to
@@ -177,21 +179,19 @@ memguard_fudge(unsigned long km_size, unsigned long km_max)
* This prevents memguard's page promotions from completely
* using up memory, since most malloc(9) calls are sub-page.
*/
+ mem_pgs = cnt.v_page_count;
memguard_physlimit = (mem_pgs / vm_memguard_divisor) * PAGE_SIZE;
/*
* We want as much KVA as we can take safely. Use at most our
- * allotted fraction of kmem_max. Limit this to twice the
- * physical memory to avoid using too much memory as pagetable
- * pages.
+ * allotted fraction of the parent map's size. Limit this to
+ * twice the physical memory to avoid using too much memory as
+ * pagetable pages (size must be multiple of PAGE_SIZE).
*/
- memguard_mapsize = km_max / vm_memguard_divisor;
- /* size must be multiple of PAGE_SIZE */
- memguard_mapsize = round_page(memguard_mapsize);
- if (memguard_mapsize == 0 ||
- memguard_mapsize / (2 * PAGE_SIZE) > mem_pgs)
+ memguard_mapsize = round_page(parent_size / vm_memguard_divisor);
+ if (memguard_mapsize / (2 * PAGE_SIZE) > mem_pgs)
memguard_mapsize = mem_pgs * 2 * PAGE_SIZE;
- if (km_max > 0 && km_size + memguard_mapsize > km_max)
- return (km_max);
+ if (km_size + memguard_mapsize > parent_size)
+ memguard_mapsize = 0;
return (km_size + memguard_mapsize);
}
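
[Editor's note] The new sizing rule in memguard_fudge() above can be exercised outside the kernel. The sketch below is a minimal userland approximation of the same arithmetic, assuming a 4 KB page size; the helper name fudge_sketch() and every constant in main() are illustrative assumptions, not values from the commit.

#include <stdio.h>

#define PAGE_SIZE	4096UL
#define PAGE_MASK	(PAGE_SIZE - 1)
#define round_page(x)	(((x) + PAGE_MASK) & ~PAGE_MASK)

/* Userland stand-in for the memguard_fudge() logic shown above. */
static unsigned long
fudge_sketch(unsigned long km_size, unsigned long parent_min,
    unsigned long parent_max, unsigned long mem_pgs, unsigned long divisor)
{
	unsigned long parent_size, mapsize;

	parent_size = parent_max - parent_min + PAGE_SIZE;
	if (divisor == 0 || parent_size / divisor == 0)
		divisor = 10;		/* conservative fallback */
	mapsize = round_page(parent_size / divisor);
	/* Cap at twice physical memory to bound page-table overhead. */
	if (mapsize / (2 * PAGE_SIZE) > mem_pgs)
		mapsize = mem_pgs * 2 * PAGE_SIZE;
	/* If the guard map would not fit in the parent, skip it entirely. */
	if (km_size + mapsize > parent_size)
		mapsize = 0;
	return (km_size + mapsize);
}

int
main(void)
{
	/* 1 GiB parent map, 512 MiB requested kmem, 256k physical pages. */
	printf("fudged size: %lu\n",
	    fudge_sketch(512UL << 20, 0, (1UL << 30) - PAGE_SIZE, 262144, 10));
	return (0);
}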
diff --git a/sys/vm/memguard.h b/sys/vm/memguard.h
index 335e237..9ec4ffd 100644
--- a/sys/vm/memguard.h
+++ b/sys/vm/memguard.h
@@ -35,7 +35,7 @@ struct malloc_type;
struct vm_map;
#ifdef DEBUG_MEMGUARD
-unsigned long memguard_fudge(unsigned long, unsigned long);
+unsigned long memguard_fudge(unsigned long, const struct vm_map *);
void memguard_init(struct vm_map *);
void *memguard_alloc(unsigned long, int);
void *memguard_realloc(void *, unsigned long, struct malloc_type *, int);
diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c
deleted file mode 100644
index e29aaf7..0000000
--- a/sys/vm/vm_contig.c
+++ /dev/null
@@ -1,351 +0,0 @@
-/*-
- * Copyright (c) 1991 Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- * notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- * notice, this list of conditions and the following disclaimer in the
- * documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- * may be used to endorse or promote products derived from this software
- * without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)vm_page.c 7.4 (Berkeley) 5/7/91
- */
-
-/*-
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU
- * School of Computer Science
- * Carnegie Mellon University
- * Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */
-
-#include <sys/cdefs.h>
-__FBSDID("$FreeBSD$");
-
-#include <sys/param.h>
-#include <sys/systm.h>
-#include <sys/lock.h>
-#include <sys/mount.h>
-#include <sys/mutex.h>
-#include <sys/proc.h>
-#include <sys/kernel.h>
-#include <sys/sysctl.h>
-#include <sys/vmmeter.h>
-#include <sys/vnode.h>
-
-#include <vm/vm.h>
-#include <vm/vm_param.h>
-#include <vm/vm_kern.h>
-#include <vm/pmap.h>
-#include <vm/vm_map.h>
-#include <vm/vm_object.h>
-#include <vm/vm_page.h>
-#include <vm/vm_pageout.h>
-#include <vm/vm_pager.h>
-#include <vm/vm_extern.h>
-
-static int
-vm_contig_launder_page(vm_page_t m, vm_page_t *next)
-{
- vm_object_t object;
- vm_page_t m_tmp;
- struct vnode *vp;
- struct mount *mp;
- int vfslocked;
-
- mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- vm_page_lock_assert(m, MA_OWNED);
- object = m->object;
- if (!VM_OBJECT_TRYLOCK(object) &&
- (!vm_pageout_fallback_object_lock(m, next) || m->hold_count != 0)) {
- vm_page_unlock(m);
- VM_OBJECT_UNLOCK(object);
- return (EAGAIN);
- }
- if (vm_page_sleep_if_busy(m, TRUE, "vpctw0")) {
- VM_OBJECT_UNLOCK(object);
- vm_page_lock_queues();
- return (EBUSY);
- }
- vm_page_test_dirty(m);
- if (m->dirty == 0)
- pmap_remove_all(m);
- if (m->dirty != 0) {
- vm_page_unlock(m);
- if ((object->flags & OBJ_DEAD) != 0) {
- VM_OBJECT_UNLOCK(object);
- return (EAGAIN);
- }
- if (object->type == OBJT_VNODE) {
- vm_page_unlock_queues();
- vp = object->handle;
- vm_object_reference_locked(object);
- VM_OBJECT_UNLOCK(object);
- (void) vn_start_write(vp, &mp, V_WAIT);
- vfslocked = VFS_LOCK_GIANT(vp->v_mount);
- vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
- VM_OBJECT_LOCK(object);
- vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
- VM_OBJECT_UNLOCK(object);
- VOP_UNLOCK(vp, 0);
- VFS_UNLOCK_GIANT(vfslocked);
- vm_object_deallocate(object);
- vn_finished_write(mp);
- vm_page_lock_queues();
- return (0);
- } else if (object->type == OBJT_SWAP ||
- object->type == OBJT_DEFAULT) {
- vm_page_unlock_queues();
- m_tmp = m;
- vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC, 0,
- NULL, NULL);
- VM_OBJECT_UNLOCK(object);
- vm_page_lock_queues();
- return (0);
- }
- } else {
- vm_page_cache(m);
- vm_page_unlock(m);
- }
- VM_OBJECT_UNLOCK(object);
- return (0);
-}
-
-static int
-vm_contig_launder(int queue, vm_paddr_t low, vm_paddr_t high)
-{
- vm_page_t m, next;
- vm_paddr_t pa;
- int error;
-
- TAILQ_FOREACH_SAFE(m, &vm_page_queues[queue].pl, pageq, next) {
-
- /* Skip marker pages */
- if ((m->flags & PG_MARKER) != 0)
- continue;
-
- pa = VM_PAGE_TO_PHYS(m);
- if (pa < low || pa + PAGE_SIZE > high)
- continue;
-
- if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
- vm_page_unlock(m);
- continue;
- }
- KASSERT(m->queue == queue,
- ("vm_contig_launder: page %p's queue is not %d", m, queue));
- error = vm_contig_launder_page(m, &next);
- vm_page_lock_assert(m, MA_NOTOWNED);
- if (error == 0)
- return (TRUE);
- if (error == EBUSY)
- return (FALSE);
- }
- return (FALSE);
-}
-
-/*
- * Increase the number of cached pages.
- */
-void
-vm_contig_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high)
-{
- int actl, actmax, inactl, inactmax;
-
- vm_page_lock_queues();
- inactl = 0;
- inactmax = tries < 1 ? 0 : cnt.v_inactive_count;
- actl = 0;
- actmax = tries < 2 ? 0 : cnt.v_active_count;
-again:
- if (inactl < inactmax && vm_contig_launder(PQ_INACTIVE, low, high)) {
- inactl++;
- goto again;
- }
- if (actl < actmax && vm_contig_launder(PQ_ACTIVE, low, high)) {
- actl++;
- goto again;
- }
- vm_page_unlock_queues();
-}
-
-/*
- * Allocates a region from the kernel address map and pages within the
- * specified physical address range to the kernel object, creates a wired
- * mapping from the region to these pages, and returns the region's starting
- * virtual address. The allocated pages are not necessarily physically
- * contiguous. If M_ZERO is specified through the given flags, then the pages
- * are zeroed before they are mapped.
- */
-vm_offset_t
-kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
- vm_paddr_t high, vm_memattr_t memattr)
-{
- vm_object_t object = kernel_object;
- vm_offset_t addr;
- vm_ooffset_t end_offset, offset;
- vm_page_t m;
- int pflags, tries;
-
- size = round_page(size);
- vm_map_lock(map);
- if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
- vm_map_unlock(map);
- return (0);
- }
- offset = addr - VM_MIN_KERNEL_ADDRESS;
- vm_object_reference(object);
- vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
- VM_PROT_ALL, 0);
- if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
- pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY;
- else
- pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY;
- if (flags & M_ZERO)
- pflags |= VM_ALLOC_ZERO;
- VM_OBJECT_LOCK(object);
- end_offset = offset + size;
- for (; offset < end_offset; offset += PAGE_SIZE) {
- tries = 0;
-retry:
- m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1,
- low, high, PAGE_SIZE, 0, memattr);
- if (m == NULL) {
- VM_OBJECT_UNLOCK(object);
- if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
- vm_map_unlock(map);
- vm_contig_grow_cache(tries, low, high);
- vm_map_lock(map);
- VM_OBJECT_LOCK(object);
- tries++;
- goto retry;
- }
- /*
- * Since the pages that were allocated by any previous
- * iterations of this loop are not busy, they can be
- * freed by vm_object_page_remove(), which is called
- * by vm_map_delete().
- */
- vm_map_delete(map, addr, addr + size);
- vm_map_unlock(map);
- return (0);
- }
- if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
- pmap_zero_page(m);
- m->valid = VM_PAGE_BITS_ALL;
- }
- VM_OBJECT_UNLOCK(object);
- vm_map_unlock(map);
- vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
- VM_MAP_WIRE_NOHOLES);
- return (addr);
-}
-
-/*
- * Allocates a region from the kernel address map, inserts the
- * given physically contiguous pages into the kernel object,
- * creates a wired mapping from the region to the pages, and
- * returns the region's starting virtual address. If M_ZERO is
- * specified through the given flags, then the pages are zeroed
- * before they are mapped.
- */
-vm_offset_t
-kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
- vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
- vm_memattr_t memattr)
-{
- vm_object_t object = kernel_object;
- vm_offset_t addr;
- vm_ooffset_t offset;
- vm_page_t end_m, m;
- int pflags, tries;
-
- size = round_page(size);
- vm_map_lock(map);
- if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
- vm_map_unlock(map);
- return (0);
- }
- offset = addr - VM_MIN_KERNEL_ADDRESS;
- vm_object_reference(object);
- vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
- VM_PROT_ALL, 0);
- if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
- pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY;
- else
- pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY;
- if (flags & M_ZERO)
- pflags |= VM_ALLOC_ZERO;
- if (flags & M_NODUMP)
- pflags |= VM_ALLOC_NODUMP;
- VM_OBJECT_LOCK(object);
- tries = 0;
-retry:
- m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags,
- atop(size), low, high, alignment, boundary, memattr);
- if (m == NULL) {
- VM_OBJECT_UNLOCK(object);
- if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
- vm_map_unlock(map);
- vm_contig_grow_cache(tries, low, high);
- vm_map_lock(map);
- VM_OBJECT_LOCK(object);
- tries++;
- goto retry;
- }
- vm_map_delete(map, addr, addr + size);
- vm_map_unlock(map);
- return (0);
- }
- end_m = m + atop(size);
- for (; m < end_m; m++) {
- if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
- pmap_zero_page(m);
- m->valid = VM_PAGE_BITS_ALL;
- }
- VM_OBJECT_UNLOCK(object);
- vm_map_unlock(map);
- vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
- VM_MAP_WIRE_NOHOLES);
- return (addr);
-}
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 5e157a6..46e7f1c 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -195,6 +195,148 @@ kmem_alloc(map, size)
}
/*
+ * Allocates a region from the kernel address map and physical pages
+ * within the specified address range to the kernel object. Creates a
+ * wired mapping from this region to these pages, and returns the
+ * region's starting virtual address. The allocated pages are not
+ * necessarily physically contiguous. If M_ZERO is specified through the
+ * given flags, then the pages are zeroed before they are mapped.
+ */
+vm_offset_t
+kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
+ vm_paddr_t high, vm_memattr_t memattr)
+{
+ vm_object_t object = kernel_object;
+ vm_offset_t addr;
+ vm_ooffset_t end_offset, offset;
+ vm_page_t m;
+ int pflags, tries;
+
+ size = round_page(size);
+ vm_map_lock(map);
+ if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
+ vm_map_unlock(map);
+ return (0);
+ }
+ offset = addr - VM_MIN_KERNEL_ADDRESS;
+ vm_object_reference(object);
+ vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
+ VM_PROT_ALL, 0);
+ if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
+ pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY;
+ else
+ pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY;
+ if (flags & M_ZERO)
+ pflags |= VM_ALLOC_ZERO;
+ VM_OBJECT_LOCK(object);
+ end_offset = offset + size;
+ for (; offset < end_offset; offset += PAGE_SIZE) {
+ tries = 0;
+retry:
+ m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1,
+ low, high, PAGE_SIZE, 0, memattr);
+ if (m == NULL) {
+ VM_OBJECT_UNLOCK(object);
+ if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
+ vm_map_unlock(map);
+ vm_pageout_grow_cache(tries, low, high);
+ vm_map_lock(map);
+ VM_OBJECT_LOCK(object);
+ tries++;
+ goto retry;
+ }
+
+ /*
+ * Since the pages that were allocated by any previous
+ * iterations of this loop are not busy, they can be
+ * freed by vm_object_page_remove(), which is called
+ * by vm_map_delete().
+ */
+ vm_map_delete(map, addr, addr + size);
+ vm_map_unlock(map);
+ return (0);
+ }
+ if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+ m->valid = VM_PAGE_BITS_ALL;
+ }
+ VM_OBJECT_UNLOCK(object);
+ vm_map_unlock(map);
+ vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
+ VM_MAP_WIRE_NOHOLES);
+ return (addr);
+}
+
+/*
+ * Allocates a region from the kernel address map and physically
+ * contiguous pages within the specified address range to the kernel
+ * object. Creates a wired mapping from this region to these pages, and
+ * returns the region's starting virtual address. If M_ZERO is specified
+ * through the given flags, then the pages are zeroed before they are
+ * mapped.
+ */
+vm_offset_t
+kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low,
+ vm_paddr_t high, u_long alignment, vm_paddr_t boundary,
+ vm_memattr_t memattr)
+{
+ vm_object_t object = kernel_object;
+ vm_offset_t addr;
+ vm_ooffset_t offset;
+ vm_page_t end_m, m;
+ int pflags, tries;
+
+ size = round_page(size);
+ vm_map_lock(map);
+ if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
+ vm_map_unlock(map);
+ return (0);
+ }
+ offset = addr - VM_MIN_KERNEL_ADDRESS;
+ vm_object_reference(object);
+ vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL,
+ VM_PROT_ALL, 0);
+ if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT)
+ pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY;
+ else
+ pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY;
+ if (flags & M_ZERO)
+ pflags |= VM_ALLOC_ZERO;
+ if (flags & M_NODUMP)
+ pflags |= VM_ALLOC_NODUMP;
+ VM_OBJECT_LOCK(object);
+ tries = 0;
+retry:
+ m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags,
+ atop(size), low, high, alignment, boundary, memattr);
+ if (m == NULL) {
+ VM_OBJECT_UNLOCK(object);
+ if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
+ vm_map_unlock(map);
+ vm_pageout_grow_cache(tries, low, high);
+ vm_map_lock(map);
+ VM_OBJECT_LOCK(object);
+ tries++;
+ goto retry;
+ }
+ vm_map_delete(map, addr, addr + size);
+ vm_map_unlock(map);
+ return (0);
+ }
+ end_m = m + atop(size);
+ for (; m < end_m; m++) {
+ if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0)
+ pmap_zero_page(m);
+ m->valid = VM_PAGE_BITS_ALL;
+ }
+ VM_OBJECT_UNLOCK(object);
+ vm_map_unlock(map);
+ vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM |
+ VM_MAP_WIRE_NOHOLES);
+ return (addr);
+}
+
+/*
* kmem_free:
*
* Release a region of kernel virtual memory allocated
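
[Editor's note] For context, a hypothetical in-kernel caller of the relocated kmem_alloc_contig() could look like the sketch below. The function name example_dma_buffer(), the physical bounds, and the use of kernel_map are assumptions made for illustration; in-tree consumers typically reach this allocator through contigmalloc(9) rather than calling it directly.

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/malloc.h>

#include <vm/vm.h>
#include <vm/vm_param.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>

/*
 * Hypothetical example: allocate a wired, zeroed, physically contiguous
 * buffer below 4 GiB, e.g. for a 32-bit DMA engine.
 */
static vm_offset_t
example_dma_buffer(vm_size_t size)
{
	vm_offset_t va;

	va = kmem_alloc_contig(kernel_map, round_page(size),
	    M_WAITOK | M_ZERO, 0, 0xffffffffUL, PAGE_SIZE, 0,
	    VM_MEMATTR_DEFAULT);
	if (va == 0)
		printf("example_dma_buffer: allocation failed\n");
	return (va);
}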
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index 056eac5..b3b1ad4 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -200,13 +200,13 @@ struct vm_map {
#ifdef _KERNEL
static __inline vm_offset_t
-vm_map_max(vm_map_t map)
+vm_map_max(const struct vm_map *map)
{
return (map->max_offset);
}
static __inline vm_offset_t
-vm_map_min(vm_map_t map)
+vm_map_min(const struct vm_map *map)
{
return (map->min_offset);
}
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index ec96135..ccadd8d 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -451,63 +451,6 @@ vm_page_startup(vm_offset_t vaddr)
return (vaddr);
}
-
-CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0);
-
-void
-vm_page_aflag_set(vm_page_t m, uint8_t bits)
-{
- uint32_t *addr, val;
-
- /*
- * The PGA_WRITEABLE flag can only be set if the page is managed and
- * VPO_BUSY. Currently, this flag is only set by pmap_enter().
- */
- KASSERT((bits & PGA_WRITEABLE) == 0 ||
- (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY,
- ("PGA_WRITEABLE and !VPO_BUSY"));
-
- /*
- * We want to use atomic updates for m->aflags, which is a
- * byte wide. Not all architectures provide atomic operations
- * on the single-byte destination. Punt and access the whole
- * 4-byte word with an atomic update. Parallel non-atomic
- * updates to the fields included in the update by proximity
- * are handled properly by atomics.
- */
- addr = (void *)&m->aflags;
- MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0);
- val = bits;
-#if BYTE_ORDER == BIG_ENDIAN
- val <<= 24;
-#endif
- atomic_set_32(addr, val);
-}
-
-void
-vm_page_aflag_clear(vm_page_t m, uint8_t bits)
-{
- uint32_t *addr, val;
-
- /*
- * The PGA_REFERENCED flag can only be cleared if the object
- * containing the page is locked.
- */
- KASSERT((bits & PGA_REFERENCED) == 0 || VM_OBJECT_LOCKED(m->object),
- ("PGA_REFERENCED and !VM_OBJECT_LOCKED"));
-
- /*
- * See the comment in vm_page_aflag_set().
- */
- addr = (void *)&m->aflags;
- MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0);
- val = bits;
-#if BYTE_ORDER == BIG_ENDIAN
- val <<= 24;
-#endif
- atomic_clear_32(addr, val);
-}
-
void
vm_page_reference(vm_page_t m)
{
@@ -1480,7 +1423,7 @@ retry:
cpindex = pindex;
for (m = m_ret; m < &m_ret[npages]; m++) {
m->aflags = 0;
- m->flags &= flags;
+ m->flags = (m->flags | PG_NODUMP) & flags;
if ((req & VM_ALLOC_WIRED) != 0)
m->wire_count = 1;
/* Unmanaged pages don't use "act_count". */
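
[Editor's note] The one-line masking change above makes the resulting PG_NODUMP state depend only on the caller's request rather than on whatever the page carried before. A small worked example of the bit arithmetic follows; the flag values are illustrative stand-ins, not the definitions from vm_page.h.

#include <stdio.h>

/* Illustrative stand-ins for the real flag definitions. */
#define PG_ZERO		0x0008u
#define PG_NODUMP	0x0080u

int
main(void)
{
	unsigned prev = PG_ZERO;		/* page lacked PG_NODUMP */
	unsigned req = PG_ZERO | PG_NODUMP;	/* caller asked for it */

	/* Old expression: PG_NODUMP could never be turned on here. */
	printf("m->flags &= flags              -> %#x\n", prev & req);
	/* New expression: OR it in first, then mask with the request. */
	printf("(m->flags | PG_NODUMP) & flags -> %#x\n",
	    (prev | PG_NODUMP) & req);
	return (0);
}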
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index da7fd89..e95f173 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -237,13 +237,14 @@ extern struct vpglocks pa_lock[];
#define vm_page_queue_free_mtx vm_page_queue_free_lock.data
/*
- * These are the flags defined for vm_page.
- *
- * aflags are updated by atomic accesses. Use the vm_page_aflag_set()
- * and vm_page_aflag_clear() functions to set and clear the flags.
+ * The vm_page's aflags are updated using atomic operations. To set or clear
+ * these flags, the functions vm_page_aflag_set() and vm_page_aflag_clear()
+ * must be used. Neither these flags nor these functions are part of the KBI.
*
* PGA_REFERENCED may be cleared only if the object containing the page is
- * locked. It is set by both the MI and MD VM layers.
+ * locked. It is set by both the MI and MD VM layers. However, kernel
+ * loadable modules should not directly set this flag. They should call
+ * vm_page_reference() instead.
*
* PGA_WRITEABLE is set exclusively on managed pages by pmap_enter(). When it
* does so, the page must be VPO_BUSY. The MI VM layer must never access this
@@ -279,8 +280,12 @@ extern struct vpglocks pa_lock[];
#ifdef _KERNEL
+#include <sys/systm.h>
+
#include <vm/vm_param.h>
+#include <machine/atomic.h>
+
/*
* Each pageable resident page falls into one of five lists:
*
@@ -308,7 +313,6 @@ extern struct vpglocks pa_lock[];
*
*/
-struct vnode;
extern int vm_page_zero_count;
extern vm_page_t vm_page_array; /* First resident page in table */
@@ -348,8 +352,6 @@ extern struct vpglocks vm_page_queue_lock;
#define VM_ALLOC_COUNT_SHIFT 16
#define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT)
-void vm_page_aflag_set(vm_page_t m, uint8_t bits);
-void vm_page_aflag_clear(vm_page_t m, uint8_t bits);
void vm_page_busy(vm_page_t m);
void vm_page_flash(vm_page_t m);
void vm_page_io_start(vm_page_t m);
@@ -425,6 +427,75 @@ void vm_page_object_lock_assert(vm_page_t m);
#endif
/*
+ * We want to use atomic updates for the aflags field, which is 8 bits wide.
+ * However, not all architectures support atomic operations on 8-bit
+ * destinations. In order that we can easily use a 32-bit operation, we
+ * require that the aflags field be 32-bit aligned.
+ */
+CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0);
+
+/*
+ * Clear the given bits in the specified page.
+ */
+static inline void
+vm_page_aflag_clear(vm_page_t m, uint8_t bits)
+{
+ uint32_t *addr, val;
+
+ /*
+ * The PGA_REFERENCED flag can only be cleared if the object
+ * containing the page is locked.
+ */
+ if ((bits & PGA_REFERENCED) != 0)
+ VM_PAGE_OBJECT_LOCK_ASSERT(m);
+
+ /*
+ * Access the whole 32-bit word containing the aflags field with an
+ * atomic update. Parallel non-atomic updates to the other fields
+ * within this word are handled properly by the atomic update.
+ */
+ addr = (void *)&m->aflags;
+ KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0,
+ ("vm_page_aflag_clear: aflags is misaligned"));
+ val = bits;
+#if BYTE_ORDER == BIG_ENDIAN
+ val <<= 24;
+#endif
+ atomic_clear_32(addr, val);
+}
+
+/*
+ * Set the given bits in the specified page.
+ */
+static inline void
+vm_page_aflag_set(vm_page_t m, uint8_t bits)
+{
+ uint32_t *addr, val;
+
+ /*
+ * The PGA_WRITEABLE flag can only be set if the page is managed and
+ * VPO_BUSY. Currently, this flag is only set by pmap_enter().
+ */
+ KASSERT((bits & PGA_WRITEABLE) == 0 ||
+ (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY,
+ ("vm_page_aflag_set: PGA_WRITEABLE and !VPO_BUSY"));
+
+ /*
+ * Access the whole 32-bit word containing the aflags field with an
+ * atomic update. Parallel non-atomic updates to the other fields
+ * within this word are handled properly by the atomic update.
+ */
+ addr = (void *)&m->aflags;
+ KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0,
+ ("vm_page_aflag_set: aflags is misaligned"));
+ val = bits;
+#if BYTE_ORDER == BIG_ENDIAN
+ val <<= 24;
+#endif
+ atomic_set_32(addr, val);
+}
+
+/*
* vm_page_dirty:
*
* Set all bits in the page's dirty field.
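
[Editor's note] The inlined helpers above rely on a byte-within-word trick: atomically updating the 32-bit word that contains the 8-bit aflags field. The sketch below reproduces that idiom in userland, substituting GCC/Clang __atomic builtins for the kernel's atomic_set_32(); the struct layout and the flag value are assumptions made only for the demonstration.

#include <stdint.h>
#include <stdio.h>

/* Toy layout: aflags sits at a 32-bit aligned offset, as CTASSERTed above. */
struct fake_page {
	uint8_t		aflags;
	uint8_t		oflags;
	uint16_t	flags;
} __attribute__((aligned(4)));

static void
aflag_set_sketch(struct fake_page *p, uint8_t bits)
{
	uint32_t *addr, val;

	/* Update the whole word containing aflags with one atomic op. */
	addr = (uint32_t *)(void *)p;
	val = bits;
#if __BYTE_ORDER__ == __ORDER_BIG_ENDIAN__
	val <<= 24;	/* the byte at the lowest address is the MSB */
#endif
	__atomic_fetch_or(addr, val, __ATOMIC_SEQ_CST);
}

int
main(void)
{
	struct fake_page p = { 0x00, 0x01, 0x0203 };

	aflag_set_sketch(&p, 0x02);
	/* The neighboring fields in the same word are untouched. */
	printf("aflags=%#x oflags=%#x flags=%#x\n",
	    p.aflags, p.oflags, p.flags);
	return (0);
}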
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 11d040d..3994ce1 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -209,11 +209,14 @@ int vm_page_max_wired; /* XXX max # of wired pages system-wide */
SYSCTL_INT(_vm, OID_AUTO, max_wired,
CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
+static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
+static boolean_t vm_pageout_launder(int, int, vm_paddr_t, vm_paddr_t);
#if !defined(NO_SWAPPING)
static void vm_pageout_map_deactivate_pages(vm_map_t, long);
static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
static void vm_req_vmdaemon(int req);
#endif
+static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
static void vm_pageout_page_stats(void);
/*
@@ -247,7 +250,7 @@ vm_pageout_init_marker(vm_page_t marker, u_short queue)
* This function depends on both the lock portion of struct vm_object
* and normal struct vm_page being type stable.
*/
-boolean_t
+static boolean_t
vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
{
struct vm_page marker;
@@ -286,7 +289,7 @@ vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
*
* This function depends on normal struct vm_page being type stable.
*/
-boolean_t
+static boolean_t
vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
{
struct vm_page marker;
@@ -558,6 +561,138 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
return (numpagedout);
}
+static boolean_t
+vm_pageout_launder(int queue, int tries, vm_paddr_t low, vm_paddr_t high)
+{
+ struct mount *mp;
+ struct vnode *vp;
+ vm_object_t object;
+ vm_paddr_t pa;
+ vm_page_t m, m_tmp, next;
+ int vfslocked;
+
+ vm_page_lock_queues();
+ TAILQ_FOREACH_SAFE(m, &vm_page_queues[queue].pl, pageq, next) {
+ KASSERT(m->queue == queue,
+ ("vm_pageout_launder: page %p's queue is not %d", m,
+ queue));
+ if ((m->flags & PG_MARKER) != 0)
+ continue;
+ pa = VM_PAGE_TO_PHYS(m);
+ if (pa < low || pa + PAGE_SIZE > high)
+ continue;
+ if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
+ vm_page_unlock(m);
+ continue;
+ }
+ object = m->object;
+ if (!VM_OBJECT_TRYLOCK(object) &&
+ (!vm_pageout_fallback_object_lock(m, &next) ||
+ m->hold_count != 0)) {
+ vm_page_unlock(m);
+ VM_OBJECT_UNLOCK(object);
+ continue;
+ }
+ if ((m->oflags & VPO_BUSY) != 0 || m->busy != 0) {
+ if (tries == 0) {
+ vm_page_unlock(m);
+ VM_OBJECT_UNLOCK(object);
+ continue;
+ }
+ vm_page_sleep(m, "vpctw0");
+ VM_OBJECT_UNLOCK(object);
+ return (FALSE);
+ }
+ vm_page_test_dirty(m);
+ if (m->dirty == 0)
+ pmap_remove_all(m);
+ if (m->dirty != 0) {
+ vm_page_unlock(m);
+ if (tries == 0 || (object->flags & OBJ_DEAD) != 0) {
+ VM_OBJECT_UNLOCK(object);
+ continue;
+ }
+ if (object->type == OBJT_VNODE) {
+ vm_page_unlock_queues();
+ vp = object->handle;
+ vm_object_reference_locked(object);
+ VM_OBJECT_UNLOCK(object);
+ (void)vn_start_write(vp, &mp, V_WAIT);
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+ VM_OBJECT_LOCK(object);
+ vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
+ VM_OBJECT_UNLOCK(object);
+ VOP_UNLOCK(vp, 0);
+ VFS_UNLOCK_GIANT(vfslocked);
+ vm_object_deallocate(object);
+ vn_finished_write(mp);
+ return (TRUE);
+ } else if (object->type == OBJT_SWAP ||
+ object->type == OBJT_DEFAULT) {
+ vm_page_unlock_queues();
+ m_tmp = m;
+ vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC,
+ 0, NULL, NULL);
+ VM_OBJECT_UNLOCK(object);
+ return (TRUE);
+ }
+ } else {
+ vm_page_cache(m);
+ vm_page_unlock(m);
+ }
+ VM_OBJECT_UNLOCK(object);
+ }
+ vm_page_unlock_queues();
+ return (FALSE);
+}
+
+/*
+ * Increase the number of cached pages. The specified value, "tries",
+ * determines which categories of pages are cached:
+ *
+ * 0: All clean, inactive pages within the specified physical address range
+ * are cached. Will not sleep.
+ * 1: The vm_lowmem handlers are called. All inactive pages within
+ * the specified physical address range are cached. May sleep.
+ * 2: The vm_lowmem handlers are called. All inactive and active pages
+ * within the specified physical address range are cached. May sleep.
+ */
+void
+vm_pageout_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high)
+{
+ int actl, actmax, inactl, inactmax;
+
+ if (tries > 0) {
+ /*
+ * Decrease registered cache sizes. The vm_lowmem handlers
+ * may acquire locks and/or sleep, so they can only be invoked
+ * when "tries" is greater than zero.
+ */
+ EVENTHANDLER_INVOKE(vm_lowmem, 0);
+
+ /*
+ * We do this explicitly after the caches have been drained
+ * above.
+ */
+ uma_reclaim();
+ }
+ inactl = 0;
+ inactmax = cnt.v_inactive_count;
+ actl = 0;
+ actmax = tries < 2 ? 0 : cnt.v_active_count;
+again:
+ if (inactl < inactmax && vm_pageout_launder(PQ_INACTIVE, tries, low,
+ high)) {
+ inactl++;
+ goto again;
+ }
+ if (actl < actmax && vm_pageout_launder(PQ_ACTIVE, tries, low, high)) {
+ actl++;
+ goto again;
+ }
+}
+
#if !defined(NO_SWAPPING)
/*
* vm_pageout_object_deactivate_pages
@@ -738,7 +873,7 @@ vm_pageout_scan(int pass)
vm_page_t m, next;
struct vm_page marker;
int page_shortage, maxscan, pcount;
- int addl_page_shortage, addl_page_shortage_init;
+ int addl_page_shortage;
vm_object_t object;
int actcount;
int vnodes_skipped = 0;
@@ -754,13 +889,19 @@ vm_pageout_scan(int pass)
*/
uma_reclaim();
- addl_page_shortage_init = atomic_readandclear_int(&vm_pageout_deficit);
+ /*
+ * The addl_page_shortage is the number of temporarily
+ * stuck pages in the inactive queue. In other words, the
+ * number of pages from cnt.v_inactive_count that should be
+ * discounted in setting the target for the active queue scan.
+ */
+ addl_page_shortage = atomic_readandclear_int(&vm_pageout_deficit);
/*
* Calculate the number of pages we want to either free or move
* to the cache.
*/
- page_shortage = vm_paging_target() + addl_page_shortage_init;
+ page_shortage = vm_paging_target() + addl_page_shortage;
vm_pageout_init_marker(&marker, PQ_INACTIVE);
@@ -786,8 +927,6 @@ vm_pageout_scan(int pass)
maxlaunder = 10000;
vm_page_lock_queues();
queues_locked = TRUE;
-rescan0:
- addl_page_shortage = addl_page_shortage_init;
maxscan = cnt.v_inactive_count;
for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
@@ -795,12 +934,9 @@ rescan0:
m = next) {
KASSERT(queues_locked, ("unlocked queues"));
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+ KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m));
cnt.v_pdpages++;
-
- if (m->queue != PQ_INACTIVE)
- goto rescan0;
-
next = TAILQ_NEXT(m, pageq);
/*
@@ -815,38 +951,31 @@ rescan0:
("Unmanaged page %p cannot be in inactive queue", m));
/*
- * Lock the page.
+ * The page or object lock acquisitions fail if the
+ * page was removed from the queue or moved to a
+ * different position within the queue. In either
+ * case, addl_page_shortage should not be incremented.
*/
if (!vm_pageout_page_lock(m, &next)) {
vm_page_unlock(m);
- addl_page_shortage++;
continue;
}
-
- /*
- * A held page may be undergoing I/O, so skip it.
- */
- if (m->hold_count) {
+ object = m->object;
+ if (!VM_OBJECT_TRYLOCK(object) &&
+ !vm_pageout_fallback_object_lock(m, &next)) {
vm_page_unlock(m);
- vm_page_requeue(m);
- addl_page_shortage++;
+ VM_OBJECT_UNLOCK(object);
continue;
}
/*
- * Don't mess with busy pages, keep in the front of the
- * queue, most likely are being paged out.
+ * Don't mess with busy pages, keep them at the
+ * front of the queue, most likely they are being
+ * paged out. Increment addl_page_shortage for busy
+ * pages, because they may leave the inactive queue
+ * shortly after page scan is finished.
*/
- object = m->object;
- if (!VM_OBJECT_TRYLOCK(object) &&
- (!vm_pageout_fallback_object_lock(m, &next) ||
- m->hold_count != 0)) {
- VM_OBJECT_UNLOCK(object);
- vm_page_unlock(m);
- addl_page_shortage++;
- continue;
- }
- if (m->busy || (m->oflags & VPO_BUSY)) {
+ if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) {
vm_page_unlock(m);
VM_OBJECT_UNLOCK(object);
addl_page_shortage++;
@@ -906,6 +1035,21 @@ rescan0:
goto relock_queues;
}
+ if (m->hold_count != 0) {
+ vm_page_unlock(m);
+ VM_OBJECT_UNLOCK(object);
+
+ /*
+ * Held pages are essentially stuck in the
+ * queue. So, they ought to be discounted
+ * from cnt.v_inactive_count. See the
+ * calculation of the page_shortage for the
+ * loop over the active queue below.
+ */
+ addl_page_shortage++;
+ goto relock_queues;
+ }
+
/*
* If the upper level VM system does not believe that the page
* is fully dirty, but it is mapped for write access, then we
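
[Editor's note] The tries-based policy documented above is driven from the allocation side: a caller that fails to find suitable pages retries with a progressively larger tries value, so later passes may invoke the vm_lowmem handlers and launder more aggressively. Below is a hedged userland sketch of that retry shape; try_alloc() and grow_cache_stub() are stand-ins, and the real kernel loops (in vm_kern.c above) additionally juggle the map and object locks.

#include <stdbool.h>
#include <stdio.h>

/* Stand-in for a contiguous allocation attempt; succeeds on the third pass. */
static bool
try_alloc(int attempt)
{
	return (attempt >= 2);
}

/* Stand-in for vm_pageout_grow_cache(tries, low, high). */
static void
grow_cache_stub(int tries)
{
	/*
	 * tries == 0: cache clean inactive pages only, never sleep.
	 * tries == 1: run the vm_lowmem handlers, launder inactive pages.
	 * tries >= 2: also launder active pages.
	 */
	printf("grow cache, tries=%d\n", tries);
}

int
main(void)
{
	bool can_sleep = true;		/* caller did not pass M_NOWAIT */
	int max_tries = can_sleep ? 3 : 1;
	int tries = 0;

	for (;;) {
		if (try_alloc(tries)) {
			printf("allocation succeeded on attempt %d\n", tries);
			return (0);
		}
		if (tries >= max_tries) {
			printf("giving up\n");
			return (1);
		}
		grow_cache_stub(tries);
		tries++;
	}
}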
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
index 6897bbb..c7b4e90 100644
--- a/sys/vm/vm_pageout.h
+++ b/sys/vm/vm_pageout.h
@@ -101,10 +101,8 @@ extern void vm_wait(void);
extern void vm_waitpfault(void);
#ifdef _KERNEL
-boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *);
+void vm_pageout_grow_cache(int, vm_paddr_t, vm_paddr_t);
void vm_pageout_oom(int shortage);
-boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
-void vm_contig_grow_cache(int, vm_paddr_t, vm_paddr_t);
#endif
#endif /* _VM_VM_PAGEOUT_H_ */
diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c
index 10db93c..549e710 100644
--- a/sys/vm/vm_reserv.c
+++ b/sys/vm/vm_reserv.c
@@ -445,7 +445,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
m += VM_LEVEL_0_NPAGES;
first += VM_LEVEL_0_NPAGES;
allocpages -= VM_LEVEL_0_NPAGES;
- } while (allocpages > VM_LEVEL_0_NPAGES);
+ } while (allocpages > 0);
return (m_ret);
/*
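
[Editor's note] The loop-condition change in vm_reserv_alloc_contig() above matters when allocpages counts an exact multiple of the reservation size: the old test stops with one full chunk still pending, while the new test processes it. A tiny worked comparison of the two conditions, using an illustrative chunk size rather than the per-architecture VM_LEVEL_0_NPAGES:

#include <stdio.h>

#define CHUNK	512	/* illustrative stand-in for VM_LEVEL_0_NPAGES */

static int
chunks_processed(int allocpages, int stop_above)
{
	int n = 0;

	/* Mirrors the do { ... } while (allocpages > stop_above) shape. */
	do {
		n++;
		allocpages -= CHUNK;
	} while (allocpages > stop_above);
	return (n);
}

int
main(void)
{
	int allocpages = 3 * CHUNK;

	printf("old condition (> CHUNK): %d chunks\n",
	    chunks_processed(allocpages, CHUNK));
	printf("new condition (> 0):     %d chunks\n",
	    chunks_processed(allocpages, 0));
	return (0);
}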