| field | value | date |
|---|---|---|
| author | attilio <attilio@FreeBSD.org> | 2012-08-03 15:58:05 +0000 |
| committer | attilio <attilio@FreeBSD.org> | 2012-08-03 15:58:05 +0000 |
| commit | c52a057b1917918a0faad18ccf6666c4cdcb5c8c (patch) | |
| tree | 21862df57c9f0eef84636c11cb5c3ecf3d578893 /sys/vm | |
| parent | 675a214708d3e1fb79cbcf6bd8a46e0f1cc4823d (diff) | |
| parent | 8916b8f9033faa7d822c84a2bdd638979f11fab1 (diff) | |
| download | FreeBSD-src-c52a057b1917918a0faad18ccf6666c4cdcb5c8c.zip, FreeBSD-src-c52a057b1917918a0faad18ccf6666c4cdcb5c8c.tar.gz | |
MFC
Diffstat (limited to 'sys/vm')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | sys/vm/memguard.c | 26 |
| -rw-r--r-- | sys/vm/memguard.h | 2 |
| -rw-r--r-- | sys/vm/vm_contig.c | 351 |
| -rw-r--r-- | sys/vm/vm_kern.c | 142 |
| -rw-r--r-- | sys/vm/vm_map.h | 4 |
| -rw-r--r-- | sys/vm/vm_page.c | 59 |
| -rw-r--r-- | sys/vm/vm_page.h | 87 |
| -rw-r--r-- | sys/vm/vm_pageout.c | 208 |
| -rw-r--r-- | sys/vm/vm_pageout.h | 4 |
| -rw-r--r-- | sys/vm/vm_reserv.c | 2 |
10 files changed, 416 insertions, 469 deletions
diff --git a/sys/vm/memguard.c b/sys/vm/memguard.c
index 5a690e7..b1740c3 100644
--- a/sys/vm/memguard.c
+++ b/sys/vm/memguard.c
@@ -159,16 +159,18 @@ SYSCTL_ULONG(_vm_memguard, OID_AUTO, frequency_hits, CTLFLAG_RD,
  * the kmem_map.  The memguard memory will be a submap.
  */
 unsigned long
-memguard_fudge(unsigned long km_size, unsigned long km_max)
+memguard_fudge(unsigned long km_size, const struct vm_map *parent_map)
 {
-	u_long mem_pgs = cnt.v_page_count;
+	u_long mem_pgs, parent_size;
 
 	vm_memguard_divisor = 10;
 	TUNABLE_INT_FETCH("vm.memguard.divisor", &vm_memguard_divisor);
 
+	parent_size = vm_map_max(parent_map) - vm_map_min(parent_map) +
+	    PAGE_SIZE;
 	/* Pick a conservative value if provided value sucks. */
 	if ((vm_memguard_divisor <= 0) ||
-	    ((km_size / vm_memguard_divisor) == 0))
+	    ((parent_size / vm_memguard_divisor) == 0))
 		vm_memguard_divisor = 10;
 	/*
 	 * Limit consumption of physical pages to
@@ -177,21 +179,19 @@ memguard_fudge(unsigned long km_size, unsigned long km_max)
 	 * This prevents memguard's page promotions from completely
 	 * using up memory, since most malloc(9) calls are sub-page.
 	 */
+	mem_pgs = cnt.v_page_count;
 	memguard_physlimit = (mem_pgs / vm_memguard_divisor) * PAGE_SIZE;
 	/*
 	 * We want as much KVA as we can take safely.  Use at most our
-	 * allotted fraction of kmem_max.  Limit this to twice the
-	 * physical memory to avoid using too much memory as pagetable
-	 * pages.
+	 * allotted fraction of the parent map's size.  Limit this to
+	 * twice the physical memory to avoid using too much memory as
+	 * pagetable pages (size must be multiple of PAGE_SIZE).
 	 */
-	memguard_mapsize = km_max / vm_memguard_divisor;
-	/* size must be multiple of PAGE_SIZE */
-	memguard_mapsize = round_page(memguard_mapsize);
-	if (memguard_mapsize == 0 ||
-	    memguard_mapsize / (2 * PAGE_SIZE) > mem_pgs)
+	memguard_mapsize = round_page(parent_size / vm_memguard_divisor);
+	if (memguard_mapsize / (2 * PAGE_SIZE) > mem_pgs)
 		memguard_mapsize = mem_pgs * 2 * PAGE_SIZE;
-	if (km_max > 0 && km_size + memguard_mapsize > km_max)
-		return (km_max);
+	if (km_size + memguard_mapsize > parent_size)
+		memguard_mapsize = 0;
 	return (km_size + memguard_mapsize);
 }
diff --git a/sys/vm/memguard.h b/sys/vm/memguard.h
index 335e237..9ec4ffd 100644
--- a/sys/vm/memguard.h
+++ b/sys/vm/memguard.h
@@ -35,7 +35,7 @@ struct malloc_type;
 struct vm_map;
 
 #ifdef DEBUG_MEMGUARD
-unsigned long	memguard_fudge(unsigned long, unsigned long);
+unsigned long	memguard_fudge(unsigned long, const struct vm_map *);
 void	memguard_init(struct vm_map *);
 void	*memguard_alloc(unsigned long, int);
 void	*memguard_realloc(void *, unsigned long, struct malloc_type *, int);
diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c
deleted file mode 100644
index e29aaf7..0000000
--- a/sys/vm/vm_contig.c
+++ /dev/null
@@ -1,351 +0,0 @@
-/*-
- * Copyright (c) 1991 Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
- */
-
-/*-
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/lock.h> -#include <sys/mount.h> -#include <sys/mutex.h> -#include <sys/proc.h> -#include <sys/kernel.h> -#include <sys/sysctl.h> -#include <sys/vmmeter.h> -#include <sys/vnode.h> - -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/vm_kern.h> -#include <vm/pmap.h> -#include <vm/vm_map.h> -#include <vm/vm_object.h> -#include <vm/vm_page.h> -#include <vm/vm_pageout.h> -#include <vm/vm_pager.h> -#include <vm/vm_extern.h> - -static int -vm_contig_launder_page(vm_page_t m, vm_page_t *next) -{ - vm_object_t object; - vm_page_t m_tmp; - struct vnode *vp; - struct mount *mp; - int vfslocked; - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - vm_page_lock_assert(m, MA_OWNED); - object = m->object; - if (!VM_OBJECT_TRYLOCK(object) && - (!vm_pageout_fallback_object_lock(m, next) || m->hold_count != 0)) { - vm_page_unlock(m); - VM_OBJECT_UNLOCK(object); - return (EAGAIN); - } - if (vm_page_sleep_if_busy(m, TRUE, "vpctw0")) { - VM_OBJECT_UNLOCK(object); - vm_page_lock_queues(); - return (EBUSY); - } - vm_page_test_dirty(m); - if (m->dirty == 0) - pmap_remove_all(m); - if (m->dirty != 0) { - vm_page_unlock(m); - if ((object->flags & OBJ_DEAD) != 0) { - VM_OBJECT_UNLOCK(object); - return (EAGAIN); - } - if (object->type == OBJT_VNODE) { - vm_page_unlock_queues(); - vp = object->handle; - vm_object_reference_locked(object); - VM_OBJECT_UNLOCK(object); - (void) vn_start_write(vp, &mp, V_WAIT); - vfslocked = VFS_LOCK_GIANT(vp->v_mount); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - VM_OBJECT_LOCK(object); - vm_object_page_clean(object, 0, 0, OBJPC_SYNC); - VM_OBJECT_UNLOCK(object); - VOP_UNLOCK(vp, 0); - VFS_UNLOCK_GIANT(vfslocked); - vm_object_deallocate(object); - vn_finished_write(mp); - vm_page_lock_queues(); - return (0); - } else if (object->type == OBJT_SWAP || - object->type == OBJT_DEFAULT) { - vm_page_unlock_queues(); - m_tmp = m; - vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC, 0, - NULL, NULL); - VM_OBJECT_UNLOCK(object); - vm_page_lock_queues(); - return (0); - } - } else { - vm_page_cache(m); - vm_page_unlock(m); - } - VM_OBJECT_UNLOCK(object); - return (0); -} - -static int -vm_contig_launder(int queue, vm_paddr_t low, vm_paddr_t high) -{ - vm_page_t m, next; - vm_paddr_t pa; - int error; - - TAILQ_FOREACH_SAFE(m, &vm_page_queues[queue].pl, pageq, next) { - - /* Skip marker pages */ - if ((m->flags & PG_MARKER) != 0) - continue; - - pa = VM_PAGE_TO_PHYS(m); - if (pa < low || pa + PAGE_SIZE > high) - continue; - - if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) { - vm_page_unlock(m); - continue; - } - KASSERT(m->queue == queue, - ("vm_contig_launder: page %p's queue is not %d", m, queue)); - error = vm_contig_launder_page(m, &next); - vm_page_lock_assert(m, MA_NOTOWNED); - if (error == 0) - return (TRUE); - if (error == EBUSY) - return (FALSE); - } - return (FALSE); -} - -/* - * Increase the number of cached pages. - */ -void -vm_contig_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high) -{ - int actl, actmax, inactl, inactmax; - - vm_page_lock_queues(); - inactl = 0; - inactmax = tries < 1 ? 0 : cnt.v_inactive_count; - actl = 0; - actmax = tries < 2 ? 
0 : cnt.v_active_count; -again: - if (inactl < inactmax && vm_contig_launder(PQ_INACTIVE, low, high)) { - inactl++; - goto again; - } - if (actl < actmax && vm_contig_launder(PQ_ACTIVE, low, high)) { - actl++; - goto again; - } - vm_page_unlock_queues(); -} - -/* - * Allocates a region from the kernel address map and pages within the - * specified physical address range to the kernel object, creates a wired - * mapping from the region to these pages, and returns the region's starting - * virtual address. The allocated pages are not necessarily physically - * contiguous. If M_ZERO is specified through the given flags, then the pages - * are zeroed before they are mapped. - */ -vm_offset_t -kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, - vm_paddr_t high, vm_memattr_t memattr) -{ - vm_object_t object = kernel_object; - vm_offset_t addr; - vm_ooffset_t end_offset, offset; - vm_page_t m; - int pflags, tries; - - size = round_page(size); - vm_map_lock(map); - if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { - vm_map_unlock(map); - return (0); - } - offset = addr - VM_MIN_KERNEL_ADDRESS; - vm_object_reference(object); - vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, - VM_PROT_ALL, 0); - if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT) - pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; - else - pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY; - if (flags & M_ZERO) - pflags |= VM_ALLOC_ZERO; - VM_OBJECT_LOCK(object); - end_offset = offset + size; - for (; offset < end_offset; offset += PAGE_SIZE) { - tries = 0; -retry: - m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1, - low, high, PAGE_SIZE, 0, memattr); - if (m == NULL) { - VM_OBJECT_UNLOCK(object); - if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) { - vm_map_unlock(map); - vm_contig_grow_cache(tries, low, high); - vm_map_lock(map); - VM_OBJECT_LOCK(object); - tries++; - goto retry; - } - /* - * Since the pages that were allocated by any previous - * iterations of this loop are not busy, they can be - * freed by vm_object_page_remove(), which is called - * by vm_map_delete(). - */ - vm_map_delete(map, addr, addr + size); - vm_map_unlock(map); - return (0); - } - if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) - pmap_zero_page(m); - m->valid = VM_PAGE_BITS_ALL; - } - VM_OBJECT_UNLOCK(object); - vm_map_unlock(map); - vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM | - VM_MAP_WIRE_NOHOLES); - return (addr); -} - -/* - * Allocates a region from the kernel address map, inserts the - * given physically contiguous pages into the kernel object, - * creates a wired mapping from the region to the pages, and - * returns the region's starting virtual address. If M_ZERO is - * specified through the given flags, then the pages are zeroed - * before they are mapped. 
- */ -vm_offset_t -kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, - vm_paddr_t high, u_long alignment, vm_paddr_t boundary, - vm_memattr_t memattr) -{ - vm_object_t object = kernel_object; - vm_offset_t addr; - vm_ooffset_t offset; - vm_page_t end_m, m; - int pflags, tries; - - size = round_page(size); - vm_map_lock(map); - if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { - vm_map_unlock(map); - return (0); - } - offset = addr - VM_MIN_KERNEL_ADDRESS; - vm_object_reference(object); - vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, - VM_PROT_ALL, 0); - if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT) - pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; - else - pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY; - if (flags & M_ZERO) - pflags |= VM_ALLOC_ZERO; - if (flags & M_NODUMP) - pflags |= VM_ALLOC_NODUMP; - VM_OBJECT_LOCK(object); - tries = 0; -retry: - m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, - atop(size), low, high, alignment, boundary, memattr); - if (m == NULL) { - VM_OBJECT_UNLOCK(object); - if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) { - vm_map_unlock(map); - vm_contig_grow_cache(tries, low, high); - vm_map_lock(map); - VM_OBJECT_LOCK(object); - tries++; - goto retry; - } - vm_map_delete(map, addr, addr + size); - vm_map_unlock(map); - return (0); - } - end_m = m + atop(size); - for (; m < end_m; m++) { - if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) - pmap_zero_page(m); - m->valid = VM_PAGE_BITS_ALL; - } - VM_OBJECT_UNLOCK(object); - vm_map_unlock(map); - vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM | - VM_MAP_WIRE_NOHOLES); - return (addr); -} diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 5e157a6..46e7f1c 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -195,6 +195,148 @@ kmem_alloc(map, size) } /* + * Allocates a region from the kernel address map and physical pages + * within the specified address range to the kernel object. Creates a + * wired mapping from this region to these pages, and returns the + * region's starting virtual address. The allocated pages are not + * necessarily physically contiguous. If M_ZERO is specified through the + * given flags, then the pages are zeroed before they are mapped. + */ +vm_offset_t +kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, + vm_paddr_t high, vm_memattr_t memattr) +{ + vm_object_t object = kernel_object; + vm_offset_t addr; + vm_ooffset_t end_offset, offset; + vm_page_t m; + int pflags, tries; + + size = round_page(size); + vm_map_lock(map); + if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { + vm_map_unlock(map); + return (0); + } + offset = addr - VM_MIN_KERNEL_ADDRESS; + vm_object_reference(object); + vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, + VM_PROT_ALL, 0); + if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT) + pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; + else + pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY; + if (flags & M_ZERO) + pflags |= VM_ALLOC_ZERO; + VM_OBJECT_LOCK(object); + end_offset = offset + size; + for (; offset < end_offset; offset += PAGE_SIZE) { + tries = 0; +retry: + m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1, + low, high, PAGE_SIZE, 0, memattr); + if (m == NULL) { + VM_OBJECT_UNLOCK(object); + if (tries < ((flags & M_NOWAIT) != 0 ? 
1 : 3)) { + vm_map_unlock(map); + vm_pageout_grow_cache(tries, low, high); + vm_map_lock(map); + VM_OBJECT_LOCK(object); + tries++; + goto retry; + } + + /* + * Since the pages that were allocated by any previous + * iterations of this loop are not busy, they can be + * freed by vm_object_page_remove(), which is called + * by vm_map_delete(). + */ + vm_map_delete(map, addr, addr + size); + vm_map_unlock(map); + return (0); + } + if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + m->valid = VM_PAGE_BITS_ALL; + } + VM_OBJECT_UNLOCK(object); + vm_map_unlock(map); + vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM | + VM_MAP_WIRE_NOHOLES); + return (addr); +} + +/* + * Allocates a region from the kernel address map and physically + * contiguous pages within the specified address range to the kernel + * object. Creates a wired mapping from this region to these pages, and + * returns the region's starting virtual address. If M_ZERO is specified + * through the given flags, then the pages are zeroed before they are + * mapped. + */ +vm_offset_t +kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, + vm_paddr_t high, u_long alignment, vm_paddr_t boundary, + vm_memattr_t memattr) +{ + vm_object_t object = kernel_object; + vm_offset_t addr; + vm_ooffset_t offset; + vm_page_t end_m, m; + int pflags, tries; + + size = round_page(size); + vm_map_lock(map); + if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { + vm_map_unlock(map); + return (0); + } + offset = addr - VM_MIN_KERNEL_ADDRESS; + vm_object_reference(object); + vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, + VM_PROT_ALL, 0); + if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT) + pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; + else + pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY; + if (flags & M_ZERO) + pflags |= VM_ALLOC_ZERO; + if (flags & M_NODUMP) + pflags |= VM_ALLOC_NODUMP; + VM_OBJECT_LOCK(object); + tries = 0; +retry: + m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, + atop(size), low, high, alignment, boundary, memattr); + if (m == NULL) { + VM_OBJECT_UNLOCK(object); + if (tries < ((flags & M_NOWAIT) != 0 ? 
1 : 3)) { + vm_map_unlock(map); + vm_pageout_grow_cache(tries, low, high); + vm_map_lock(map); + VM_OBJECT_LOCK(object); + tries++; + goto retry; + } + vm_map_delete(map, addr, addr + size); + vm_map_unlock(map); + return (0); + } + end_m = m + atop(size); + for (; m < end_m; m++) { + if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + m->valid = VM_PAGE_BITS_ALL; + } + VM_OBJECT_UNLOCK(object); + vm_map_unlock(map); + vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM | + VM_MAP_WIRE_NOHOLES); + return (addr); +} + +/* * kmem_free: * * Release a region of kernel virtual memory allocated diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 056eac5..b3b1ad4 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -200,13 +200,13 @@ struct vm_map { #ifdef _KERNEL static __inline vm_offset_t -vm_map_max(vm_map_t map) +vm_map_max(const struct vm_map *map) { return (map->max_offset); } static __inline vm_offset_t -vm_map_min(vm_map_t map) +vm_map_min(const struct vm_map *map) { return (map->min_offset); } diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index ec96135..ccadd8d 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -451,63 +451,6 @@ vm_page_startup(vm_offset_t vaddr) return (vaddr); } - -CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0); - -void -vm_page_aflag_set(vm_page_t m, uint8_t bits) -{ - uint32_t *addr, val; - - /* - * The PGA_WRITEABLE flag can only be set if the page is managed and - * VPO_BUSY. Currently, this flag is only set by pmap_enter(). - */ - KASSERT((bits & PGA_WRITEABLE) == 0 || - (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY, - ("PGA_WRITEABLE and !VPO_BUSY")); - - /* - * We want to use atomic updates for m->aflags, which is a - * byte wide. Not all architectures provide atomic operations - * on the single-byte destination. Punt and access the whole - * 4-byte word with an atomic update. Parallel non-atomic - * updates to the fields included in the update by proximity - * are handled properly by atomics. - */ - addr = (void *)&m->aflags; - MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0); - val = bits; -#if BYTE_ORDER == BIG_ENDIAN - val <<= 24; -#endif - atomic_set_32(addr, val); -} - -void -vm_page_aflag_clear(vm_page_t m, uint8_t bits) -{ - uint32_t *addr, val; - - /* - * The PGA_REFERENCED flag can only be cleared if the object - * containing the page is locked. - */ - KASSERT((bits & PGA_REFERENCED) == 0 || VM_OBJECT_LOCKED(m->object), - ("PGA_REFERENCED and !VM_OBJECT_LOCKED")); - - /* - * See the comment in vm_page_aflag_set(). - */ - addr = (void *)&m->aflags; - MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0); - val = bits; -#if BYTE_ORDER == BIG_ENDIAN - val <<= 24; -#endif - atomic_clear_32(addr, val); -} - void vm_page_reference(vm_page_t m) { @@ -1480,7 +1423,7 @@ retry: cpindex = pindex; for (m = m_ret; m < &m_ret[npages]; m++) { m->aflags = 0; - m->flags &= flags; + m->flags = (m->flags | PG_NODUMP) & flags; if ((req & VM_ALLOC_WIRED) != 0) m->wire_count = 1; /* Unmanaged pages don't use "act_count". */ diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index da7fd89..e95f173 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -237,13 +237,14 @@ extern struct vpglocks pa_lock[]; #define vm_page_queue_free_mtx vm_page_queue_free_lock.data /* - * These are the flags defined for vm_page. - * - * aflags are updated by atomic accesses. Use the vm_page_aflag_set() - * and vm_page_aflag_clear() functions to set and clear the flags. 
+ * The vm_page's aflags are updated using atomic operations.  To set or clear
+ * these flags, the functions vm_page_aflag_set() and vm_page_aflag_clear()
+ * must be used.  Neither these flags nor these functions are part of the KBI.
  *
  * PGA_REFERENCED may be cleared only if the object containing the page is
- * locked.  It is set by both the MI and MD VM layers.
+ * locked.  It is set by both the MI and MD VM layers.  However, kernel
+ * loadable modules should not directly set this flag.  They should call
+ * vm_page_reference() instead.
  *
  * PGA_WRITEABLE is set exclusively on managed pages by pmap_enter().  When it
  * does so, the page must be VPO_BUSY.  The MI VM layer must never access this
@@ -279,8 +280,12 @@ extern struct vpglocks pa_lock[];
 
 #ifdef _KERNEL
 
+#include <sys/systm.h>
+
 #include <vm/vm_param.h>
 
+#include <machine/atomic.h>
+
 /*
  *	Each pageable resident page falls into one of five lists:
  *
@@ -308,7 +313,6 @@ extern struct vpglocks pa_lock[];
  *
  */
 
-struct vnode;
 extern int vm_page_zero_count;
 
 extern vm_page_t vm_page_array;		/* First resident page in table */
@@ -348,8 +352,6 @@ extern struct vpglocks vm_page_queue_lock;
 #define	VM_ALLOC_COUNT_SHIFT	16
 #define	VM_ALLOC_COUNT(count)	((count) << VM_ALLOC_COUNT_SHIFT)
 
-void vm_page_aflag_set(vm_page_t m, uint8_t bits);
-void vm_page_aflag_clear(vm_page_t m, uint8_t bits);
 void vm_page_busy(vm_page_t m);
 void vm_page_flash(vm_page_t m);
 void vm_page_io_start(vm_page_t m);
@@ -425,6 +427,75 @@ void vm_page_object_lock_assert(vm_page_t m);
 #endif
 
 /*
+ * We want to use atomic updates for the aflags field, which is 8 bits wide.
+ * However, not all architectures support atomic operations on 8-bit
+ * destinations.  In order that we can easily use a 32-bit operation, we
+ * require that the aflags field be 32-bit aligned.
+ */
+CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0);
+
+/*
+ *	Clear the given bits in the specified page.
+ */
+static inline void
+vm_page_aflag_clear(vm_page_t m, uint8_t bits)
+{
+	uint32_t *addr, val;
+
+	/*
+	 * The PGA_REFERENCED flag can only be cleared if the object
+	 * containing the page is locked.
+	 */
+	if ((bits & PGA_REFERENCED) != 0)
+		VM_PAGE_OBJECT_LOCK_ASSERT(m);
+
+	/*
+	 * Access the whole 32-bit word containing the aflags field with an
+	 * atomic update.  Parallel non-atomic updates to the other fields
+	 * within this word are handled properly by the atomic update.
+	 */
+	addr = (void *)&m->aflags;
+	KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0,
+	    ("vm_page_aflag_clear: aflags is misaligned"));
+	val = bits;
+#if BYTE_ORDER == BIG_ENDIAN
+	val <<= 24;
+#endif
+	atomic_clear_32(addr, val);
+}
+
+/*
+ *	Set the given bits in the specified page.
+ */
+static inline void
+vm_page_aflag_set(vm_page_t m, uint8_t bits)
+{
+	uint32_t *addr, val;
+
+	/*
+	 * The PGA_WRITEABLE flag can only be set if the page is managed and
+	 * VPO_BUSY.  Currently, this flag is only set by pmap_enter().
+	 */
+	KASSERT((bits & PGA_WRITEABLE) == 0 ||
+	    (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY,
+	    ("vm_page_aflag_set: PGA_WRITEABLE and !VPO_BUSY"));
+
+	/*
+	 * Access the whole 32-bit word containing the aflags field with an
+	 * atomic update.  Parallel non-atomic updates to the other fields
+	 * within this word are handled properly by the atomic update.
+	 */
+	addr = (void *)&m->aflags;
+	KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0,
+	    ("vm_page_aflag_set: aflags is misaligned"));
+	val = bits;
+#if BYTE_ORDER == BIG_ENDIAN
+	val <<= 24;
+#endif
+	atomic_set_32(addr, val);
+}
+
+/*
  *	vm_page_dirty:
  *
  *	Set all bits in the page's dirty field.
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 11d040d..3994ce1 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -209,11 +209,14 @@ int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
 SYSCTL_INT(_vm, OID_AUTO, max_wired,
 	CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
 
+static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
+static boolean_t vm_pageout_launder(int, int, vm_paddr_t, vm_paddr_t);
 #if !defined(NO_SWAPPING)
 static void vm_pageout_map_deactivate_pages(vm_map_t, long);
 static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
 static void vm_req_vmdaemon(int req);
 #endif
+static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
 static void vm_pageout_page_stats(void);
 
 /*
@@ -247,7 +250,7 @@ vm_pageout_init_marker(vm_page_t marker, u_short queue)
 * This function depends on both the lock portion of struct vm_object
 * and normal struct vm_page being type stable.
 */
-boolean_t
+static boolean_t
 vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
 {
	struct vm_page marker;
@@ -286,7 +289,7 @@ vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
 *
 * This function depends on normal struct vm_page being type stable.
 */
-boolean_t
+static boolean_t
 vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
 {
	struct vm_page marker;
@@ -558,6 +561,138 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
	return (numpagedout);
 }
 
+static boolean_t
+vm_pageout_launder(int queue, int tries, vm_paddr_t low, vm_paddr_t high)
+{
+	struct mount *mp;
+	struct vnode *vp;
+	vm_object_t object;
+	vm_paddr_t pa;
+	vm_page_t m, m_tmp, next;
+	int vfslocked;
+
+	vm_page_lock_queues();
+	TAILQ_FOREACH_SAFE(m, &vm_page_queues[queue].pl, pageq, next) {
+		KASSERT(m->queue == queue,
+		    ("vm_pageout_launder: page %p's queue is not %d", m,
+		    queue));
+		if ((m->flags & PG_MARKER) != 0)
+			continue;
+		pa = VM_PAGE_TO_PHYS(m);
+		if (pa < low || pa + PAGE_SIZE > high)
+			continue;
+		if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
+			vm_page_unlock(m);
+			continue;
+		}
+		object = m->object;
+		if (!VM_OBJECT_TRYLOCK(object) &&
+		    (!vm_pageout_fallback_object_lock(m, &next) ||
+		    m->hold_count != 0)) {
+			vm_page_unlock(m);
+			VM_OBJECT_UNLOCK(object);
+			continue;
+		}
+		if ((m->oflags & VPO_BUSY) != 0 || m->busy != 0) {
+			if (tries == 0) {
+				vm_page_unlock(m);
+				VM_OBJECT_UNLOCK(object);
+				continue;
+			}
+			vm_page_sleep(m, "vpctw0");
+			VM_OBJECT_UNLOCK(object);
+			return (FALSE);
+		}
+		vm_page_test_dirty(m);
+		if (m->dirty == 0)
+			pmap_remove_all(m);
+		if (m->dirty != 0) {
+			vm_page_unlock(m);
+			if (tries == 0 || (object->flags & OBJ_DEAD) != 0) {
+				VM_OBJECT_UNLOCK(object);
+				continue;
+			}
+			if (object->type == OBJT_VNODE) {
+				vm_page_unlock_queues();
+				vp = object->handle;
+				vm_object_reference_locked(object);
+				VM_OBJECT_UNLOCK(object);
+				(void)vn_start_write(vp, &mp, V_WAIT);
+				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+				VM_OBJECT_LOCK(object);
+				vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
+				VM_OBJECT_UNLOCK(object);
+				VOP_UNLOCK(vp, 0);
+				VFS_UNLOCK_GIANT(vfslocked);
+				vm_object_deallocate(object);
+				vn_finished_write(mp);
+				return (TRUE);
+			} else if (object->type == OBJT_SWAP ||
+			    object->type == OBJT_DEFAULT) {
+				vm_page_unlock_queues();
+				m_tmp = m;
+				vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC,
+				    0, NULL, NULL);
+				VM_OBJECT_UNLOCK(object);
+				return (TRUE);
+			}
+		} else {
+			vm_page_cache(m);
+			vm_page_unlock(m);
+		}
+		VM_OBJECT_UNLOCK(object);
+	}
+	vm_page_unlock_queues();
+	return (FALSE);
+}
+
+/*
+ * Increase the number of cached pages.  The specified value, "tries",
+ * determines which categories of pages are cached:
+ *
+ *  0: All clean, inactive pages within the specified physical address range
+ *     are cached.  Will not sleep.
+ *  1: The vm_lowmem handlers are called.  All inactive pages within
+ *     the specified physical address range are cached.  May sleep.
+ *  2: The vm_lowmem handlers are called.  All inactive and active pages
+ *     within the specified physical address range are cached.  May sleep.
+ */
+void
+vm_pageout_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high)
+{
+	int actl, actmax, inactl, inactmax;
+
+	if (tries > 0) {
+		/*
+		 * Decrease registered cache sizes.  The vm_lowmem handlers
+		 * may acquire locks and/or sleep, so they can only be invoked
+		 * when "tries" is greater than zero.
+		 */
+		EVENTHANDLER_INVOKE(vm_lowmem, 0);
+
+		/*
+		 * We do this explicitly after the caches have been drained
+		 * above.
+		 */
+		uma_reclaim();
+	}
+	inactl = 0;
+	inactmax = cnt.v_inactive_count;
+	actl = 0;
+	actmax = tries < 2 ? 0 : cnt.v_active_count;
+again:
+	if (inactl < inactmax && vm_pageout_launder(PQ_INACTIVE, tries, low,
+	    high)) {
+		inactl++;
+		goto again;
+	}
+	if (actl < actmax && vm_pageout_launder(PQ_ACTIVE, tries, low, high)) {
+		actl++;
+		goto again;
+	}
+}
+
 #if !defined(NO_SWAPPING)
 /*
 *	vm_pageout_object_deactivate_pages
@@ -738,7 +873,7 @@ vm_pageout_scan(int pass)
	vm_page_t m, next;
	struct vm_page marker;
	int page_shortage, maxscan, pcount;
-	int addl_page_shortage, addl_page_shortage_init;
+	int addl_page_shortage;
	vm_object_t object;
	int actcount;
	int vnodes_skipped = 0;
@@ -754,13 +889,19 @@ vm_pageout_scan(int pass)
	 */
	uma_reclaim();
 
-	addl_page_shortage_init = atomic_readandclear_int(&vm_pageout_deficit);
+	/*
+	 * The addl_page_shortage is the the number of temporarily
+	 * stuck pages in the inactive queue.  In other words, the
+	 * number of pages from cnt.v_inactive_count that should be
+	 * discounted in setting the target for the active queue scan.
+	 */
+	addl_page_shortage = atomic_readandclear_int(&vm_pageout_deficit);
 
	/*
	 * Calculate the number of pages we want to either free or move
	 * to the cache.
	 */
-	page_shortage = vm_paging_target() + addl_page_shortage_init;
+	page_shortage = vm_paging_target() + addl_page_shortage;
 
	vm_pageout_init_marker(&marker, PQ_INACTIVE);
 
@@ -786,8 +927,6 @@ vm_pageout_scan(int pass)
		maxlaunder = 10000;
	vm_page_lock_queues();
	queues_locked = TRUE;
-rescan0:
-	addl_page_shortage = addl_page_shortage_init;
	maxscan = cnt.v_inactive_count;
 
	for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
@@ -795,12 +934,9 @@ rescan0:
	    m = next) {
		KASSERT(queues_locked, ("unlocked queues"));
		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+		KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m));
		cnt.v_pdpages++;
-
-		if (m->queue != PQ_INACTIVE)
-			goto rescan0;
-
		next = TAILQ_NEXT(m, pageq);
 
		/*
@@ -815,38 +951,31 @@ rescan0:
		    ("Unmanaged page %p cannot be in inactive queue", m));
 
		/*
-		 * Lock the page.
+		 * The page or object lock acquisitions fail if the
+		 * page was removed from the queue or moved to a
+		 * different position within the queue.  In either
+		 * case, addl_page_shortage should not be incremented.
		 */
		if (!vm_pageout_page_lock(m, &next)) {
			vm_page_unlock(m);
-			addl_page_shortage++;
			continue;
		}
-
-		/*
-		 * A held page may be undergoing I/O, so skip it.
-		 */
-		if (m->hold_count) {
+		object = m->object;
+		if (!VM_OBJECT_TRYLOCK(object) &&
+		    !vm_pageout_fallback_object_lock(m, &next)) {
			vm_page_unlock(m);
-			vm_page_requeue(m);
-			addl_page_shortage++;
+			VM_OBJECT_UNLOCK(object);
			continue;
		}
 
		/*
-		 * Don't mess with busy pages, keep in the front of the
-		 * queue, most likely are being paged out.
+		 * Don't mess with busy pages, keep them at at the
+		 * front of the queue, most likely they are being
+		 * paged out.  Increment addl_page_shortage for busy
+		 * pages, because they may leave the inactive queue
+		 * shortly after page scan is finished.
		 */
-		object = m->object;
-		if (!VM_OBJECT_TRYLOCK(object) &&
-		    (!vm_pageout_fallback_object_lock(m, &next) ||
-		    m->hold_count != 0)) {
-			VM_OBJECT_UNLOCK(object);
-			vm_page_unlock(m);
-			addl_page_shortage++;
-			continue;
-		}
-		if (m->busy || (m->oflags & VPO_BUSY)) {
+		if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) {
			vm_page_unlock(m);
			VM_OBJECT_UNLOCK(object);
			addl_page_shortage++;
@@ -906,6 +1035,21 @@ rescan0:
			goto relock_queues;
		}
 
+		if (m->hold_count != 0) {
+			vm_page_unlock(m);
+			VM_OBJECT_UNLOCK(object);
+
+			/*
+			 * Held pages are essentially stuck in the
+			 * queue.  So, they ought to be discounted
+			 * from cnt.v_inactive_count.  See the
+			 * calculation of the page_shortage for the
+			 * loop over the active queue below.
+			 */
+			addl_page_shortage++;
+			goto relock_queues;
+		}
+
		/*
		 * If the upper level VM system does not believe that the page
		 * is fully dirty, but it is mapped for write access, then we
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
index 6897bbb..c7b4e90 100644
--- a/sys/vm/vm_pageout.h
+++ b/sys/vm/vm_pageout.h
@@ -101,10 +101,8 @@ extern void vm_wait(void);
 extern void vm_waitpfault(void);
 
 #ifdef _KERNEL
-boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
 int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *);
+void vm_pageout_grow_cache(int, vm_paddr_t, vm_paddr_t);
 void vm_pageout_oom(int shortage);
-boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
-void vm_contig_grow_cache(int, vm_paddr_t, vm_paddr_t);
 #endif
 #endif	/* _VM_VM_PAGEOUT_H_ */
diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c
index 10db93c..549e710 100644
--- a/sys/vm/vm_reserv.c
+++ b/sys/vm/vm_reserv.c
@@ -445,7 +445,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
		m += VM_LEVEL_0_NPAGES;
		first += VM_LEVEL_0_NPAGES;
		allocpages -= VM_LEVEL_0_NPAGES;
-	} while (allocpages > VM_LEVEL_0_NPAGES);
+	} while (allocpages > 0);
	return (m_ret);
 
	/*