Diffstat (limited to 'sys/vm')
-rw-r--r-- | sys/vm/memguard.c | 7
-rw-r--r-- | sys/vm/pmap.h | 16
-rw-r--r-- | sys/vm/swap_pager.c | 659
-rw-r--r-- | sys/vm/swap_pager.h | 1
-rw-r--r-- | sys/vm/uma.h | 1
-rw-r--r-- | sys/vm/uma_core.c | 17
-rw-r--r-- | sys/vm/uma_int.h | 3
-rw-r--r-- | sys/vm/vm_extern.h | 1
-rw-r--r-- | sys/vm/vm_fault.c | 53
-rw-r--r-- | sys/vm/vm_glue.c | 517
-rw-r--r-- | sys/vm/vm_kern.c | 19
-rw-r--r-- | sys/vm/vm_map.c | 25
-rw-r--r-- | sys/vm/vm_mmap.c | 2
-rw-r--r-- | sys/vm/vm_object.c | 180
-rw-r--r-- | sys/vm/vm_object.h | 22
-rw-r--r-- | sys/vm/vm_page.c | 458
-rw-r--r-- | sys/vm/vm_page.h | 40
-rw-r--r-- | sys/vm/vm_pageout.c | 435
-rw-r--r-- | sys/vm/vm_pageout.h | 9
-rw-r--r-- | sys/vm/vm_param.h | 3
-rw-r--r-- | sys/vm/vm_phys.c | 51
-rw-r--r-- | sys/vm/vm_phys.h | 1
-rw-r--r-- | sys/vm/vm_reserv.c | 14
-rw-r--r-- | sys/vm/vm_reserv.h | 1
-rw-r--r-- | sys/vm/vm_swapout.c | 975
-rw-r--r-- | sys/vm/vm_swapout_dummy.c | 122
-rw-r--r-- | sys/vm/vnode_pager.c | 143
27 files changed, 2238 insertions, 1537 deletions
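The central change in the swap_pager.c hunks below replaces the global swblock hash table with a per-object pctrie of struct swblk entries, each covering a SWAP_META_PAGES-aligned run of page indices. As a reading aid only (not part of the commit), here is a minimal C sketch of how a page index maps to an on-disk swap block under the new layout; swblk_lookup_example() is a hypothetical helper that condenses the lookup performed by swp_pager_meta_ctl() in the patch, and it assumes the caller holds the object lock.

/*
 * Illustrative sketch of the new swap metadata layout.  The struct and
 * the SWAP_PCTRIE_LOOKUP()/rounddown() usage follow the patch below;
 * swblk_lookup_example() itself is hypothetical.
 */
struct swblk {
	vm_pindex_t	p;			/* base pindex, SWAP_META_PAGES-aligned */
	daddr_t		d[SWAP_META_PAGES];	/* per-page swap block, or SWAPBLK_NONE */
};

static daddr_t
swblk_lookup_example(vm_object_t object, vm_pindex_t pindex)
{
	struct swblk *sb;

	/* The trie is keyed by the SWAP_META_PAGES-aligned base index. */
	sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks,
	    rounddown(pindex, SWAP_META_PAGES));
	if (sb == NULL)
		return (SWAPBLK_NONE);

	/* Within a swblk, the slot is the index modulo SWAP_META_PAGES. */
	return (sb->d[pindex % SWAP_META_PAGES]);
}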
diff --git a/sys/vm/memguard.c b/sys/vm/memguard.c index d4efc2b..9e1d7a4 100644 --- a/sys/vm/memguard.c +++ b/sys/vm/memguard.c @@ -283,7 +283,7 @@ v2sizev(vm_offset_t va) void * memguard_alloc(unsigned long req_size, int flags) { - vm_offset_t addr; + vm_offset_t addr, origaddr; u_long size_p, size_v; int do_guard, rv; @@ -327,7 +327,7 @@ memguard_alloc(unsigned long req_size, int flags) for (;;) { if (vmem_xalloc(memguard_arena, size_v, 0, 0, 0, memguard_cursor, VMEM_ADDR_MAX, - M_BESTFIT | M_NOWAIT, &addr) == 0) + M_BESTFIT | M_NOWAIT, &origaddr) == 0) break; /* * The map has no space. This may be due to @@ -342,11 +342,12 @@ memguard_alloc(unsigned long req_size, int flags) memguard_wrap++; memguard_cursor = memguard_base; } + addr = origaddr; if (do_guard) addr += PAGE_SIZE; rv = kmem_back(kmem_object, addr, size_p, flags); if (rv != KERN_SUCCESS) { - vmem_xfree(memguard_arena, addr, size_v); + vmem_xfree(memguard_arena, origaddr, size_v); memguard_fail_pgs++; addr = (vm_offset_t)NULL; goto out; diff --git a/sys/vm/pmap.h b/sys/vm/pmap.h index 1d18823..6033f37 100644 --- a/sys/vm/pmap.h +++ b/sys/vm/pmap.h @@ -100,9 +100,21 @@ extern vm_offset_t kernel_vm_end; /* * Flags for pmap_enter(). The bits in the low-order byte are reserved * for the protection code (vm_prot_t) that describes the fault type. + * Bits 24 through 31 are reserved for the pmap's internal use. */ -#define PMAP_ENTER_NOSLEEP 0x0100 -#define PMAP_ENTER_WIRED 0x0200 +#define PMAP_ENTER_NOSLEEP 0x00000100 +#define PMAP_ENTER_WIRED 0x00000200 +#define PMAP_ENTER_RESERVED 0xFF000000 + +/* + * Define the maximum number of machine-dependent reference bits that are + * cleared by a call to pmap_ts_referenced(). This limit serves two purposes. + * First, it bounds the cost of reference bit maintenance on widely shared + * pages. Second, it prevents numeric overflow during maintenance of a + * widely shared page's "act_count" field. An overflow could result in the + * premature deactivation of the page. + */ +#define PMAP_TS_REFERENCED_MAX 5 void pmap_activate(struct thread *td); void pmap_advise(pmap_t pmap, vm_offset_t sva, vm_offset_t eva, diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index f5a766b..0703312 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -86,10 +86,12 @@ __FBSDID("$FreeBSD$"); #include <sys/namei.h> #include <sys/vnode.h> #include <sys/malloc.h> +#include <sys/pctrie.h> #include <sys/racct.h> #include <sys/resource.h> #include <sys/resourcevar.h> #include <sys/rwlock.h> +#include <sys/sbuf.h> #include <sys/sysctl.h> #include <sys/sysproto.h> #include <sys/blist.h> @@ -126,22 +128,17 @@ __FBSDID("$FreeBSD$"); #define SWB_NPAGES MAX_PAGEOUT_CLUSTER #endif +#define SWAP_META_PAGES PCTRIE_COUNT + /* - * The swblock structure maps an object and a small, fixed-size range - * of page indices to disk addresses within a swap area. - * The collection of these mappings is implemented as a hash table. - * Unused disk addresses within a swap area are allocated and managed - * using a blist. + * A swblk structure maps each page index within a + * SWAP_META_PAGES-aligned and sized range to the address of an + * on-disk swap block (or SWAPBLK_NONE). The collection of these + * mappings for an entire vm object is implemented as a pc-trie. 
*/ -#define SWAP_META_PAGES 32 -#define SWAP_META_MASK (SWAP_META_PAGES - 1) - -struct swblock { - struct swblock *swb_hnext; - vm_object_t swb_object; - vm_pindex_t swb_index; - int swb_count; - daddr_t swb_pages[SWAP_META_PAGES]; +struct swblk { + vm_pindex_t p; + daddr_t d[SWAP_META_PAGES]; }; static MALLOC_DEFINE(M_VMPGDATA, "vm_pgdata", "swap pager private data"); @@ -159,7 +156,7 @@ static vm_ooffset_t swap_reserved; SYSCTL_QUAD(_vm, OID_AUTO, swap_reserved, CTLFLAG_RD, &swap_reserved, 0, "Amount of swap storage needed to back all allocated anonymous memory."); static int overcommit = 0; -SYSCTL_INT(_vm, OID_AUTO, overcommit, CTLFLAG_RW, &overcommit, 0, +SYSCTL_INT(_vm, VM_OVERCOMMIT, overcommit, CTLFLAG_RW, &overcommit, 0, "Configure virtual memory overcommit behavior. See tuning(7) " "for details."); static unsigned long swzone; @@ -314,7 +311,7 @@ swap_release_by_cred(vm_ooffset_t decr, struct ucred *cred) #define SWM_FREE 0x02 /* free, period */ #define SWM_POP 0x04 /* pop out */ -int swap_pager_full = 2; /* swap space exhaustion (task killing) */ +static int swap_pager_full = 2; /* swap space exhaustion (task killing) */ static int swap_pager_almost_full = 1; /* swap space exhaustion (w/hysteresis)*/ static int nsw_rcount; /* free read buffers */ static int nsw_wcount_sync; /* limit write buffers / synchronous */ @@ -326,10 +323,10 @@ static int sysctl_swap_async_max(SYSCTL_HANDLER_ARGS); SYSCTL_PROC(_vm, OID_AUTO, swap_async_max, CTLTYPE_INT | CTLFLAG_RW | CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_async_max, "I", "Maximum running async swap ops"); - -static struct swblock **swhash; -static int swhash_mask; -static struct mtx swhash_mtx; +static int sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS); +SYSCTL_PROC(_vm, OID_AUTO, swap_fragmentation, CTLTYPE_STRING | CTLFLAG_RD | + CTLFLAG_MPSAFE, NULL, 0, sysctl_swap_fragmentation, "A", + "Swap Fragmentation Info"); static struct sx sw_alloc_sx; @@ -344,7 +341,8 @@ static struct sx sw_alloc_sx; (&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)]) static struct pagerlst swap_pager_object_list[NOBJLISTS]; -static uma_zone_t swap_zone; +static uma_zone_t swblk_zone; +static uma_zone_t swpctrie_zone; /* * pagerops for OBJT_SWAP - "swap pager". Some ops are also global procedure @@ -402,12 +400,28 @@ static daddr_t swp_pager_getswapspace(int npages); /* * Metadata functions */ -static struct swblock **swp_pager_hash(vm_object_t object, vm_pindex_t index); static void swp_pager_meta_build(vm_object_t, vm_pindex_t, daddr_t); static void swp_pager_meta_free(vm_object_t, vm_pindex_t, vm_pindex_t); static void swp_pager_meta_free_all(vm_object_t); static daddr_t swp_pager_meta_ctl(vm_object_t, vm_pindex_t, int); +static void * +swblk_trie_alloc(struct pctrie *ptree) +{ + + return (uma_zalloc(swpctrie_zone, M_NOWAIT | (curproc == pageproc ? + M_USE_RESERVE : 0))); +} + +static void +swblk_trie_free(struct pctrie *ptree, void *node) +{ + + uma_zfree(swpctrie_zone, node); +} + +PCTRIE_DEFINE(SWAP, swblk, p, swblk_trie_alloc, swblk_trie_free); + /* * SWP_SIZECHECK() - update swap_pager_full indication * @@ -436,33 +450,6 @@ swp_sizecheck(void) } /* - * SWP_PAGER_HASH() - hash swap meta data - * - * This is an helper function which hashes the swapblk given - * the object and page index. It returns a pointer to a pointer - * to the object, or a pointer to a NULL pointer if it could not - * find a swapblk. 
- */ -static struct swblock ** -swp_pager_hash(vm_object_t object, vm_pindex_t index) -{ - struct swblock **pswap; - struct swblock *swap; - - index &= ~(vm_pindex_t)SWAP_META_MASK; - pswap = &swhash[(index ^ (int)(intptr_t)object) & swhash_mask]; - while ((swap = *pswap) != NULL) { - if (swap->swb_object == object && - swap->swb_index == index - ) { - break; - } - pswap = &swap->swb_hnext; - } - return (pswap); -} - -/* * SWAP_PAGER_INIT() - initialize the swap pager! * * Expected to be started from system init. NOTE: This code is run @@ -527,21 +514,25 @@ swap_pager_swap_init(void) mtx_unlock(&pbuf_mtx); /* - * Initialize our zone. Right now I'm just guessing on the number - * we need based on the number of pages in the system. Each swblock - * can hold 32 pages, so this is probably overkill. This reservation - * is typically limited to around 32MB by default. + * Initialize our zone, guessing on the number we need based + * on the number of pages in the system. */ n = vm_cnt.v_page_count / 2; - if (maxswzone && n > maxswzone / sizeof(struct swblock)) - n = maxswzone / sizeof(struct swblock); + if (maxswzone && n > maxswzone / sizeof(struct swblk)) + n = maxswzone / sizeof(struct swblk); + swpctrie_zone = uma_zcreate("swpctrie", pctrie_node_size(), NULL, NULL, + pctrie_zone_init, NULL, UMA_ALIGN_PTR, + UMA_ZONE_NOFREE | UMA_ZONE_VM); + if (swpctrie_zone == NULL) + panic("failed to create swap pctrie zone."); + swblk_zone = uma_zcreate("swblk", sizeof(struct swblk), NULL, NULL, + NULL, NULL, _Alignof(struct swblk) - 1, + UMA_ZONE_NOFREE | UMA_ZONE_VM); + if (swblk_zone == NULL) + panic("failed to create swap blk zone."); n2 = n; - swap_zone = uma_zcreate("SWAPMETA", sizeof(struct swblock), NULL, NULL, - NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE | UMA_ZONE_VM); - if (swap_zone == NULL) - panic("failed to create swap_zone."); do { - if (uma_zone_reserve_kva(swap_zone, n)) + if (uma_zone_reserve_kva(swblk_zone, n)) break; /* * if the allocation failed, try a zone two thirds the @@ -549,25 +540,22 @@ swap_pager_swap_init(void) */ n -= ((n + 2) / 3); } while (n > 0); - if (n2 != n) - printf("Swap zone entries reduced from %lu to %lu.\n", n2, n); - swap_maxpages = n * SWAP_META_PAGES; - swzone = n * sizeof(struct swblock); - n2 = n; /* - * Initialize our meta-data hash table. The swapper does not need to - * be quite as efficient as the VM system, so we do not use an - * oversized hash table. - * - * n: size of hash table, must be power of 2 - * swhash_mask: hash table index mask + * Often uma_zone_reserve_kva() cannot reserve exactly the + * requested size. Account for the difference when + * calculating swap_maxpages. */ - for (n = 1; n < n2 / 8; n *= 2) - ; - swhash = malloc(sizeof(struct swblock *) * n, M_VMPGDATA, M_WAITOK | M_ZERO); - swhash_mask = n - 1; - mtx_init(&swhash_mtx, "swap_pager swhash", NULL, MTX_DEF); + n = uma_zone_get_max(swblk_zone); + + if (n < n2) + printf("Swap blk zone entries reduced from %lu to %lu.\n", + n2, n); + swap_maxpages = n * SWAP_META_PAGES; + swzone = n * sizeof(struct swblk); + if (!uma_zone_reserve_kva(swpctrie_zone, n)) + printf("Cannot reserve swap pctrie zone, " + "reduce kern.maxswzone.\n"); } static vm_object_t @@ -581,14 +569,20 @@ swap_pager_alloc_init(void *handle, struct ucred *cred, vm_ooffset_t size, return (NULL); crhold(cred); } + + /* + * The un_pager.swp.swp_blks trie is initialized by + * vm_object_allocate() to ensure the correct order of + * visibility to other threads. 
+ */ object = vm_object_allocate(OBJT_SWAP, OFF_TO_IDX(offset + PAGE_MASK + size)); + object->handle = handle; if (cred != NULL) { object->cred = cred; object->charge = size; } - object->un_pager.swp.swp_bcount = 0; return (object); } @@ -798,6 +792,36 @@ swp_pager_freeswapspace(daddr_t blk, int npages) } /* + * SYSCTL_SWAP_FRAGMENTATION() - produce raw swap space stats + */ +static int +sysctl_swap_fragmentation(SYSCTL_HANDLER_ARGS) +{ + struct sbuf sbuf; + struct swdevt *sp; + const char *devname; + int error; + + error = sysctl_wire_old_buffer(req, 0); + if (error != 0) + return (error); + sbuf_new_for_sysctl(&sbuf, NULL, 128, req); + mtx_lock(&sw_dev_mtx); + TAILQ_FOREACH(sp, &swtailq, sw_list) { + if (vn_isdisk(sp->sw_vp, NULL)) + devname = devtoname(sp->sw_vp->v_rdev); + else + devname = "[file]"; + sbuf_printf(&sbuf, "\nFree space on device %s:\n", devname); + blist_stats(sp->sw_blist, &sbuf); + } + mtx_unlock(&sw_dev_mtx); + error = sbuf_finish(&sbuf); + sbuf_delete(&sbuf); + return (error); +} + +/* * SWAP_PAGER_FREESPACE() - frees swap blocks associated with a page * range within an object. * @@ -1507,7 +1531,7 @@ swp_pager_async_iodone(struct buf *bp) * so it doesn't clog the inactive list, * then finish the I/O. */ - vm_page_dirty(m); + MPASS(m->dirty == VM_PAGE_BITS_ALL); vm_page_lock(m); vm_page_activate(m); vm_page_unlock(m); @@ -1643,50 +1667,56 @@ swp_pager_force_pagein(vm_object_t object, vm_pindex_t pindex) static void swap_pager_swapoff(struct swdevt *sp) { - struct swblock *swap; - vm_object_t locked_obj, object; - vm_pindex_t pindex; - int i, j, retries; + struct swblk *sb; + vm_object_t object; + vm_pindex_t pi; + int i, retries; sx_assert(&swdev_syscall_lock, SA_XLOCKED); retries = 0; - locked_obj = NULL; full_rescan: - mtx_lock(&swhash_mtx); - for (i = 0; i <= swhash_mask; i++) { /* '<=' is correct here */ -restart: - for (swap = swhash[i]; swap != NULL; swap = swap->swb_hnext) { - object = swap->swb_object; - pindex = swap->swb_index; - for (j = 0; j < SWAP_META_PAGES; ++j) { - if (!swp_pager_isondev(swap->swb_pages[j], sp)) + mtx_lock(&vm_object_list_mtx); + TAILQ_FOREACH(object, &vm_object_list, object_list) { + if (object->type != OBJT_SWAP) + continue; + mtx_unlock(&vm_object_list_mtx); + /* Depends on type-stability. */ + VM_OBJECT_WLOCK(object); + + /* + * Dead objects are eventually terminated on their own. + */ + if ((object->flags & OBJ_DEAD) != 0) + goto next_obj; + + /* + * Sync with fences placed after pctrie + * initialization. We must not access pctrie below + * unless we checked that our object is swap and not + * dead. + */ + atomic_thread_fence_acq(); + if (object->type != OBJT_SWAP) + goto next_obj; + + for (pi = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( + &object->un_pager.swp.swp_blks, pi)) != NULL; ) { + pi = sb->p + SWAP_META_PAGES; + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] == SWAPBLK_NONE) continue; - if (locked_obj != object) { - if (locked_obj != NULL) - VM_OBJECT_WUNLOCK(locked_obj); - locked_obj = object; - if (!VM_OBJECT_TRYWLOCK(object)) { - mtx_unlock(&swhash_mtx); - /* Depends on type-stability. 
*/ - VM_OBJECT_WLOCK(object); - mtx_lock(&swhash_mtx); - goto restart; - } - } - MPASS(locked_obj == object); - mtx_unlock(&swhash_mtx); - swp_pager_force_pagein(object, pindex + j); - mtx_lock(&swhash_mtx); - goto restart; + if (swp_pager_isondev(sb->d[i], sp)) + swp_pager_force_pagein(object, + sb->p + i); } } +next_obj: + VM_OBJECT_WUNLOCK(object); + mtx_lock(&vm_object_list_mtx); } - mtx_unlock(&swhash_mtx); - if (locked_obj != NULL) { - VM_OBJECT_WUNLOCK(locked_obj); - locked_obj = NULL; - } + mtx_unlock(&vm_object_list_mtx); + if (sp->sw_used) { /* * Objects may be locked or paging to the device being @@ -1729,85 +1759,120 @@ restart: static void swp_pager_meta_build(vm_object_t object, vm_pindex_t pindex, daddr_t swapblk) { - static volatile int exhausted; - struct swblock *swap; - struct swblock **pswap; - int idx; + static volatile int swblk_zone_exhausted, swpctrie_zone_exhausted; + struct swblk *sb, *sb1; + vm_pindex_t modpi, rdpi; + int error, i; VM_OBJECT_ASSERT_WLOCKED(object); + /* * Convert default object to swap object if necessary */ if (object->type != OBJT_SWAP) { + pctrie_init(&object->un_pager.swp.swp_blks); + + /* + * Ensure that swap_pager_swapoff()'s iteration over + * object_list does not see a garbage pctrie. + */ + atomic_thread_fence_rel(); + object->type = OBJT_SWAP; - object->un_pager.swp.swp_bcount = 0; KASSERT(object->handle == NULL, ("default pager with handle")); } - /* - * Locate hash entry. If not found create, but if we aren't adding - * anything just return. If we run out of space in the map we wait - * and, since the hash table may have changed, retry. - */ -retry: - mtx_lock(&swhash_mtx); - pswap = swp_pager_hash(object, pindex); - - if ((swap = *pswap) == NULL) { - int i; - + rdpi = rounddown(pindex, SWAP_META_PAGES); + sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, rdpi); + if (sb == NULL) { if (swapblk == SWAPBLK_NONE) - goto done; - - swap = *pswap = uma_zalloc(swap_zone, M_NOWAIT | - (curproc == pageproc ? M_USE_RESERVE : 0)); - if (swap == NULL) { - mtx_unlock(&swhash_mtx); + return; + for (;;) { + sb = uma_zalloc(swblk_zone, M_NOWAIT | (curproc == + pageproc ? M_USE_RESERVE : 0)); + if (sb != NULL) { + sb->p = rdpi; + for (i = 0; i < SWAP_META_PAGES; i++) + sb->d[i] = SWAPBLK_NONE; + if (atomic_cmpset_int(&swblk_zone_exhausted, + 1, 0)) + printf("swblk zone ok\n"); + break; + } VM_OBJECT_WUNLOCK(object); - if (uma_zone_exhausted(swap_zone)) { - if (atomic_cmpset_int(&exhausted, 0, 1)) - printf("swap zone exhausted, " + if (uma_zone_exhausted(swblk_zone)) { + if (atomic_cmpset_int(&swblk_zone_exhausted, + 0, 1)) + printf("swap blk zone exhausted, " "increase kern.maxswzone\n"); vm_pageout_oom(VM_OOM_SWAPZ); - pause("swzonex", 10); + pause("swzonxb", 10); } else VM_WAIT; VM_OBJECT_WLOCK(object); - goto retry; + sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, + rdpi); + if (sb != NULL) + /* + * Somebody swapped out a nearby page, + * allocating swblk at the rdpi index, + * while we dropped the object lock. 
+ */ + goto allocated; + } + for (;;) { + error = SWAP_PCTRIE_INSERT( + &object->un_pager.swp.swp_blks, sb); + if (error == 0) { + if (atomic_cmpset_int(&swpctrie_zone_exhausted, + 1, 0)) + printf("swpctrie zone ok\n"); + break; + } + VM_OBJECT_WUNLOCK(object); + if (uma_zone_exhausted(swpctrie_zone)) { + if (atomic_cmpset_int(&swpctrie_zone_exhausted, + 0, 1)) + printf("swap pctrie zone exhausted, " + "increase kern.maxswzone\n"); + vm_pageout_oom(VM_OOM_SWAPZ); + pause("swzonxp", 10); + } else + VM_WAIT; + VM_OBJECT_WLOCK(object); + sb1 = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, + rdpi); + if (sb1 != NULL) { + uma_zfree(swblk_zone, sb); + sb = sb1; + goto allocated; + } } - - if (atomic_cmpset_int(&exhausted, 1, 0)) - printf("swap zone ok\n"); - - swap->swb_hnext = NULL; - swap->swb_object = object; - swap->swb_index = pindex & ~(vm_pindex_t)SWAP_META_MASK; - swap->swb_count = 0; - - ++object->un_pager.swp.swp_bcount; - - for (i = 0; i < SWAP_META_PAGES; ++i) - swap->swb_pages[i] = SWAPBLK_NONE; } +allocated: + MPASS(sb->p == rdpi); - /* - * Delete prior contents of metadata - */ - idx = pindex & SWAP_META_MASK; - - if (swap->swb_pages[idx] != SWAPBLK_NONE) { - swp_pager_freeswapspace(swap->swb_pages[idx], 1); - --swap->swb_count; - } + modpi = pindex % SWAP_META_PAGES; + /* Delete prior contents of metadata. */ + if (sb->d[modpi] != SWAPBLK_NONE) + swp_pager_freeswapspace(sb->d[modpi], 1); + /* Enter block into metadata. */ + sb->d[modpi] = swapblk; /* - * Enter block into metadata + * Free the swblk if we end up with the empty page run. */ - swap->swb_pages[idx] = swapblk; - if (swapblk != SWAPBLK_NONE) - ++swap->swb_count; -done: - mtx_unlock(&swhash_mtx); + if (swapblk == SWAPBLK_NONE) { + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] != SWAPBLK_NONE) + break; + } + if (i == SWAP_META_PAGES) { + SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, + rdpi); + uma_zfree(swblk_zone, sb); + } + } } /* @@ -1821,42 +1886,40 @@ done: * with resident pages. 
*/ static void -swp_pager_meta_free(vm_object_t object, vm_pindex_t index, vm_pindex_t count) +swp_pager_meta_free(vm_object_t object, vm_pindex_t pindex, vm_pindex_t count) { - struct swblock **pswap, *swap; - vm_pindex_t c; - daddr_t v; - int n, sidx; + struct swblk *sb; + vm_pindex_t last; + int i; + bool empty; - VM_OBJECT_ASSERT_LOCKED(object); + VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_SWAP || count == 0) return; - mtx_lock(&swhash_mtx); - for (c = 0; c < count;) { - pswap = swp_pager_hash(object, index); - sidx = index & SWAP_META_MASK; - n = SWAP_META_PAGES - sidx; - index += n; - if ((swap = *pswap) == NULL) { - c += n; - continue; - } - for (; c < count && sidx < SWAP_META_PAGES; ++c, ++sidx) { - if ((v = swap->swb_pages[sidx]) == SWAPBLK_NONE) + last = pindex + count - 1; + for (;;) { + sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, + rounddown(pindex, SWAP_META_PAGES)); + if (sb == NULL || sb->p > last) + break; + empty = true; + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] == SWAPBLK_NONE) continue; - swp_pager_freeswapspace(v, 1); - swap->swb_pages[sidx] = SWAPBLK_NONE; - if (--swap->swb_count == 0) { - *pswap = swap->swb_hnext; - uma_zfree(swap_zone, swap); - --object->un_pager.swp.swp_bcount; - c += SWAP_META_PAGES - sidx; - break; - } + if (pindex <= sb->p + i && sb->p + i <= last) { + swp_pager_freeswapspace(sb->d[i], 1); + sb->d[i] = SWAPBLK_NONE; + } else + empty = false; + } + pindex = sb->p + SWAP_META_PAGES; + if (empty) { + SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, + sb->p); + uma_zfree(swblk_zone, sb); } } - mtx_unlock(&swhash_mtx); } /* @@ -1868,36 +1931,23 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t index, vm_pindex_t count) static void swp_pager_meta_free_all(vm_object_t object) { - struct swblock **pswap, *swap; - vm_pindex_t index; - daddr_t v; + struct swblk *sb; + vm_pindex_t pindex; int i; VM_OBJECT_ASSERT_WLOCKED(object); if (object->type != OBJT_SWAP) return; - index = 0; - while (object->un_pager.swp.swp_bcount != 0) { - mtx_lock(&swhash_mtx); - pswap = swp_pager_hash(object, index); - if ((swap = *pswap) != NULL) { - for (i = 0; i < SWAP_META_PAGES; ++i) { - v = swap->swb_pages[i]; - if (v != SWAPBLK_NONE) { - --swap->swb_count; - swp_pager_freeswapspace(v, 1); - } - } - if (swap->swb_count != 0) - panic( - "swap_pager_meta_free_all: swb_count != 0"); - *pswap = swap->swb_hnext; - uma_zfree(swap_zone, swap); - --object->un_pager.swp.swp_bcount; + for (pindex = 0; (sb = SWAP_PCTRIE_LOOKUP_GE( + &object->un_pager.swp.swp_blks, pindex)) != NULL;) { + pindex = sb->p + SWAP_META_PAGES; + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] != SWAPBLK_NONE) + swp_pager_freeswapspace(sb->d[i], 1); } - mtx_unlock(&swhash_mtx); - index += SWAP_META_PAGES; + SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, sb->p); + uma_zfree(swblk_zone, sb); } } @@ -1911,9 +1961,6 @@ swp_pager_meta_free_all(vm_object_t object) * was invalid. This routine will automatically free any invalid * meta-data swapblks. * - * It is not possible to store invalid swapblks in the swap meta data - * (other then a literal 'SWAPBLK_NONE'), so we don't bother checking. - * * When acting on a busy resident page and paging is in progress, we * have to wait until paging is complete but otherwise can act on the * busy page. 
@@ -1924,43 +1971,45 @@ swp_pager_meta_free_all(vm_object_t object) static daddr_t swp_pager_meta_ctl(vm_object_t object, vm_pindex_t pindex, int flags) { - struct swblock **pswap; - struct swblock *swap; + struct swblk *sb; daddr_t r1; - int idx; + int i; + + if ((flags & (SWM_FREE | SWM_POP)) != 0) + VM_OBJECT_ASSERT_WLOCKED(object); + else + VM_OBJECT_ASSERT_LOCKED(object); - VM_OBJECT_ASSERT_LOCKED(object); /* - * The meta data only exists of the object is OBJT_SWAP + * The meta data only exists if the object is OBJT_SWAP * and even then might not be allocated yet. */ if (object->type != OBJT_SWAP) return (SWAPBLK_NONE); - r1 = SWAPBLK_NONE; - mtx_lock(&swhash_mtx); - pswap = swp_pager_hash(object, pindex); - - if ((swap = *pswap) != NULL) { - idx = pindex & SWAP_META_MASK; - r1 = swap->swb_pages[idx]; - - if (r1 != SWAPBLK_NONE) { - if (flags & SWM_FREE) { - swp_pager_freeswapspace(r1, 1); - r1 = SWAPBLK_NONE; - } - if (flags & (SWM_FREE|SWM_POP)) { - swap->swb_pages[idx] = SWAPBLK_NONE; - if (--swap->swb_count == 0) { - *pswap = swap->swb_hnext; - uma_zfree(swap_zone, swap); - --object->un_pager.swp.swp_bcount; - } - } + sb = SWAP_PCTRIE_LOOKUP(&object->un_pager.swp.swp_blks, + rounddown(pindex, SWAP_META_PAGES)); + if (sb == NULL) + return (SWAPBLK_NONE); + r1 = sb->d[pindex % SWAP_META_PAGES]; + if (r1 == SWAPBLK_NONE) + return (SWAPBLK_NONE); + if ((flags & (SWM_FREE | SWM_POP)) != 0) { + sb->d[pindex % SWAP_META_PAGES] = SWAPBLK_NONE; + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] != SWAPBLK_NONE) + break; + } + if (i == SWAP_META_PAGES) { + SWAP_PCTRIE_REMOVE(&object->un_pager.swp.swp_blks, + rounddown(pindex, SWAP_META_PAGES)); + uma_zfree(swblk_zone, sb); } } - mtx_unlock(&swhash_mtx); + if ((flags & SWM_FREE) != 0) { + swp_pager_freeswapspace(r1, 1); + r1 = SWAPBLK_NONE; + } return (r1); } @@ -1974,32 +2023,38 @@ swp_pager_meta_ctl(vm_object_t object, vm_pindex_t pindex, int flags) vm_pindex_t swap_pager_find_least(vm_object_t object, vm_pindex_t pindex) { - struct swblock **pswap, *swap; - vm_pindex_t i, j, lim; - int idx; + struct swblk *sb; + int i; VM_OBJECT_ASSERT_LOCKED(object); - if (object->type != OBJT_SWAP || object->un_pager.swp.swp_bcount == 0) + if (object->type != OBJT_SWAP) return (object->size); - mtx_lock(&swhash_mtx); - for (j = pindex; j < object->size; j = lim) { - pswap = swp_pager_hash(object, j); - lim = rounddown2(j + SWAP_META_PAGES, SWAP_META_PAGES); - if (lim > object->size) - lim = object->size; - if ((swap = *pswap) != NULL) { - for (idx = j & SWAP_META_MASK, i = j; i < lim; - i++, idx++) { - if (swap->swb_pages[idx] != SWAPBLK_NONE) - goto found; - } + sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, + rounddown(pindex, SWAP_META_PAGES)); + if (sb == NULL) + return (object->size); + if (sb->p < pindex) { + for (i = pindex % SWAP_META_PAGES; i < SWAP_META_PAGES; i++) { + if (sb->d[i] != SWAPBLK_NONE) + return (sb->p + i); } + sb = SWAP_PCTRIE_LOOKUP_GE(&object->un_pager.swp.swp_blks, + roundup(pindex, SWAP_META_PAGES)); + if (sb == NULL) + return (object->size); } - i = object->size; -found: - mtx_unlock(&swhash_mtx); - return (i); + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->d[i] != SWAPBLK_NONE) + return (sb->p + i); + } + + /* + * We get here if a swblk is present in the trie but it + * doesn't map any blocks. + */ + MPASS(0); + return (object->size); } /* @@ -2035,7 +2090,7 @@ sys_swapon(struct thread *td, struct swapon_args *uap) * Swap metadata may not fit in the KVM if we have physical * memory of >1GB. 
*/ - if (swap_zone == NULL) { + if (swblk_zone == NULL) { error = ENOMEM; goto done; } @@ -2071,15 +2126,16 @@ done: /* * Check that the total amount of swap currently configured does not * exceed half the theoretical maximum. If it does, print a warning - * message and return -1; otherwise, return 0. + * message. */ -static int -swapon_check_swzone(unsigned long npages) +static void +swapon_check_swzone(void) { - unsigned long maxpages; + unsigned long maxpages, npages; + npages = swap_total / PAGE_SIZE; /* absolute maximum we can handle assuming 100% efficiency */ - maxpages = uma_zone_get_max(swap_zone) * SWAP_META_PAGES; + maxpages = uma_zone_get_max(swblk_zone) * SWAP_META_PAGES; /* recommend using no more than half that amount */ if (npages > maxpages / 2) { @@ -2088,9 +2144,7 @@ swapon_check_swzone(unsigned long npages) npages, maxpages / 2); printf("warning: increase kern.maxswzone " "or reduce amount of swap.\n"); - return (-1); } - return (0); } static void @@ -2158,7 +2212,7 @@ swaponsomething(struct vnode *vp, void *id, u_long nblks, nswapdev++; swap_pager_avail += nblks - 2; swap_total += (vm_ooffset_t)nblks * PAGE_SIZE; - swapon_check_swzone(swap_total / PAGE_SIZE); + swapon_check_swzone(); swp_sizecheck(); mtx_unlock(&sw_dev_mtx); } @@ -2379,15 +2433,9 @@ SYSCTL_NODE(_vm, OID_AUTO, swap_info, CTLFLAG_RD | CTLFLAG_MPSAFE, "Swap statistics by device"); /* - * vmspace_swap_count() - count the approximate swap usage in pages for a - * vmspace. - * - * The map must be locked. - * - * Swap usage is determined by taking the proportional swap used by - * VM objects backing the VM map. To make up for fractional losses, - * if the VM object has any swap use at all the associated map entries - * count for at least 1 swap page. + * Count the approximate swap usage in pages for a vmspace. The + * shadowed or not yet copied on write swap blocks are not accounted. + * The map must be locked. 
*/ long vmspace_swap_count(struct vmspace *vmspace) @@ -2395,23 +2443,38 @@ vmspace_swap_count(struct vmspace *vmspace) vm_map_t map; vm_map_entry_t cur; vm_object_t object; - long count, n; + struct swblk *sb; + vm_pindex_t e, pi; + long count; + int i; map = &vmspace->vm_map; count = 0; for (cur = map->header.next; cur != &map->header; cur = cur->next) { - if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) == 0 && - (object = cur->object.vm_object) != NULL) { - VM_OBJECT_WLOCK(object); - if (object->type == OBJT_SWAP && - object->un_pager.swp.swp_bcount != 0) { - n = (cur->end - cur->start) / PAGE_SIZE; - count += object->un_pager.swp.swp_bcount * - SWAP_META_PAGES * n / object->size + 1; + if ((cur->eflags & MAP_ENTRY_IS_SUB_MAP) != 0) + continue; + object = cur->object.vm_object; + if (object == NULL || object->type != OBJT_SWAP) + continue; + VM_OBJECT_RLOCK(object); + if (object->type != OBJT_SWAP) + goto unlock; + pi = OFF_TO_IDX(cur->offset); + e = pi + OFF_TO_IDX(cur->end - cur->start); + for (;; pi = sb->p + SWAP_META_PAGES) { + sb = SWAP_PCTRIE_LOOKUP_GE( + &object->un_pager.swp.swp_blks, pi); + if (sb == NULL || sb->p >= e) + break; + for (i = 0; i < SWAP_META_PAGES; i++) { + if (sb->p + i < e && + sb->d[i] != SWAPBLK_NONE) + count++; } - VM_OBJECT_WUNLOCK(object); } +unlock: + VM_OBJECT_RUNLOCK(object); } return (count); } diff --git a/sys/vm/swap_pager.h b/sys/vm/swap_pager.h index 0131297..1abded5 100644 --- a/sys/vm/swap_pager.h +++ b/sys/vm/swap_pager.h @@ -73,7 +73,6 @@ struct swdevt { #ifdef _KERNEL -extern int swap_pager_full; extern int swap_pager_avail; struct xswdev; diff --git a/sys/vm/uma.h b/sys/vm/uma.h index f4c2de8..55b9e61 100644 --- a/sys/vm/uma.h +++ b/sys/vm/uma.h @@ -296,6 +296,7 @@ uma_zone_t uma_zcache_create(char *name, int size, uma_ctor ctor, uma_dtor dtor, #define UMA_ALIGN_SHORT (sizeof(short) - 1) /* "" short */ #define UMA_ALIGN_CHAR (sizeof(char) - 1) /* "" char */ #define UMA_ALIGN_CACHE (0 - 1) /* Cache line size align */ +#define UMA_ALIGNOF(type) (_Alignof(type) - 1) /* Alignment fit for 'type' */ /* * Destroys an empty uma zone. If the zone is not empty uma complains loudly. diff --git a/sys/vm/uma_core.c b/sys/vm/uma_core.c index 26439dc..8504a72 100644 --- a/sys/vm/uma_core.c +++ b/sys/vm/uma_core.c @@ -1326,10 +1326,6 @@ keg_large_init(uma_keg_t keg) keg->uk_ipers = 1; keg->uk_rsize = keg->uk_size; - /* We can't do OFFPAGE if we're internal, bail out here. */ - if (keg->uk_flags & UMA_ZFLAG_INTERNAL) - return; - /* Check whether we have enough space to not do OFFPAGE. */ if ((keg->uk_flags & UMA_ZONE_OFFPAGE) == 0) { shsize = sizeof(struct uma_slab); @@ -1337,8 +1333,17 @@ keg_large_init(uma_keg_t keg) shsize = (shsize & ~UMA_ALIGN_PTR) + (UMA_ALIGN_PTR + 1); - if ((PAGE_SIZE * keg->uk_ppera) - keg->uk_rsize < shsize) - keg->uk_flags |= UMA_ZONE_OFFPAGE; + if (PAGE_SIZE * keg->uk_ppera - keg->uk_rsize < shsize) { + /* + * We can't do OFFPAGE if we're internal, in which case + * we need an extra page per allocation to contain the + * slab header. 
+ */ + if ((keg->uk_flags & UMA_ZFLAG_INTERNAL) == 0) + keg->uk_flags |= UMA_ZONE_OFFPAGE; + else + keg->uk_ppera++; + } } if ((keg->uk_flags & UMA_ZONE_OFFPAGE) && diff --git a/sys/vm/uma_int.h b/sys/vm/uma_int.h index 8423d1c..ee315ab 100644 --- a/sys/vm/uma_int.h +++ b/sys/vm/uma_int.h @@ -28,6 +28,7 @@ * */ +#include <sys/_bitset.h> #include <sys/_task.h> /* @@ -210,7 +211,7 @@ struct uma_keg { vm_offset_t uk_kva; /* Zone base KVA */ uma_zone_t uk_slabzone; /* Slab zone backing us, if OFFPAGE */ - uint16_t uk_pgoff; /* Offset to uma_slab struct */ + uint32_t uk_pgoff; /* Offset to uma_slab struct */ uint16_t uk_ppera; /* pages per allocation from backend */ uint16_t uk_ipers; /* Items per slab */ uint32_t uk_flags; /* Internal flags */ diff --git a/sys/vm/vm_extern.h b/sys/vm/vm_extern.h index 0489b6e..8e55273 100644 --- a/sys/vm/vm_extern.h +++ b/sys/vm/vm_extern.h @@ -71,7 +71,6 @@ void kmem_init(vm_offset_t, vm_offset_t); void kmem_init_zero_region(void); void kmeminit(void); -void swapout_procs(int); int kernacc(void *, int, int); int useracc(void *, int, int); int vm_fault(vm_map_t, vm_offset_t, vm_prot_t, int); diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c index d2147f6..92c761b 100644 --- a/sys/vm/vm_fault.c +++ b/sys/vm/vm_fault.c @@ -236,14 +236,15 @@ vm_fault_dirty(vm_map_entry_t entry, vm_page_t m, vm_prot_t prot, * written NOW so dirty it explicitly to save on * pmap_is_modified() calls later. * - * Also tell the backing pager, if any, that it should remove - * any swap backing since the page is now dirty. + * Also, since the page is now dirty, we can possibly tell + * the pager to release any swap backing the page. Calling + * the pager requires a write lock on the object. */ if (need_dirty) vm_page_dirty(m); if (!set_wd) vm_page_unlock(m); - if (need_dirty) + else if (need_dirty) vm_pager_page_unswapped(m); } @@ -266,8 +267,12 @@ static int vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, int fault_type, int fault_flags, boolean_t wired, vm_page_t *m_hold) { - vm_page_t m; - int rv; + vm_page_t m, m_map; +#if defined(__amd64__) && VM_NRESERVLEVEL > 0 + vm_page_t m_super; + int flags; +#endif + int psind, rv; MPASS(fs->vp == NULL); m = vm_page_lookup(fs->first_object, fs->first_pindex); @@ -275,14 +280,46 @@ vm_fault_soft_fast(struct faultstate *fs, vm_offset_t vaddr, vm_prot_t prot, if (m == NULL || ((prot & VM_PROT_WRITE) != 0 && vm_page_busied(m)) || m->valid != VM_PAGE_BITS_ALL) return (KERN_FAILURE); - rv = pmap_enter(fs->map->pmap, vaddr, m, prot, fault_type | - PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), 0); + m_map = m; + psind = 0; +#if defined(__amd64__) && VM_NRESERVLEVEL > 0 + if ((m->flags & PG_FICTITIOUS) == 0 && + (m_super = vm_reserv_to_superpage(m)) != NULL && + rounddown2(vaddr, pagesizes[m_super->psind]) >= fs->entry->start && + roundup2(vaddr + 1, pagesizes[m_super->psind]) <= fs->entry->end && + (vaddr & (pagesizes[m_super->psind] - 1)) == (VM_PAGE_TO_PHYS(m) & + (pagesizes[m_super->psind] - 1)) && + pmap_ps_enabled(fs->map->pmap)) { + flags = PS_ALL_VALID; + if ((prot & VM_PROT_WRITE) != 0) { + /* + * Create a superpage mapping allowing write access + * only if none of the constituent pages are busy and + * all of them are already dirty (except possibly for + * the page that was faulted on). 
+ */ + flags |= PS_NONE_BUSY; + if ((fs->first_object->flags & OBJ_UNMANAGED) == 0) + flags |= PS_ALL_DIRTY; + } + if (vm_page_ps_test(m_super, flags, m)) { + m_map = m_super; + psind = m_super->psind; + vaddr = rounddown2(vaddr, pagesizes[psind]); + /* Preset the modified bit for dirty superpages. */ + if ((flags & PS_ALL_DIRTY) != 0) + fault_type |= VM_PROT_WRITE; + } + } +#endif + rv = pmap_enter(fs->map->pmap, vaddr, m_map, prot, fault_type | + PMAP_ENTER_NOSLEEP | (wired ? PMAP_ENTER_WIRED : 0), psind); if (rv != KERN_SUCCESS) return (rv); vm_fault_fill_hold(m_hold, m); vm_fault_dirty(fs->entry, m, prot, fault_type, fault_flags, false); VM_OBJECT_RUNLOCK(fs->first_object); - if (!wired) + if (psind == 0 && !wired) vm_fault_prefault(fs, vaddr, PFBAK, PFFOR); vm_map_lookup_done(fs->map, fs->entry); curthread->td_ru.ru_minflt++; diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c index 60b822e..14ec78a 100644 --- a/sys/vm/vm_glue.c +++ b/sys/vm/vm_glue.c @@ -101,13 +101,6 @@ __FBSDID("$FreeBSD$"); #include <machine/cpu.h> -#ifndef NO_SWAPPING -static int swapout(struct proc *); -static void swapclear(struct proc *); -static void vm_thread_swapin(struct thread *td); -static void vm_thread_swapout(struct thread *td); -#endif - /* * MPSAFE * @@ -308,10 +301,6 @@ SYSCTL_INT(_vm, OID_AUTO, kstack_cache_size, CTLFLAG_RW, &kstack_cache_size, 0, SYSCTL_INT(_vm, OID_AUTO, kstacks, CTLFLAG_RD, &kstacks, 0, ""); -#ifndef KSTACK_MAX_PAGES -#define KSTACK_MAX_PAGES 32 -#endif - /* * Create the kernel stack (including pcb for i386) for a new thread. * This routine directly affects the fork perf for a process and @@ -322,7 +311,7 @@ vm_thread_new(struct thread *td, int pages) { vm_object_t ksobj; vm_offset_t ks; - vm_page_t m, ma[KSTACK_MAX_PAGES]; + vm_page_t ma[KSTACK_MAX_PAGES]; struct kstack_cache_entry *ks_ce; int i; @@ -391,15 +380,10 @@ vm_thread_new(struct thread *td, int pages) * page of stack. */ VM_OBJECT_WLOCK(ksobj); - for (i = 0; i < pages; i++) { - /* - * Get a kernel stack page. - */ - m = vm_page_grab(ksobj, i, VM_ALLOC_NOBUSY | - VM_ALLOC_NORMAL | VM_ALLOC_WIRED); - ma[i] = m; - m->valid = VM_PAGE_BITS_ALL; - } + (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_NOBUSY | + VM_ALLOC_WIRED, ma, pages); + for (i = 0; i < pages; i++) + ma[i]->valid = VM_PAGE_BITS_ALL; VM_OBJECT_WUNLOCK(ksobj); pmap_qenter(ks, ma, pages); return (1); @@ -532,80 +516,6 @@ intr_prof_stack_use(struct thread *td, struct trapframe *frame) } #endif /* KSTACK_USAGE_PROF */ -#ifndef NO_SWAPPING -/* - * Allow a thread's kernel stack to be paged out. - */ -static void -vm_thread_swapout(struct thread *td) -{ - vm_object_t ksobj; - vm_page_t m; - int i, pages; - - cpu_thread_swapout(td); - pages = td->td_kstack_pages; - ksobj = td->td_kstack_obj; - pmap_qremove(td->td_kstack, pages); - VM_OBJECT_WLOCK(ksobj); - for (i = 0; i < pages; i++) { - m = vm_page_lookup(ksobj, i); - if (m == NULL) - panic("vm_thread_swapout: kstack already missing?"); - vm_page_dirty(m); - vm_page_lock(m); - vm_page_unwire(m, PQ_INACTIVE); - vm_page_unlock(m); - } - VM_OBJECT_WUNLOCK(ksobj); -} - -/* - * Bring the kernel stack for a specified thread back in. 
- */ -static void -vm_thread_swapin(struct thread *td) -{ - vm_object_t ksobj; - vm_page_t ma[KSTACK_MAX_PAGES]; - int pages; - - pages = td->td_kstack_pages; - ksobj = td->td_kstack_obj; - VM_OBJECT_WLOCK(ksobj); - for (int i = 0; i < pages; i++) - ma[i] = vm_page_grab(ksobj, i, VM_ALLOC_NORMAL | - VM_ALLOC_WIRED); - for (int i = 0; i < pages;) { - int j, a, count, rv; - - vm_page_assert_xbusied(ma[i]); - if (ma[i]->valid == VM_PAGE_BITS_ALL) { - vm_page_xunbusy(ma[i]); - i++; - continue; - } - vm_object_pip_add(ksobj, 1); - for (j = i + 1; j < pages; j++) - if (ma[j]->valid == VM_PAGE_BITS_ALL) - break; - rv = vm_pager_has_page(ksobj, ma[i]->pindex, NULL, &a); - KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i])); - count = min(a + 1, j - i); - rv = vm_pager_get_pages(ksobj, ma + i, count, NULL, NULL); - KASSERT(rv == VM_PAGER_OK, ("%s: cannot get kstack for proc %d", - __func__, td->td_proc->p_pid)); - vm_object_pip_wakeup(ksobj); - for (j = i; j < i + count; j++) - vm_page_xunbusy(ma[j]); - i += count; - } - VM_OBJECT_WUNLOCK(ksobj); - pmap_qenter(td->td_kstack, ma, pages); - cpu_thread_swapin(td); -} -#endif /* !NO_SWAPPING */ - /* * Implement fork's actions on an address space. * Here we arrange for the address space to be copied or referenced, @@ -679,425 +589,8 @@ vm_waitproc(p) } void -faultin(p) - struct proc *p; -{ -#ifdef NO_SWAPPING - - PROC_LOCK_ASSERT(p, MA_OWNED); - if ((p->p_flag & P_INMEM) == 0) - panic("faultin: proc swapped out with NO_SWAPPING!"); -#else /* !NO_SWAPPING */ - struct thread *td; - - PROC_LOCK_ASSERT(p, MA_OWNED); - /* - * If another process is swapping in this process, - * just wait until it finishes. - */ - if (p->p_flag & P_SWAPPINGIN) { - while (p->p_flag & P_SWAPPINGIN) - msleep(&p->p_flag, &p->p_mtx, PVM, "faultin", 0); - return; - } - if ((p->p_flag & P_INMEM) == 0) { - /* - * Don't let another thread swap process p out while we are - * busy swapping it in. - */ - ++p->p_lock; - p->p_flag |= P_SWAPPINGIN; - PROC_UNLOCK(p); - - /* - * We hold no lock here because the list of threads - * can not change while all threads in the process are - * swapped out. - */ - FOREACH_THREAD_IN_PROC(p, td) - vm_thread_swapin(td); - PROC_LOCK(p); - swapclear(p); - p->p_swtick = ticks; - - wakeup(&p->p_flag); - - /* Allow other threads to swap p out now. */ - --p->p_lock; - } -#endif /* NO_SWAPPING */ -} - -/* - * This swapin algorithm attempts to swap-in processes only if there - * is enough space for them. Of course, if a process waits for a long - * time, it will be swapped in anyway. - */ -void -swapper(void) -{ - struct proc *p; - struct thread *td; - struct proc *pp; - int slptime; - int swtime; - int ppri; - int pri; - -loop: - if (vm_page_count_min()) { - VM_WAIT; - goto loop; - } - - pp = NULL; - ppri = INT_MIN; - sx_slock(&allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - PROC_LOCK(p); - if (p->p_state == PRS_NEW || - p->p_flag & (P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) { - PROC_UNLOCK(p); - continue; - } - swtime = (ticks - p->p_swtick) / hz; - FOREACH_THREAD_IN_PROC(p, td) { - /* - * An otherwise runnable thread of a process - * swapped out has only the TDI_SWAPPED bit set. - * - */ - thread_lock(td); - if (td->td_inhibitors == TDI_SWAPPED) { - slptime = (ticks - td->td_slptick) / hz; - pri = swtime + slptime; - if ((td->td_flags & TDF_SWAPINREQ) == 0) - pri -= p->p_nice * 8; - /* - * if this thread is higher priority - * and there is enough space, then select - * this process instead of the previous - * selection. 
- */ - if (pri > ppri) { - pp = p; - ppri = pri; - } - } - thread_unlock(td); - } - PROC_UNLOCK(p); - } - sx_sunlock(&allproc_lock); - - /* - * Nothing to do, back to sleep. - */ - if ((p = pp) == NULL) { - tsleep(&proc0, PVM, "swapin", MAXSLP * hz / 2); - goto loop; - } - PROC_LOCK(p); - - /* - * Another process may be bringing or may have already - * brought this process in while we traverse all threads. - * Or, this process may even be being swapped out again. - */ - if (p->p_flag & (P_INMEM | P_SWAPPINGOUT | P_SWAPPINGIN)) { - PROC_UNLOCK(p); - goto loop; - } - - /* - * We would like to bring someone in. (only if there is space). - * [What checks the space? ] - */ - faultin(p); - PROC_UNLOCK(p); - goto loop; -} - -void kick_proc0(void) { wakeup(&proc0); } - -#ifndef NO_SWAPPING - -/* - * Swap_idle_threshold1 is the guaranteed swapped in time for a process - */ -static int swap_idle_threshold1 = 2; -SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1, CTLFLAG_RW, - &swap_idle_threshold1, 0, "Guaranteed swapped in time for a process"); - -/* - * Swap_idle_threshold2 is the time that a process can be idle before - * it will be swapped out, if idle swapping is enabled. - */ -static int swap_idle_threshold2 = 10; -SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, CTLFLAG_RW, - &swap_idle_threshold2, 0, "Time before a process will be swapped out"); - -/* - * First, if any processes have been sleeping or stopped for at least - * "swap_idle_threshold1" seconds, they are swapped out. If, however, - * no such processes exist, then the longest-sleeping or stopped - * process is swapped out. Finally, and only as a last resort, if - * there are no sleeping or stopped processes, the longest-resident - * process is swapped out. - */ -void -swapout_procs(action) -int action; -{ - struct proc *p; - struct thread *td; - int didswap = 0; - -retry: - sx_slock(&allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - struct vmspace *vm; - int minslptime = 100000; - int slptime; - - PROC_LOCK(p); - /* - * Watch out for a process in - * creation. It may have no - * address space or lock yet. - */ - if (p->p_state == PRS_NEW) { - PROC_UNLOCK(p); - continue; - } - /* - * An aio daemon switches its - * address space while running. - * Perform a quick check whether - * a process has P_SYSTEM. - * Filter out exiting processes. - */ - if ((p->p_flag & (P_SYSTEM | P_WEXIT)) != 0) { - PROC_UNLOCK(p); - continue; - } - _PHOLD_LITE(p); - PROC_UNLOCK(p); - sx_sunlock(&allproc_lock); - - /* - * Do not swapout a process that - * is waiting for VM data - * structures as there is a possible - * deadlock. Test this first as - * this may block. - * - * Lock the map until swapout - * finishes, or a thread of this - * process may attempt to alter - * the map. - */ - vm = vmspace_acquire_ref(p); - if (vm == NULL) - goto nextproc2; - if (!vm_map_trylock(&vm->vm_map)) - goto nextproc1; - - PROC_LOCK(p); - if (p->p_lock != 1 || (p->p_flag & (P_STOPPED_SINGLE | - P_TRACED | P_SYSTEM)) != 0) - goto nextproc; - - /* - * only aiod changes vmspace, however it will be - * skipped because of the if statement above checking - * for P_SYSTEM - */ - if ((p->p_flag & (P_INMEM|P_SWAPPINGOUT|P_SWAPPINGIN)) != P_INMEM) - goto nextproc; - - switch (p->p_state) { - default: - /* Don't swap out processes in any sort - * of 'special' state. */ - break; - - case PRS_NORMAL: - /* - * do not swapout a realtime process - * Check all the thread groups.. 
- */ - FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - if (PRI_IS_REALTIME(td->td_pri_class)) { - thread_unlock(td); - goto nextproc; - } - slptime = (ticks - td->td_slptick) / hz; - /* - * Guarantee swap_idle_threshold1 - * time in memory. - */ - if (slptime < swap_idle_threshold1) { - thread_unlock(td); - goto nextproc; - } - - /* - * Do not swapout a process if it is - * waiting on a critical event of some - * kind or there is a thread whose - * pageable memory may be accessed. - * - * This could be refined to support - * swapping out a thread. - */ - if (!thread_safetoswapout(td)) { - thread_unlock(td); - goto nextproc; - } - /* - * If the system is under memory stress, - * or if we are swapping - * idle processes >= swap_idle_threshold2, - * then swap the process out. - */ - if (((action & VM_SWAP_NORMAL) == 0) && - (((action & VM_SWAP_IDLE) == 0) || - (slptime < swap_idle_threshold2))) { - thread_unlock(td); - goto nextproc; - } - - if (minslptime > slptime) - minslptime = slptime; - thread_unlock(td); - } - - /* - * If the pageout daemon didn't free enough pages, - * or if this process is idle and the system is - * configured to swap proactively, swap it out. - */ - if ((action & VM_SWAP_NORMAL) || - ((action & VM_SWAP_IDLE) && - (minslptime > swap_idle_threshold2))) { - _PRELE(p); - if (swapout(p) == 0) - didswap++; - PROC_UNLOCK(p); - vm_map_unlock(&vm->vm_map); - vmspace_free(vm); - goto retry; - } - } -nextproc: - PROC_UNLOCK(p); - vm_map_unlock(&vm->vm_map); -nextproc1: - vmspace_free(vm); -nextproc2: - sx_slock(&allproc_lock); - PRELE(p); - } - sx_sunlock(&allproc_lock); - /* - * If we swapped something out, and another process needed memory, - * then wakeup the sched process. - */ - if (didswap) - wakeup(&proc0); -} - -static void -swapclear(p) - struct proc *p; -{ - struct thread *td; - - PROC_LOCK_ASSERT(p, MA_OWNED); - - FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - td->td_flags |= TDF_INMEM; - td->td_flags &= ~TDF_SWAPINREQ; - TD_CLR_SWAPPED(td); - if (TD_CAN_RUN(td)) - if (setrunnable(td)) { -#ifdef INVARIANTS - /* - * XXX: We just cleared TDI_SWAPPED - * above and set TDF_INMEM, so this - * should never happen. - */ - panic("not waking up swapper"); -#endif - } - thread_unlock(td); - } - p->p_flag &= ~(P_SWAPPINGIN|P_SWAPPINGOUT); - p->p_flag |= P_INMEM; -} - -static int -swapout(p) - struct proc *p; -{ - struct thread *td; - - PROC_LOCK_ASSERT(p, MA_OWNED); -#if defined(SWAP_DEBUG) - printf("swapping out %d\n", p->p_pid); -#endif - - /* - * The states of this process and its threads may have changed - * by now. Assuming that there is only one pageout daemon thread, - * this process should still be in memory. - */ - KASSERT((p->p_flag & (P_INMEM|P_SWAPPINGOUT|P_SWAPPINGIN)) == P_INMEM, - ("swapout: lost a swapout race?")); - - /* - * remember the process resident count - */ - p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace); - /* - * Check and mark all threads before we proceed. - */ - p->p_flag &= ~P_INMEM; - p->p_flag |= P_SWAPPINGOUT; - FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - if (!thread_safetoswapout(td)) { - thread_unlock(td); - swapclear(p); - return (EBUSY); - } - td->td_flags &= ~TDF_INMEM; - TD_SET_SWAPPED(td); - thread_unlock(td); - } - td = FIRST_THREAD_IN_PROC(p); - ++td->td_ru.ru_nswap; - PROC_UNLOCK(p); - - /* - * This list is stable because all threads are now prevented from - * running. The list is only modified in the context of a running - * thread in this process. 
- */ - FOREACH_THREAD_IN_PROC(p, td) - vm_thread_swapout(td); - - PROC_LOCK(p); - p->p_flag &= ~P_SWAPPINGOUT; - p->p_swtick = ticks; - return (0); -} -#endif /* !NO_SWAPPING */ diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index e1c3db0..3778f76 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -121,8 +121,7 @@ SYSCTL_ULONG(_vm, OID_AUTO, max_kernel_address, CTLFLAG_RD, * a mapping on demand through vm_fault() will result in a panic. */ vm_offset_t -kva_alloc(size) - vm_size_t size; +kva_alloc(vm_size_t size) { vm_offset_t addr; @@ -143,9 +142,7 @@ kva_alloc(size) * This routine may not block on kernel maps. */ void -kva_free(addr, size) - vm_offset_t addr; - vm_size_t size; +kva_free(vm_offset_t addr, vm_size_t size) { size = round_page(size); @@ -430,9 +427,7 @@ kmem_free(struct vmem *vmem, vm_offset_t addr, vm_size_t size) * This routine may block. */ vm_offset_t -kmap_alloc_wait(map, size) - vm_map_t map; - vm_size_t size; +kmap_alloc_wait(vm_map_t map, vm_size_t size) { vm_offset_t addr; @@ -470,10 +465,7 @@ kmap_alloc_wait(map, size) * waiting for memory in that map. */ void -kmap_free_wakeup(map, addr, size) - vm_map_t map; - vm_offset_t addr; - vm_size_t size; +kmap_free_wakeup(vm_map_t map, vm_offset_t addr, vm_size_t size) { vm_map_lock(map); @@ -517,8 +509,7 @@ kmem_init_zero_region(void) * `start' as allocated, and the range between `start' and `end' as free. */ void -kmem_init(start, end) - vm_offset_t start, end; +kmem_init(vm_offset_t start, vm_offset_t end) { vm_map_t m; diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c index ce1696f..4a5eadf 100644 --- a/sys/vm/vm_map.c +++ b/sys/vm/vm_map.c @@ -1556,6 +1556,18 @@ again: return (result); } +/* + * vm_map_find_min() is a variant of vm_map_find() that takes an + * additional parameter (min_addr) and treats the given address + * (*addr) differently. Specifically, it treats *addr as a hint + * and not as the minimum address where the mapping is created. + * + * This function works in two phases. First, it tries to + * allocate above the hint. If that fails and the hint is + * greater than min_addr, it performs a second pass, replacing + * the hint with min_addr as the minimum address for the + * allocation. 
+ */ int vm_map_find_min(vm_map_t map, vm_object_t object, vm_ooffset_t offset, vm_offset_t *addr, vm_size_t length, vm_offset_t min_addr, @@ -1962,7 +1974,7 @@ vm_map_pmap_enter(vm_map_t map, vm_offset_t addr, vm_prot_t prot, (pagesizes[p->psind] - 1)) == 0) { mask = atop(pagesizes[p->psind]) - 1; if (tmpidx + mask < psize && - vm_page_ps_is_valid(p)) { + vm_page_ps_test(p, PS_ALL_VALID, NULL)) { p += mask; threshold += mask; } @@ -3610,12 +3622,13 @@ vm_map_stack_locked(vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize, KASSERT(orient != (MAP_STACK_GROWS_DOWN | MAP_STACK_GROWS_UP), ("bi-dir stack")); - sgp = (vm_size_t)stack_guard_page * PAGE_SIZE; if (addrbos < vm_map_min(map) || - addrbos > vm_map_max(map) || - addrbos + max_ssize < addrbos || - sgp >= max_ssize) - return (KERN_NO_SPACE); + addrbos + max_ssize > vm_map_max(map) || + addrbos + max_ssize <= addrbos) + return (KERN_INVALID_ADDRESS); + sgp = (vm_size_t)stack_guard_page * PAGE_SIZE; + if (sgp >= max_ssize) + return (KERN_INVALID_ARGUMENT); init_ssize = growsize; if (max_ssize < init_ssize + sgp) diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 17631f9..258a96e 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -1196,7 +1196,7 @@ vm_mmap_vnode(struct thread *td, vm_size_t objsize, { struct vattr va; vm_object_t obj; - vm_offset_t foff; + vm_ooffset_t foff; struct ucred *cred; int error, flags, locktype; diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c index 5bf9e26..11e1f84 100644 --- a/sys/vm/vm_object.c +++ b/sys/vm/vm_object.c @@ -73,6 +73,7 @@ __FBSDID("$FreeBSD$"); #include <sys/mman.h> #include <sys/mount.h> #include <sys/kernel.h> +#include <sys/pctrie.h> #include <sys/sysctl.h> #include <sys/mutex.h> #include <sys/proc.h> /* for curproc, pageproc */ @@ -208,6 +209,7 @@ vm_object_zinit(void *mem, int size, int flags) object->paging_in_progress = 0; object->resident_page_count = 0; object->shadow_count = 0; + object->flags = OBJ_DEAD; mtx_lock(&vm_object_list_mtx); TAILQ_INSERT_TAIL(&vm_object_list, object, object_list); @@ -223,6 +225,16 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object) LIST_INIT(&object->shadow_head); object->type = type; + if (type == OBJT_SWAP) + pctrie_init(&object->un_pager.swp.swp_blks); + + /* + * Ensure that swap_pager_swapoff() iteration over object_list + * sees up to date type and pctrie head if it observed + * non-dead object. + */ + atomic_thread_fence_rel(); + switch (type) { case OBJT_DEAD: panic("_vm_object_allocate: can't create OBJT_DEAD"); @@ -694,6 +706,89 @@ vm_object_destroy(vm_object_t object) } /* + * vm_object_terminate_pages removes any remaining pageable pages + * from the object and resets the object to an empty state. + */ +static void +vm_object_terminate_pages(vm_object_t object) +{ + vm_page_t p, p_next; + struct mtx *mtx, *mtx1; + struct vm_pagequeue *pq, *pq1; + + VM_OBJECT_ASSERT_WLOCKED(object); + + mtx = NULL; + pq = NULL; + + /* + * Free any remaining pageable pages. This also removes them from the + * paging queues. However, don't free wired pages, just remove them + * from the object. Rather than incrementally removing each page from + * the object, the page and object are reset to any empty state. + */ + TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { + vm_page_assert_unbusied(p); + if ((object->flags & OBJ_UNMANAGED) == 0) { + /* + * vm_page_free_prep() only needs the page + * lock for managed pages. 
+ */ + mtx1 = vm_page_lockptr(p); + if (mtx1 != mtx) { + if (mtx != NULL) + mtx_unlock(mtx); + if (pq != NULL) { + vm_pagequeue_unlock(pq); + pq = NULL; + } + mtx = mtx1; + mtx_lock(mtx); + } + } + p->object = NULL; + if (p->wire_count != 0) + goto unlist; + PCPU_INC(cnt.v_pfree); + p->flags &= ~PG_ZERO; + if (p->queue != PQ_NONE) { + KASSERT(p->queue < PQ_COUNT, ("vm_object_terminate: " + "page %p is not queued", p)); + pq1 = vm_page_pagequeue(p); + if (pq != pq1) { + if (pq != NULL) + vm_pagequeue_unlock(pq); + pq = pq1; + vm_pagequeue_lock(pq); + } + } + if (vm_page_free_prep(p, true)) + continue; +unlist: + TAILQ_REMOVE(&object->memq, p, listq); + } + if (pq != NULL) + vm_pagequeue_unlock(pq); + if (mtx != NULL) + mtx_unlock(mtx); + + vm_page_free_phys_pglist(&object->memq); + + /* + * If the object contained any pages, then reset it to an empty state. + * None of the object's fields, including "resident_page_count", were + * modified by the preceding loop. + */ + if (object->resident_page_count != 0) { + vm_radix_reclaim_allnodes(&object->rtree); + TAILQ_INIT(&object->memq); + object->resident_page_count = 0; + if (object->type == OBJT_VNODE) + vdrop(object->handle); + } +} + +/* * vm_object_terminate actually destroys the specified object, freeing * up all previously used resources. * @@ -703,7 +798,6 @@ vm_object_destroy(vm_object_t object) void vm_object_terminate(vm_object_t object) { - vm_page_t p, p_next; VM_OBJECT_ASSERT_WLOCKED(object); @@ -746,41 +840,8 @@ vm_object_terminate(vm_object_t object) ("vm_object_terminate: object with references, ref_count=%d", object->ref_count)); - /* - * Free any remaining pageable pages. This also removes them from the - * paging queues. However, don't free wired pages, just remove them - * from the object. Rather than incrementally removing each page from - * the object, the page and object are reset to any empty state. - */ - TAILQ_FOREACH_SAFE(p, &object->memq, listq, p_next) { - vm_page_assert_unbusied(p); - vm_page_lock(p); - /* - * Optimize the page's removal from the object by resetting - * its "object" field. Specifically, if the page is not - * wired, then the effect of this assignment is that - * vm_page_free()'s call to vm_page_remove() will return - * immediately without modifying the page or the object. - */ - p->object = NULL; - if (p->wire_count == 0) { - vm_page_free(p); - PCPU_INC(cnt.v_pfree); - } - vm_page_unlock(p); - } - /* - * If the object contained any pages, then reset it to an empty state. - * None of the object's fields, including "resident_page_count", were - * modified by the preceding loop. - */ - if (object->resident_page_count != 0) { - vm_radix_reclaim_allnodes(&object->rtree); - TAILQ_INIT(&object->memq); - object->resident_page_count = 0; - if (object->type == OBJT_VNODE) - vdrop(object->handle); - } + if ((object->flags & OBJ_PG_DTOR) == 0) + vm_object_terminate_pages(object); #if VM_NRESERVLEVEL > 0 if (__predict_false(!LIST_EMPTY(&object->rvq))) @@ -1022,8 +1083,8 @@ vm_object_sync(vm_object_t object, vm_ooffset_t offset, vm_size_t size, * I/O. 
*/ if (object->type == OBJT_VNODE && - (object->flags & OBJ_MIGHTBEDIRTY) != 0) { - vp = object->handle; + (object->flags & OBJ_MIGHTBEDIRTY) != 0 && + ((vp = object->handle)->v_vflag & VV_NOSYNC) == 0) { VM_OBJECT_WUNLOCK(object); (void) vn_start_write(vp, &mp, V_WAIT); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); @@ -1898,6 +1959,8 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, int options) { vm_page_t p, next; + struct mtx *mtx; + struct pglist pgl; VM_OBJECT_ASSERT_WLOCKED(object); KASSERT((object->flags & OBJ_UNMANAGED) == 0 || @@ -1906,8 +1969,10 @@ vm_object_page_remove(vm_object_t object, vm_pindex_t start, vm_pindex_t end, if (object->resident_page_count == 0) return; vm_object_pip_add(object, 1); + TAILQ_INIT(&pgl); again: p = vm_page_find_least(object, start); + mtx = NULL; /* * Here, the variable "p" is either (1) the page with the least pindex @@ -1924,7 +1989,7 @@ again: * however, be invalidated if the option OBJPR_CLEANONLY is * not specified. */ - vm_page_lock(p); + vm_page_change_lock(p, &mtx); if (vm_page_xbusied(p)) { VM_OBJECT_WUNLOCK(object); vm_page_busy_sleep(p, "vmopax", true); @@ -1932,13 +1997,14 @@ again: goto again; } if (p->wire_count != 0) { - if ((options & OBJPR_NOTMAPPED) == 0) + if ((options & OBJPR_NOTMAPPED) == 0 && + object->ref_count != 0) pmap_remove_all(p); if ((options & OBJPR_CLEANONLY) == 0) { p->valid = 0; vm_page_undirty(p); } - goto next; + continue; } if (vm_page_busied(p)) { VM_OBJECT_WUNLOCK(object); @@ -1949,17 +2015,21 @@ again: KASSERT((p->flags & PG_FICTITIOUS) == 0, ("vm_object_page_remove: page %p is fictitious", p)); if ((options & OBJPR_CLEANONLY) != 0 && p->valid != 0) { - if ((options & OBJPR_NOTMAPPED) == 0) + if ((options & OBJPR_NOTMAPPED) == 0 && + object->ref_count != 0) pmap_remove_write(p); - if (p->dirty) - goto next; + if (p->dirty != 0) + continue; } - if ((options & OBJPR_NOTMAPPED) == 0) + if ((options & OBJPR_NOTMAPPED) == 0 && object->ref_count != 0) pmap_remove_all(p); - vm_page_free(p); -next: - vm_page_unlock(p); + p->flags &= ~PG_ZERO; + if (vm_page_free_prep(p, false)) + TAILQ_INSERT_TAIL(&pgl, p, listq); } + if (mtx != NULL) + mtx_unlock(mtx); + vm_page_free_phys_pglist(&pgl); vm_object_pip_wakeup(object); } @@ -1982,7 +2052,7 @@ next: void vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end) { - struct mtx *mtx, *new_mtx; + struct mtx *mtx; vm_page_t p, next; VM_OBJECT_ASSERT_LOCKED(object); @@ -1999,17 +2069,7 @@ vm_object_page_noreuse(vm_object_t object, vm_pindex_t start, vm_pindex_t end) mtx = NULL; for (; p != NULL && (p->pindex < end || end == 0); p = next) { next = TAILQ_NEXT(p, listq); - - /* - * Avoid releasing and reacquiring the same page lock. 
- */ - new_mtx = vm_page_lockptr(p); - if (mtx != new_mtx) { - if (mtx != NULL) - mtx_unlock(mtx); - mtx = new_mtx; - mtx_lock(mtx); - } + vm_page_change_lock(p, &mtx); vm_page_deactivate_noreuse(p); } if (mtx != NULL) diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h index 9b2192e..17a885e 100644 --- a/sys/vm/vm_object.h +++ b/sys/vm/vm_object.h @@ -70,6 +70,7 @@ #include <sys/queue.h> #include <sys/_lock.h> #include <sys/_mutex.h> +#include <sys/_pctrie.h> #include <sys/_rwlock.h> #include <vm/_vm_radix.h> @@ -86,12 +87,17 @@ * */ +#ifndef VM_PAGE_HAVE_PGLIST +TAILQ_HEAD(pglist, vm_page); +#define VM_PAGE_HAVE_PGLIST +#endif + struct vm_object { struct rwlock lock; TAILQ_ENTRY(vm_object) object_list; /* list of all objects */ LIST_HEAD(, vm_object) shadow_head; /* objects that this is a shadow for */ LIST_ENTRY(vm_object) shadow_list; /* chain of shadow objects */ - TAILQ_HEAD(respgs, vm_page) memq; /* list of resident pages */ + struct pglist memq; /* list of resident pages */ struct vm_radix rtree; /* root of the resident page radix trie*/ vm_pindex_t size; /* Object size */ int generation; /* generation ID */ @@ -151,13 +157,12 @@ struct vm_object { * the handle changed and hash-chain * invalid. * - * swp_bcount - number of swap 'swblock' metablocks, each - * contains up to 16 swapblk assignments. - * see vm/swap_pager.h + * swp_blks - pc-trie of the allocated swap blocks. + * */ struct { void *swp_tmpfs; - int swp_bcount; + struct pctrie swp_blks; } swp; } un_pager; struct ucred *cred; @@ -171,11 +176,12 @@ struct vm_object { #define OBJ_FICTITIOUS 0x0001 /* (c) contains fictitious pages */ #define OBJ_UNMANAGED 0x0002 /* (c) contains unmanaged pages */ #define OBJ_POPULATE 0x0004 /* pager implements populate() */ -#define OBJ_DEAD 0x0008 /* dead objects (during rundown) */ +#define OBJ_DEAD 0x0008 /* dead objects (during rundown) */ #define OBJ_NOSPLIT 0x0010 /* dont split this object */ #define OBJ_UMTXDEAD 0x0020 /* umtx pshared was terminated */ -#define OBJ_PIPWNT 0x0040 /* paging in progress wanted */ -#define OBJ_MIGHTBEDIRTY 0x0100 /* object might be dirty, only for vnode */ +#define OBJ_PIPWNT 0x0040 /* paging in progress wanted */ +#define OBJ_PG_DTOR 0x0080 /* dont reset object, leave that for dtor */ +#define OBJ_MIGHTBEDIRTY 0x0100 /* object might be dirty, only for vnode */ #define OBJ_TMPFS_NODE 0x0200 /* object belongs to tmpfs VREG node */ #define OBJ_TMPFS_DIRTY 0x0400 /* dirty tmpfs obj */ #define OBJ_COLORED 0x1000 /* pg_color is defined */ diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index d8a9c21..16dc868 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -158,6 +158,7 @@ static uma_zone_t fakepg_zone; static void vm_page_alloc_check(vm_page_t m); static void vm_page_clear_dirty_mask(vm_page_t m, vm_page_bits_t pagebits); static void vm_page_enqueue(uint8_t queue, vm_page_t m); +static void vm_page_free_phys(vm_page_t m); static void vm_page_free_wakeup(void); static void vm_page_init_fakepg(void *dummy); static int vm_page_insert_after(vm_page_t m, vm_object_t object, @@ -407,6 +408,29 @@ vm_page_domain_init(struct vm_domain *vmd) } /* + * Initialize a physical page in preparation for adding it to the free + * lists. 
+ */ +static void +vm_page_init_page(vm_page_t m, vm_paddr_t pa, int segind) +{ + + m->object = NULL; + m->wire_count = 0; + m->busy_lock = VPB_UNBUSIED; + m->hold_count = 0; + m->flags = 0; + m->phys_addr = pa; + m->queue = PQ_NONE; + m->psind = 0; + m->segind = segind; + m->order = VM_NFREEORDER; + m->pool = VM_FREEPOOL_DEFAULT; + m->valid = m->dirty = 0; + pmap_page_init(m); +} + +/* * vm_page_startup: * * Initializes the resident memory module. Allocates physical memory for @@ -417,17 +441,15 @@ vm_page_domain_init(struct vm_domain *vmd) vm_offset_t vm_page_startup(vm_offset_t vaddr) { - vm_offset_t mapped; - vm_paddr_t high_avail, low_avail, page_range, size; - vm_paddr_t new_end; - int i; - vm_paddr_t pa; - vm_paddr_t last_pa; + struct vm_domain *vmd; + struct vm_phys_seg *seg; + vm_page_t m; char *list, *listend; - vm_paddr_t end; - vm_paddr_t biggestsize; - int biggestone; - int pages_per_zone; + vm_offset_t mapped; + vm_paddr_t end, high_avail, low_avail, new_end, page_range, size; + vm_paddr_t biggestsize, last_pa, pa; + u_long pagecount; + int biggestone, i, pages_per_zone, segind; biggestsize = 0; biggestone = 0; @@ -463,7 +485,8 @@ vm_page_startup(vm_offset_t vaddr) * in proportion to the zone structure size. */ pages_per_zone = howmany(sizeof(struct uma_zone) + - sizeof(struct uma_cache) * (mp_maxid + 1), UMA_SLAB_SIZE); + sizeof(struct uma_cache) * (mp_maxid + 1) + + roundup2(sizeof(struct uma_slab), sizeof(void *)), UMA_SLAB_SIZE); if (pages_per_zone > 1) { /* Reserve more pages so that we don't run out. */ boot_pages = UMA_BOOT_PAGES_ZONES * pages_per_zone; @@ -507,6 +530,8 @@ vm_page_startup(vm_offset_t vaddr) vm_page_dump = (void *)(uintptr_t)pmap_map(&vaddr, new_end, new_end + vm_page_dump_size, VM_PROT_READ | VM_PROT_WRITE); bzero((void *)vm_page_dump, vm_page_dump_size); +#else + (void)last_pa; #endif #if defined(__aarch64__) || defined(__amd64__) || defined(__mips__) /* @@ -611,7 +636,9 @@ vm_page_startup(vm_offset_t vaddr) new_end = trunc_page(end - page_range * sizeof(struct vm_page)); mapped = pmap_map(&vaddr, new_end, end, VM_PROT_READ | VM_PROT_WRITE); - vm_page_array = (vm_page_t) mapped; + vm_page_array = (vm_page_t)mapped; + vm_page_array_size = page_range; + #if VM_NRESERVLEVEL > 0 /* * Allocate physical memory for the reservation management system's @@ -638,33 +665,53 @@ vm_page_startup(vm_offset_t vaddr) vm_phys_add_seg(phys_avail[i], phys_avail[i + 1]); /* - * Clear all of the page structures - */ - bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page)); - for (i = 0; i < page_range; i++) - vm_page_array[i].order = VM_NFREEORDER; - vm_page_array_size = page_range; - - /* * Initialize the physical memory allocator. */ vm_phys_init(); /* - * Add every available physical page that is not blacklisted to - * the free lists. + * Initialize the page structures and add every available page to the + * physical memory allocator's free lists. */ vm_cnt.v_page_count = 0; vm_cnt.v_free_count = 0; - for (i = 0; phys_avail[i + 1] != 0; i += 2) { - pa = phys_avail[i]; - last_pa = phys_avail[i + 1]; - while (pa < last_pa) { - vm_phys_add_page(pa); - pa += PAGE_SIZE; + for (segind = 0; segind < vm_phys_nsegs; segind++) { + seg = &vm_phys_segs[segind]; + for (m = seg->first_page, pa = seg->start; pa < seg->end; + m++, pa += PAGE_SIZE) + vm_page_init_page(m, pa, segind); + + /* + * Add the segment to the free lists only if it is covered by + * one of the ranges in phys_avail. 
Because we've added the + * ranges to the vm_phys_segs array, we can assume that each + * segment is either entirely contained in one of the ranges, + * or doesn't overlap any of them. + */ + for (i = 0; phys_avail[i + 1] != 0; i += 2) { + if (seg->start < phys_avail[i] || + seg->end > phys_avail[i + 1]) + continue; + + m = seg->first_page; + pagecount = (u_long)atop(seg->end - seg->start); + + mtx_lock(&vm_page_queue_free_mtx); + vm_phys_free_contig(m, pagecount); + vm_phys_freecnt_adj(m, (int)pagecount); + mtx_unlock(&vm_page_queue_free_mtx); + vm_cnt.v_page_count += (u_int)pagecount; + + vmd = &vm_dom[seg->domain]; + vmd->vmd_page_count += (u_int)pagecount; + vmd->vmd_segs |= 1UL << m->segind; + break; } } + /* + * Remove blacklisted pages from the physical memory allocator. + */ TAILQ_INIT(&blacklist_head); vm_page_blacklist_load(&list, &listend); vm_page_blacklist_check(list, listend); @@ -905,6 +952,23 @@ vm_page_flash(vm_page_t m) } /* + * Avoid releasing and reacquiring the same page lock. + */ +void +vm_page_change_lock(vm_page_t m, struct mtx **mtx) +{ + struct mtx *mtx1; + + mtx1 = vm_page_lockptr(m); + if (*mtx == mtx1) + return; + if (*mtx != NULL) + mtx_unlock(*mtx); + *mtx = mtx1; + mtx_lock(mtx1); +} + +/* * Keep page from being freed by the page daemon * much of the same effect as wiring, except much lower * overhead and should be used only for *very* temporary @@ -937,20 +1001,11 @@ vm_page_unhold(vm_page_t mem) void vm_page_unhold_pages(vm_page_t *ma, int count) { - struct mtx *mtx, *new_mtx; + struct mtx *mtx; mtx = NULL; for (; count != 0; count--) { - /* - * Avoid releasing and reacquiring the same page lock. - */ - new_mtx = vm_page_lockptr(*ma); - if (mtx != new_mtx) { - if (mtx != NULL) - mtx_unlock(mtx); - mtx = new_mtx; - mtx_lock(mtx); - } + vm_page_change_lock(*ma, &mtx); vm_page_unhold(*ma); ma++; } @@ -1989,7 +2044,7 @@ vm_page_t vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, u_long alignment, vm_paddr_t boundary, int options) { - struct mtx *m_mtx, *new_mtx; + struct mtx *m_mtx; vm_object_t object; vm_paddr_t pa; vm_page_t m, m_run; @@ -2005,8 +2060,10 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, run_len = 0; m_mtx = NULL; for (m = m_start; m < m_end && run_len < npages; m += m_inc) { - KASSERT((m->flags & (PG_FICTITIOUS | PG_MARKER)) == 0, - ("page %p is PG_FICTITIOUS or PG_MARKER", m)); + KASSERT((m->flags & PG_MARKER) == 0, + ("page %p is PG_MARKER", m)); + KASSERT((m->flags & PG_FICTITIOUS) == 0 || m->wire_count == 1, + ("fictitious page %p has invalid wire count", m)); /* * If the current page would be the start of a run, check its @@ -2032,16 +2089,7 @@ vm_page_scan_contig(u_long npages, vm_page_t m_start, vm_page_t m_end, } else KASSERT(m_run != NULL, ("m_run == NULL")); - /* - * Avoid releasing and reacquiring the same page lock. - */ - new_mtx = vm_page_lockptr(m); - if (m_mtx != new_mtx) { - if (m_mtx != NULL) - mtx_unlock(m_mtx); - m_mtx = new_mtx; - mtx_lock(m_mtx); - } + vm_page_change_lock(m, &m_mtx); m_inc = 1; retry: if (m->wire_count != 0 || m->hold_count != 0) @@ -2191,7 +2239,7 @@ static int vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, vm_paddr_t high) { - struct mtx *m_mtx, *new_mtx; + struct mtx *m_mtx; struct spglist free; vm_object_t object; vm_paddr_t pa; @@ -2212,13 +2260,7 @@ vm_page_reclaim_run(int req_class, u_long npages, vm_page_t m_run, /* * Avoid releasing and reacquiring the same page lock. 
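The vm_page_change_lock() helper centralizes the "only switch page locks when the next page hashes to a different mutex" idiom that several scans previously open-coded. A minimal sketch of the calling pattern, mirroring the converted loop in vm_object_page_noreuse() above ("object" is illustrative):

	struct mtx *mtx;
	vm_page_t p;

	mtx = NULL;
	TAILQ_FOREACH(p, &object->memq, listq) {
		/* Acquires p's lock only if it differs from the one held. */
		vm_page_change_lock(p, &mtx);
		vm_page_deactivate_noreuse(p);
	}
	if (mtx != NULL)
		mtx_unlock(mtx);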
*/ - new_mtx = vm_page_lockptr(m); - if (m_mtx != new_mtx) { - if (m_mtx != NULL) - mtx_unlock(m_mtx); - m_mtx = new_mtx; - mtx_lock(m_mtx); - } + vm_page_change_lock(m, &m_mtx); retry: if (m->wire_count != 0 || m->hold_count != 0) error = EBUSY; @@ -2331,12 +2373,7 @@ retry: * The new page must be deactivated * before the object is unlocked. */ - new_mtx = vm_page_lockptr(m_new); - if (m_mtx != new_mtx) { - mtx_unlock(m_mtx); - m_mtx = new_mtx; - mtx_lock(m_mtx); - } + vm_page_change_lock(m_new, &m_mtx); vm_page_deactivate(m_new); } else { m->flags &= ~PG_ZERO; @@ -2379,13 +2416,7 @@ unlock: mtx_lock(&vm_page_queue_free_mtx); do { SLIST_REMOVE_HEAD(&free, plinks.s.ss); - vm_phys_freecnt_adj(m, 1); -#if VM_NRESERVLEVEL > 0 - if (!vm_reserv_free_page(m)) -#else - if (true) -#endif - vm_phys_free_pages(m, 0); + vm_page_free_phys(m); } while ((m = SLIST_FIRST(&free)) != NULL); vm_page_zero_idle_wakeup(); vm_page_free_wakeup(); @@ -2722,7 +2753,7 @@ vm_page_activate(vm_page_t m) * * The page queues must be locked. */ -static inline void +static void vm_page_free_wakeup(void) { @@ -2748,17 +2779,30 @@ vm_page_free_wakeup(void) } /* - * vm_page_free_toq: + * vm_page_free_prep: * - * Returns the given page to the free list, - * disassociating it with any VM object. + * Prepares the given page to be put on the free list, + * disassociating it from any VM object. The caller may return + * the page to the free list only if this function returns true. * - * The object must be locked. The page must be locked if it is managed. + * The object must be locked. The page must be locked if it is + * managed. For a queued managed page, the pagequeue_locked + * argument specifies whether the page queue is already locked. */ -void -vm_page_free_toq(vm_page_t m) +bool +vm_page_free_prep(vm_page_t m, bool pagequeue_locked) { +#if defined(DIAGNOSTIC) && defined(PHYS_TO_DMAP) + if ((m->flags & PG_ZERO) != 0) { + uint64_t *p; + int i; + p = (uint64_t *)PHYS_TO_DMAP(VM_PAGE_TO_PHYS(m)); + for (i = 0; i < PAGE_SIZE / sizeof(uint64_t); i++, p++) + KASSERT(*p == 0, ("vm_page_free_prep %p PG_ZERO %d %jx", + m, i, (uintmax_t)*p)); + } +#endif if ((m->oflags & VPO_UNMANAGED) == 0) { vm_page_lock_assert(m, MA_OWNED); KASSERT(!pmap_page_is_mapped(m), @@ -2777,16 +2821,20 @@ vm_page_free_toq(vm_page_t m) * callback routine until after we've put the page on the * appropriate free queue. */ - vm_page_remque(m); + if (m->queue != PQ_NONE) { + if (pagequeue_locked) + vm_page_dequeue_locked(m); + else + vm_page_dequeue(m); + } vm_page_remove(m); /* * If fictitious remove object association and * return, otherwise delay object association removal. */ - if ((m->flags & PG_FICTITIOUS) != 0) { - return; - } + if ((m->flags & PG_FICTITIOUS) != 0) + return (false); m->valid = 0; vm_page_undirty(m); @@ -2798,32 +2846,72 @@ vm_page_free_toq(vm_page_t m) KASSERT((m->flags & PG_UNHOLDFREE) == 0, ("vm_page_free: freeing PG_UNHOLDFREE page %p", m)); m->flags |= PG_UNHOLDFREE; - } else { - /* - * Restore the default memory attribute to the page. - */ - if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) - pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); + return (false); + } - /* - * Insert the page into the physical memory allocator's free - * page queues. - */ - mtx_lock(&vm_page_queue_free_mtx); - vm_phys_freecnt_adj(m, 1); + /* + * Restore the default memory attribute to the page. 
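Freeing is now a two-step protocol: vm_page_free_prep() detaches the page and decides whether it may be freed at all, and the physical free can then be batched under a single acquisition of the free-queue mutex. A minimal sketch of a batching caller, assuming "ma" holds n managed pages whose object and page locks the caller already owns (the names are illustrative; vm_object_page_remove() above is the in-tree example):

	struct pglist pgl;
	int i;

	TAILQ_INIT(&pgl);
	for (i = 0; i < n; i++) {
		ma[i]->flags &= ~PG_ZERO;
		/* Pages that cannot be freed yet (e.g. held) are skipped. */
		if (vm_page_free_prep(ma[i], false))
			TAILQ_INSERT_TAIL(&pgl, ma[i], listq);
	}
	/* One free-queue lock round trip covers the whole batch. */
	vm_page_free_phys_pglist(&pgl);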
+ */ + if (pmap_page_get_memattr(m) != VM_MEMATTR_DEFAULT) + pmap_page_set_memattr(m, VM_MEMATTR_DEFAULT); + + return (true); +} + +/* + * Insert the page into the physical memory allocator's free page + * queues. This is the last step to free a page. + */ +static void +vm_page_free_phys(vm_page_t m) +{ + + mtx_assert(&vm_page_queue_free_mtx, MA_OWNED); + + vm_phys_freecnt_adj(m, 1); #if VM_NRESERVLEVEL > 0 - if (!vm_reserv_free_page(m)) -#else - if (TRUE) + if (!vm_reserv_free_page(m)) #endif vm_phys_free_pages(m, 0); - if ((m->flags & PG_ZERO) != 0) - ++vm_page_zero_count; - else - vm_page_zero_idle_wakeup(); - vm_page_free_wakeup(); - mtx_unlock(&vm_page_queue_free_mtx); - } + if ((m->flags & PG_ZERO) != 0) + ++vm_page_zero_count; + else + vm_page_zero_idle_wakeup(); +} + +void +vm_page_free_phys_pglist(struct pglist *tq) +{ + vm_page_t m; + + if (TAILQ_EMPTY(tq)) + return; + mtx_lock(&vm_page_queue_free_mtx); + TAILQ_FOREACH(m, tq, listq) + vm_page_free_phys(m); + vm_page_free_wakeup(); + mtx_unlock(&vm_page_queue_free_mtx); +} + +/* + * vm_page_free_toq: + * + * Returns the given page to the free list, disassociating it + * from any VM object. + * + * The object must be locked. The page must be locked if it is + * managed. + */ +void +vm_page_free_toq(vm_page_t m) +{ + + if (!vm_page_free_prep(m, false)) + return; + mtx_lock(&vm_page_queue_free_mtx); + vm_page_free_phys(m); + vm_page_free_wakeup(); + mtx_unlock(&vm_page_queue_free_mtx); } /* @@ -3006,23 +3094,25 @@ vm_page_launder(vm_page_t m) * vm_page_try_to_free() * * Attempt to free the page. If we cannot free it, we do nothing. - * 1 is returned on success, 0 on failure. + * true is returned on success, false on failure. */ -int +bool vm_page_try_to_free(vm_page_t m) { - vm_page_lock_assert(m, MA_OWNED); + vm_page_assert_locked(m); if (m->object != NULL) VM_OBJECT_ASSERT_WLOCKED(m->object); - if (m->dirty || m->hold_count || m->wire_count || + if (m->dirty != 0 || m->hold_count != 0 || m->wire_count != 0 || (m->oflags & VPO_UNMANAGED) != 0 || vm_page_busied(m)) - return (0); - pmap_remove_all(m); - if (m->dirty) - return (0); + return (false); + if (m->object != NULL && m->object->ref_count != 0) { + pmap_remove_all(m); + if (m->dirty != 0) + return (false); + } vm_page_free(m); - return (1); + return (true); } /* @@ -3142,6 +3232,107 @@ retrylookup: } /* + * Return the specified range of pages from the given object. For each + * page offset within the range, if a page already exists within the object + * at that offset and it is busy, then wait for it to change state. If, + * instead, the page doesn't exist, then allocate it. + * + * The caller must always specify an allocation class. + * + * allocation classes: + * VM_ALLOC_NORMAL normal process request + * VM_ALLOC_SYSTEM system *really* needs the pages + * + * The caller must always specify that the pages are to be busied and/or + * wired. + * + * optional allocation flags: + * VM_ALLOC_IGN_SBUSY do not sleep on soft busy pages + * VM_ALLOC_NOBUSY do not exclusive busy the page + * VM_ALLOC_NOWAIT do not sleep + * VM_ALLOC_SBUSY set page to sbusy state + * VM_ALLOC_WIRED wire the pages + * VM_ALLOC_ZERO zero and validate any invalid pages + * + * If VM_ALLOC_NOWAIT is not specified, this routine may sleep. Otherwise, it + * may return a partial prefix of the requested range. 
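A minimal usage sketch for the new vm_page_grab_pages() interface ("object", "pindex", and the count of 16 are illustrative; one in-tree caller is the kernel-stack swap-in code added to vm_swapout.c below):

	vm_page_t ma[16];
	int got;

	VM_OBJECT_WLOCK(object);
	got = vm_page_grab_pages(object, pindex, VM_ALLOC_NORMAL |
	    VM_ALLOC_WIRED | VM_ALLOC_NOBUSY | VM_ALLOC_NOWAIT, ma, 16);
	VM_OBJECT_WUNLOCK(object);
	/* With VM_ALLOC_NOWAIT, "got" may be a partial prefix of 16. */

The wired pages can later be released with vm_page_unwire() once the caller is done with them.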
+ */ +int +vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, + vm_page_t *ma, int count) +{ + vm_page_t m; + int i; + bool sleep; + + VM_OBJECT_ASSERT_WLOCKED(object); + KASSERT(((u_int)allocflags >> VM_ALLOC_COUNT_SHIFT) == 0, + ("vm_page_grap_pages: VM_ALLOC_COUNT() is not allowed")); + KASSERT((allocflags & VM_ALLOC_NOBUSY) == 0 || + (allocflags & VM_ALLOC_WIRED) != 0, + ("vm_page_grab_pages: the pages must be busied or wired")); + KASSERT((allocflags & VM_ALLOC_SBUSY) == 0 || + (allocflags & VM_ALLOC_IGN_SBUSY) != 0, + ("vm_page_grab_pages: VM_ALLOC_SBUSY/IGN_SBUSY mismatch")); + if (count == 0) + return (0); + i = 0; +retrylookup: + m = vm_page_lookup(object, pindex + i); + for (; i < count; i++) { + if (m != NULL) { + sleep = (allocflags & VM_ALLOC_IGN_SBUSY) != 0 ? + vm_page_xbusied(m) : vm_page_busied(m); + if (sleep) { + if ((allocflags & VM_ALLOC_NOWAIT) != 0) + break; + /* + * Reference the page before unlocking and + * sleeping so that the page daemon is less + * likely to reclaim it. + */ + vm_page_aflag_set(m, PGA_REFERENCED); + vm_page_lock(m); + VM_OBJECT_WUNLOCK(object); + vm_page_busy_sleep(m, "grbmaw", (allocflags & + VM_ALLOC_IGN_SBUSY) != 0); + VM_OBJECT_WLOCK(object); + goto retrylookup; + } + if ((allocflags & VM_ALLOC_WIRED) != 0) { + vm_page_lock(m); + vm_page_wire(m); + vm_page_unlock(m); + } + if ((allocflags & (VM_ALLOC_NOBUSY | + VM_ALLOC_SBUSY)) == 0) + vm_page_xbusy(m); + if ((allocflags & VM_ALLOC_SBUSY) != 0) + vm_page_sbusy(m); + } else { + m = vm_page_alloc(object, pindex + i, (allocflags & + ~VM_ALLOC_IGN_SBUSY) | VM_ALLOC_COUNT(count - i)); + if (m == NULL) { + if ((allocflags & VM_ALLOC_NOWAIT) != 0) + break; + VM_OBJECT_WUNLOCK(object); + VM_WAIT; + VM_OBJECT_WLOCK(object); + goto retrylookup; + } + } + if (m->valid == 0 && (allocflags & VM_ALLOC_ZERO) != 0) { + if ((m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + m->valid = VM_PAGE_BITS_ALL; + } + ma[i] = m; + m = vm_page_next(m); + } + return (i); +} + +/* * Mapping function for valid or dirty bits in a page. * * Inputs are required to range within a page. @@ -3459,16 +3650,17 @@ vm_page_is_valid(vm_page_t m, int base, int size) } /* - * vm_page_ps_is_valid: - * - * Returns TRUE if the entire (super)page is valid and FALSE otherwise. + * Returns true if all of the specified predicates are true for the entire + * (super)page and false otherwise. */ -boolean_t -vm_page_ps_is_valid(vm_page_t m) +bool +vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m) { + vm_object_t object; int i, npages; - VM_OBJECT_ASSERT_LOCKED(m->object); + object = m->object; + VM_OBJECT_ASSERT_LOCKED(object); npages = atop(pagesizes[m->psind]); /* @@ -3477,10 +3669,28 @@ vm_page_ps_is_valid(vm_page_t m) * occupy adjacent entries in vm_page_array[]. */ for (i = 0; i < npages; i++) { - if (m[i].valid != VM_PAGE_BITS_ALL) - return (FALSE); + /* Always test object consistency, including "skip_m". */ + if (m[i].object != object) + return (false); + if (&m[i] == skip_m) + continue; + if ((flags & PS_NONE_BUSY) != 0 && vm_page_busied(&m[i])) + return (false); + if ((flags & PS_ALL_DIRTY) != 0) { + /* + * Calling vm_page_test_dirty() or pmap_is_modified() + * might stop this case from spuriously returning + * "false". However, that would require a write lock + * on the object containing "m[i]". 
+ */ + if (m[i].dirty != VM_PAGE_BITS_ALL) + return (false); + } + if ((flags & PS_ALL_VALID) != 0 && + m[i].valid != VM_PAGE_BITS_ALL) + return (false); } - return (TRUE); + return (true); } /* diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index 1ee8dde..d27fe10 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -209,7 +209,10 @@ struct vm_page { #define PQ_LAUNDRY 2 #define PQ_COUNT 3 +#ifndef VM_PAGE_HAVE_PGLIST TAILQ_HEAD(pglist, vm_page); +#define VM_PAGE_HAVE_PGLIST +#endif SLIST_HEAD(spglist, vm_page); struct vm_pagequeue { @@ -387,6 +390,9 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); * vm_page_alloc_freelist(). Some functions support only a subset * of the flags, and ignore others, see the flags legend. * + * The meaning of VM_ALLOC_ZERO differs slightly between the vm_page_alloc*() + * and the vm_page_grab*() functions. See these functions for details. + * * Bits 0 - 1 define class. * Bits 2 - 15 dedicated for flags. * Legend: @@ -394,6 +400,7 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); * (c) - vm_page_alloc_contig() supports the flag. * (f) - vm_page_alloc_freelist() supports the flag. * (g) - vm_page_grab() supports the flag. + * (p) - vm_page_grab_pages() supports the flag. * Bits above 15 define the count of additional pages that the caller * intends to allocate. */ @@ -401,16 +408,16 @@ vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa); #define VM_ALLOC_INTERRUPT 1 #define VM_ALLOC_SYSTEM 2 #define VM_ALLOC_CLASS_MASK 3 -#define VM_ALLOC_WIRED 0x0020 /* (acfg) Allocate non pageable page */ -#define VM_ALLOC_ZERO 0x0040 /* (acfg) Try to obtain a zeroed page */ +#define VM_ALLOC_WIRED 0x0020 /* (acfgp) Allocate a wired page */ +#define VM_ALLOC_ZERO 0x0040 /* (acfgp) Allocate a prezeroed page */ #define VM_ALLOC_NOOBJ 0x0100 /* (acg) No associated object */ -#define VM_ALLOC_NOBUSY 0x0200 /* (acg) Do not busy the page */ +#define VM_ALLOC_NOBUSY 0x0200 /* (acgp) Do not excl busy the page */ #define VM_ALLOC_IFCACHED 0x0400 #define VM_ALLOC_IFNOTCACHED 0x0800 -#define VM_ALLOC_IGN_SBUSY 0x1000 /* (g) Ignore shared busy flag */ +#define VM_ALLOC_IGN_SBUSY 0x1000 /* (gp) Ignore shared busy flag */ #define VM_ALLOC_NODUMP 0x2000 /* (ag) don't include in dump */ -#define VM_ALLOC_SBUSY 0x4000 /* (acg) Shared busy the page */ -#define VM_ALLOC_NOWAIT 0x8000 /* (g) Do not sleep, return NULL */ +#define VM_ALLOC_SBUSY 0x4000 /* (acgp) Shared busy the page */ +#define VM_ALLOC_NOWAIT 0x8000 /* (gp) Do not sleep */ #define VM_ALLOC_COUNT_SHIFT 16 #define VM_ALLOC_COUNT(count) ((count) << VM_ALLOC_COUNT_SHIFT) @@ -433,6 +440,18 @@ malloc2vm_flags(int malloc_flags) } #endif +/* + * Predicates supported by vm_page_ps_test(): + * + * PS_ALL_DIRTY is true only if the entire (super)page is dirty. + * However, it can be spuriously false when the (super)page has become + * dirty in the pmap but that information has not been propagated to the + * machine-independent layer. 
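vm_page_ps_test() generalizes the old all-valid check into a small set of predicates evaluated over every base page of a superpage. A minimal sketch of a caller, assuming "m" is the first page of a superpage-sized run and its object lock is held (this flag combination is illustrative; the converted call in vm_map_pmap_enter() above uses PS_ALL_VALID alone):

	if (m->psind > 0 &&
	    vm_page_ps_test(m, PS_ALL_VALID | PS_NONE_BUSY, NULL)) {
		/*
		 * Every base page is fully valid and none is busied, so
		 * the run can be handled as a single superpage.
		 */
	}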
+ */ +#define PS_ALL_DIRTY 0x1 +#define PS_ALL_VALID 0x2 +#define PS_NONE_BUSY 0x4 + void vm_page_busy_downgrade(vm_page_t m); void vm_page_busy_sleep(vm_page_t m, const char *msg, bool nonshared); void vm_page_flash(vm_page_t m); @@ -448,13 +467,17 @@ vm_page_t vm_page_alloc_contig(vm_object_t object, vm_pindex_t pindex, int req, u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary, vm_memattr_t memattr); vm_page_t vm_page_alloc_freelist(int, int); +void vm_page_change_lock(vm_page_t m, struct mtx **mtx); vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int); -int vm_page_try_to_free (vm_page_t); +int vm_page_grab_pages(vm_object_t object, vm_pindex_t pindex, int allocflags, + vm_page_t *ma, int count); void vm_page_deactivate (vm_page_t); void vm_page_deactivate_noreuse(vm_page_t); void vm_page_dequeue(vm_page_t m); void vm_page_dequeue_locked(vm_page_t m); vm_page_t vm_page_find_least(vm_object_t, vm_pindex_t); +void vm_page_free_phys_pglist(struct pglist *tq); +bool vm_page_free_prep(vm_page_t m, bool pagequeue_locked); vm_page_t vm_page_getfake(vm_paddr_t paddr, vm_memattr_t memattr); void vm_page_initfake(vm_page_t m, vm_paddr_t paddr, vm_memattr_t memattr); int vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t); @@ -464,7 +487,7 @@ vm_page_t vm_page_next(vm_page_t m); int vm_page_pa_tryrelock(pmap_t, vm_paddr_t, vm_paddr_t *); struct vm_pagequeue *vm_page_pagequeue(vm_page_t m); vm_page_t vm_page_prev(vm_page_t m); -boolean_t vm_page_ps_is_valid(vm_page_t m); +bool vm_page_ps_test(vm_page_t m, int flags, vm_page_t skip_m); void vm_page_putfake(vm_page_t m); void vm_page_readahead_finish(vm_page_t m); bool vm_page_reclaim_contig(int req, u_long npages, vm_paddr_t low, @@ -483,6 +506,7 @@ void vm_page_set_valid_range(vm_page_t m, int base, int size); int vm_page_sleep_if_busy(vm_page_t m, const char *msg); vm_offset_t vm_page_startup(vm_offset_t vaddr); void vm_page_sunbusy(vm_page_t m); +bool vm_page_try_to_free(vm_page_t m); int vm_page_trysbusy(vm_page_t m); void vm_page_unhold_pages(vm_page_t *ma, int count); boolean_t vm_page_unwire(vm_page_t m, uint8_t queue); diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c index c9646cf..793e2e9 100644 --- a/sys/vm/vm_pageout.c +++ b/sys/vm/vm_pageout.c @@ -141,19 +141,6 @@ SYSINIT(pagedaemon, SI_SUB_KTHREAD_PAGE, SI_ORDER_SECOND, kproc_start, SDT_PROVIDER_DEFINE(vm); SDT_PROBE_DEFINE(vm, , , vm__lowmem_scan); -#if !defined(NO_SWAPPING) -/* the kernel process "vm_daemon"*/ -static void vm_daemon(void); -static struct proc *vmproc; - -static struct kproc_desc vm_kp = { - "vmdaemon", - vm_daemon, - &vmproc -}; -SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); -#endif - /* Pagedaemon activity rates, in subdivisions of one second. */ #define VM_LAUNDER_RATE 10 #define VM_INACT_SCAN_RATE 2 @@ -171,26 +158,11 @@ static enum { VM_LAUNDRY_SHORTFALL } vm_laundry_request = VM_LAUNDRY_IDLE; -#if !defined(NO_SWAPPING) -static int vm_pageout_req_swapout; /* XXX */ -static int vm_daemon_needed; -static struct mtx vm_daemon_mtx; -/* Allow for use by vm_pageout before vm_daemon is initialized. 
*/ -MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); -#endif static int vm_pageout_update_period; static int disable_swap_pageouts; static int lowmem_period = 10; static time_t lowmem_uptime; -#if defined(NO_SWAPPING) -static int vm_swap_enabled = 0; -static int vm_swap_idle_enabled = 0; -#else -static int vm_swap_enabled = 1; -static int vm_swap_idle_enabled = 0; -#endif - static int vm_panic_on_oom = 0; SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, @@ -198,56 +170,44 @@ SYSCTL_INT(_vm, OID_AUTO, panic_on_oom, "panic on out of memory instead of killing the largest process"); SYSCTL_INT(_vm, OID_AUTO, pageout_wakeup_thresh, - CTLFLAG_RW, &vm_pageout_wakeup_thresh, 0, + CTLFLAG_RWTUN, &vm_pageout_wakeup_thresh, 0, "free page threshold for waking up the pageout daemon"); SYSCTL_INT(_vm, OID_AUTO, pageout_update_period, - CTLFLAG_RW, &vm_pageout_update_period, 0, + CTLFLAG_RWTUN, &vm_pageout_update_period, 0, "Maximum active LRU update period"); -SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RW, &lowmem_period, 0, +SYSCTL_INT(_vm, OID_AUTO, lowmem_period, CTLFLAG_RWTUN, &lowmem_period, 0, "Low memory callback period"); -#if defined(NO_SWAPPING) -SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, - CTLFLAG_RD, &vm_swap_enabled, 0, "Enable entire process swapout"); -SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, - CTLFLAG_RD, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); -#else -SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, - CTLFLAG_RW, &vm_swap_enabled, 0, "Enable entire process swapout"); -SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, - CTLFLAG_RW, &vm_swap_idle_enabled, 0, "Allow swapout on idle criteria"); -#endif - SYSCTL_INT(_vm, OID_AUTO, disable_swapspace_pageouts, - CTLFLAG_RW, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); + CTLFLAG_RWTUN, &disable_swap_pageouts, 0, "Disallow swapout of dirty pages"); static int pageout_lock_miss; SYSCTL_INT(_vm, OID_AUTO, pageout_lock_miss, CTLFLAG_RD, &pageout_lock_miss, 0, "vget() lock misses during pageout"); SYSCTL_INT(_vm, OID_AUTO, pageout_oom_seq, - CTLFLAG_RW, &vm_pageout_oom_seq, 0, + CTLFLAG_RWTUN, &vm_pageout_oom_seq, 0, "back-to-back calls to oom detector to start OOM"); static int act_scan_laundry_weight = 3; -SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RW, +SYSCTL_INT(_vm, OID_AUTO, act_scan_laundry_weight, CTLFLAG_RWTUN, &act_scan_laundry_weight, 0, "weight given to clean vs. 
dirty pages in active queue scans"); static u_int vm_background_launder_target; -SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RW, +SYSCTL_UINT(_vm, OID_AUTO, background_launder_target, CTLFLAG_RWTUN, &vm_background_launder_target, 0, "background laundering target, in pages"); static u_int vm_background_launder_rate = 4096; -SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RW, +SYSCTL_UINT(_vm, OID_AUTO, background_launder_rate, CTLFLAG_RWTUN, &vm_background_launder_rate, 0, "background laundering rate, in kilobytes per second"); static u_int vm_background_launder_max = 20 * 1024; -SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RW, +SYSCTL_UINT(_vm, OID_AUTO, background_launder_max, CTLFLAG_RWTUN, &vm_background_launder_max, 0, "background laundering cap, in kilobytes"); int vm_pageout_page_count = 32; @@ -261,11 +221,6 @@ static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *); static int vm_pageout_launder(struct vm_domain *vmd, int launder, bool in_shortfall); static void vm_pageout_laundry_worker(void *arg); -#if !defined(NO_SWAPPING) -static void vm_pageout_map_deactivate_pages(vm_map_t, long); -static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long); -static void vm_req_vmdaemon(int req); -#endif static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *); /* @@ -401,6 +356,8 @@ vm_pageout_cluster(vm_page_t m) */ vm_page_assert_unbusied(m); KASSERT(m->hold_count == 0, ("page %p is held", m)); + + pmap_remove_write(m); vm_page_unlock(m); mc[vm_pageout_page_count] = pb = ps = m; @@ -443,6 +400,7 @@ more: ib = 0; break; } + pmap_remove_write(p); vm_page_unlock(p); mc[--page_base] = pb = p; ++pageout_count; @@ -468,6 +426,7 @@ more: vm_page_unlock(p); break; } + pmap_remove_write(p); vm_page_unlock(p); mc[page_base + pageout_count] = ps = p; ++pageout_count; @@ -512,8 +471,8 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, VM_OBJECT_ASSERT_WLOCKED(object); /* - * Initiate I/O. Bump the vm_page_t->busy counter and - * mark the pages read-only. + * Initiate I/O. Mark the pages busy and verify that they're valid + * and read-only. * * We do not have to fixup the clean/dirty bits here... we can * allow the pager to do it after the I/O completes. @@ -525,8 +484,9 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, KASSERT(mc[i]->valid == VM_PAGE_BITS_ALL, ("vm_pageout_flush: partially invalid page %p index %d/%d", mc[i], i, count)); + KASSERT((mc[i]->aflags & PGA_WRITEABLE) == 0, + ("vm_pageout_flush: writeable page %p", mc[i])); vm_page_sbusy(mc[i]); - pmap_remove_write(mc[i]); } vm_object_pip_add(object, count); @@ -599,171 +559,6 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen, return (numpagedout); } -#if !defined(NO_SWAPPING) -/* - * vm_pageout_object_deactivate_pages - * - * Deactivate enough pages to satisfy the inactive target - * requirements. - * - * The object and map must be locked. 
- */ -static void -vm_pageout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, - long desired) -{ - vm_object_t backing_object, object; - vm_page_t p; - int act_delta, remove_mode; - - VM_OBJECT_ASSERT_LOCKED(first_object); - if ((first_object->flags & OBJ_FICTITIOUS) != 0) - return; - for (object = first_object;; object = backing_object) { - if (pmap_resident_count(pmap) <= desired) - goto unlock_return; - VM_OBJECT_ASSERT_LOCKED(object); - if ((object->flags & OBJ_UNMANAGED) != 0 || - object->paging_in_progress != 0) - goto unlock_return; - - remove_mode = 0; - if (object->shadow_count > 1) - remove_mode = 1; - /* - * Scan the object's entire memory queue. - */ - TAILQ_FOREACH(p, &object->memq, listq) { - if (pmap_resident_count(pmap) <= desired) - goto unlock_return; - if (vm_page_busied(p)) - continue; - PCPU_INC(cnt.v_pdpages); - vm_page_lock(p); - if (p->wire_count != 0 || p->hold_count != 0 || - !pmap_page_exists_quick(pmap, p)) { - vm_page_unlock(p); - continue; - } - act_delta = pmap_ts_referenced(p); - if ((p->aflags & PGA_REFERENCED) != 0) { - if (act_delta == 0) - act_delta = 1; - vm_page_aflag_clear(p, PGA_REFERENCED); - } - if (!vm_page_active(p) && act_delta != 0) { - vm_page_activate(p); - p->act_count += act_delta; - } else if (vm_page_active(p)) { - if (act_delta == 0) { - p->act_count -= min(p->act_count, - ACT_DECLINE); - if (!remove_mode && p->act_count == 0) { - pmap_remove_all(p); - vm_page_deactivate(p); - } else - vm_page_requeue(p); - } else { - vm_page_activate(p); - if (p->act_count < ACT_MAX - - ACT_ADVANCE) - p->act_count += ACT_ADVANCE; - vm_page_requeue(p); - } - } else if (vm_page_inactive(p)) - pmap_remove_all(p); - vm_page_unlock(p); - } - if ((backing_object = object->backing_object) == NULL) - goto unlock_return; - VM_OBJECT_RLOCK(backing_object); - if (object != first_object) - VM_OBJECT_RUNLOCK(object); - } -unlock_return: - if (object != first_object) - VM_OBJECT_RUNLOCK(object); -} - -/* - * deactivate some number of pages in a map, try to do it fairly, but - * that is really hard to do. - */ -static void -vm_pageout_map_deactivate_pages(map, desired) - vm_map_t map; - long desired; -{ - vm_map_entry_t tmpe; - vm_object_t obj, bigobj; - int nothingwired; - - if (!vm_map_trylock(map)) - return; - - bigobj = NULL; - nothingwired = TRUE; - - /* - * first, search out the biggest object, and try to free pages from - * that. - */ - tmpe = map->header.next; - while (tmpe != &map->header) { - if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { - obj = tmpe->object.vm_object; - if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) { - if (obj->shadow_count <= 1 && - (bigobj == NULL || - bigobj->resident_page_count < obj->resident_page_count)) { - if (bigobj != NULL) - VM_OBJECT_RUNLOCK(bigobj); - bigobj = obj; - } else - VM_OBJECT_RUNLOCK(obj); - } - } - if (tmpe->wired_count > 0) - nothingwired = FALSE; - tmpe = tmpe->next; - } - - if (bigobj != NULL) { - vm_pageout_object_deactivate_pages(map->pmap, bigobj, desired); - VM_OBJECT_RUNLOCK(bigobj); - } - /* - * Next, hunt around for other pages to deactivate. We actually - * do this search sort of wrong -- .text first is not the best idea. 
- */ - tmpe = map->header.next; - while (tmpe != &map->header) { - if (pmap_resident_count(vm_map_pmap(map)) <= desired) - break; - if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { - obj = tmpe->object.vm_object; - if (obj != NULL) { - VM_OBJECT_RLOCK(obj); - vm_pageout_object_deactivate_pages(map->pmap, obj, desired); - VM_OBJECT_RUNLOCK(obj); - } - } - tmpe = tmpe->next; - } - - /* - * Remove all mappings if a process is swapped out, this will free page - * table pages. - */ - if (desired == 0 && nothingwired) { - pmap_remove(vm_map_pmap(map), vm_map_min(map), - vm_map_max(map)); - } - - vm_map_unlock(map); -} -#endif /* !defined(NO_SWAPPING) */ - /* * Attempt to acquire all of the necessary locks to launder a page and * then call through the clustering layer to PUTPAGES. Wait a short @@ -822,7 +617,17 @@ vm_pageout_clean(vm_page_t m, int *numpagedout) goto unlock_mp; } VM_OBJECT_WLOCK(object); + + /* + * Ensure that the object and vnode were not disassociated + * while locks were dropped. + */ + if (vp->v_object != object) { + error = ENOENT; + goto unlock_all; + } vm_page_lock(m); + /* * While the object and page were unlocked, the page * may have been: @@ -1506,14 +1311,12 @@ drop_page: vm_pagequeue_unlock(pq); } -#if !defined(NO_SWAPPING) /* * Wakeup the swapout daemon if we didn't free the targeted number of * pages. */ - if (vm_swap_enabled && page_shortage > 0) - vm_req_vmdaemon(VM_SWAP_NORMAL); -#endif + if (page_shortage > 0) + vm_swapout_run(); /* * If the inactive queue scan fails repeatedly to meet its @@ -1663,18 +1466,8 @@ drop_page: vm_page_unlock(m); } vm_pagequeue_unlock(pq); -#if !defined(NO_SWAPPING) - /* - * Idle process swapout -- run once per second. - */ - if (vm_swap_idle_enabled) { - static long lsec; - if (time_second != lsec) { - vm_req_vmdaemon(VM_SWAP_IDLE); - lsec = time_second; - } - } -#endif + if (pass > 0) + vm_swapout_run_idle(); return (page_shortage <= 0); } @@ -1971,7 +1764,7 @@ vm_pageout_worker(void *arg) */ mtx_unlock(&vm_page_queue_free_mtx); if (pass >= 1) - pause("psleep", hz / VM_INACT_SCAN_RATE); + pause("pwait", hz / VM_INACT_SCAN_RATE); pass++; } else { /* @@ -2100,167 +1893,3 @@ pagedaemon_wakeup(void) wakeup(&vm_pageout_wanted); } } - -#if !defined(NO_SWAPPING) -static void -vm_req_vmdaemon(int req) -{ - static int lastrun = 0; - - mtx_lock(&vm_daemon_mtx); - vm_pageout_req_swapout |= req; - if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { - wakeup(&vm_daemon_needed); - lastrun = ticks; - } - mtx_unlock(&vm_daemon_mtx); -} - -static void -vm_daemon(void) -{ - struct rlimit rsslim; - struct proc *p; - struct thread *td; - struct vmspace *vm; - int breakout, swapout_flags, tryagain, attempts; -#ifdef RACCT - uint64_t rsize, ravailable; -#endif - - while (TRUE) { - mtx_lock(&vm_daemon_mtx); - msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", -#ifdef RACCT - racct_enable ? hz : 0 -#else - 0 -#endif - ); - swapout_flags = vm_pageout_req_swapout; - vm_pageout_req_swapout = 0; - mtx_unlock(&vm_daemon_mtx); - if (swapout_flags) - swapout_procs(swapout_flags); - - /* - * scan the processes for exceeding their rlimits or if - * process is swapped out -- deactivate pages - */ - tryagain = 0; - attempts = 0; -again: - attempts++; - sx_slock(&allproc_lock); - FOREACH_PROC_IN_SYSTEM(p) { - vm_pindex_t limit, size; - - /* - * if this is a system process or if we have already - * looked at this process, skip it. 
- */ - PROC_LOCK(p); - if (p->p_state != PRS_NORMAL || - p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) { - PROC_UNLOCK(p); - continue; - } - /* - * if the process is in a non-running type state, - * don't touch it. - */ - breakout = 0; - FOREACH_THREAD_IN_PROC(p, td) { - thread_lock(td); - if (!TD_ON_RUNQ(td) && - !TD_IS_RUNNING(td) && - !TD_IS_SLEEPING(td) && - !TD_IS_SUSPENDED(td)) { - thread_unlock(td); - breakout = 1; - break; - } - thread_unlock(td); - } - if (breakout) { - PROC_UNLOCK(p); - continue; - } - /* - * get a limit - */ - lim_rlimit_proc(p, RLIMIT_RSS, &rsslim); - limit = OFF_TO_IDX( - qmin(rsslim.rlim_cur, rsslim.rlim_max)); - - /* - * let processes that are swapped out really be - * swapped out set the limit to nothing (will force a - * swap-out.) - */ - if ((p->p_flag & P_INMEM) == 0) - limit = 0; /* XXX */ - vm = vmspace_acquire_ref(p); - _PHOLD_LITE(p); - PROC_UNLOCK(p); - if (vm == NULL) { - PRELE(p); - continue; - } - sx_sunlock(&allproc_lock); - - size = vmspace_resident_count(vm); - if (size >= limit) { - vm_pageout_map_deactivate_pages( - &vm->vm_map, limit); - size = vmspace_resident_count(vm); - } -#ifdef RACCT - if (racct_enable) { - rsize = IDX_TO_OFF(size); - PROC_LOCK(p); - if (p->p_state == PRS_NORMAL) - racct_set(p, RACCT_RSS, rsize); - ravailable = racct_get_available(p, RACCT_RSS); - PROC_UNLOCK(p); - if (rsize > ravailable) { - /* - * Don't be overly aggressive; this - * might be an innocent process, - * and the limit could've been exceeded - * by some memory hog. Don't try - * to deactivate more than 1/4th - * of process' resident set size. - */ - if (attempts <= 8) { - if (ravailable < rsize - - (rsize / 4)) { - ravailable = rsize - - (rsize / 4); - } - } - vm_pageout_map_deactivate_pages( - &vm->vm_map, - OFF_TO_IDX(ravailable)); - /* Update RSS usage after paging out. 
*/ - size = vmspace_resident_count(vm); - rsize = IDX_TO_OFF(size); - PROC_LOCK(p); - if (p->p_state == PRS_NORMAL) - racct_set(p, RACCT_RSS, rsize); - PROC_UNLOCK(p); - if (rsize > ravailable) - tryagain = 1; - } - } -#endif - vmspace_free(vm); - sx_slock(&allproc_lock); - PRELE(p); - } - sx_sunlock(&allproc_lock); - if (tryagain != 0 && attempts <= 10) - goto again; - } -} -#endif /* !defined(NO_SWAPPING) */ diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h index b44ca2f..6f8e0e9 100644 --- a/sys/vm/vm_pageout.h +++ b/sys/vm/vm_pageout.h @@ -77,12 +77,6 @@ extern int vm_pageout_page_count; extern bool vm_pageout_wanted; extern bool vm_pages_needed; -/* - * Swap out requests - */ -#define VM_SWAP_NORMAL 1 -#define VM_SWAP_IDLE 2 - #define VM_OOM_MEM 1 #define VM_OOM_SWAPZ 2 @@ -109,5 +103,8 @@ extern void vm_waitpfault(void); #ifdef _KERNEL int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *); void vm_pageout_oom(int shortage); + +void vm_swapout_run(void); +void vm_swapout_run_idle(void); #endif #endif /* _VM_VM_PAGEOUT_H_ */ diff --git a/sys/vm/vm_param.h b/sys/vm/vm_param.h index 68e5cd1..20dc143 100644 --- a/sys/vm/vm_param.h +++ b/sys/vm/vm_param.h @@ -84,7 +84,8 @@ #define VM_V_PAGEOUT_FREE_MIN 9 /* vm_cnt.v_pageout_free_min */ #define VM_OBSOLETE_10 10 /* pageout algorithm */ #define VM_SWAPPING_ENABLED 11 /* swapping enabled */ -#define VM_MAXID 12 /* number of valid vm ids */ +#define VM_OVERCOMMIT 12 /* vm.overcommit */ +#define VM_MAXID 13 /* number of valid vm ids */ /* * Structure for swap device statistics diff --git a/sys/vm/vm_phys.c b/sys/vm/vm_phys.c index 484417b..60b452f 100644 --- a/sys/vm/vm_phys.c +++ b/sys/vm/vm_phys.c @@ -175,7 +175,6 @@ static vm_page_t vm_phys_alloc_seg_contig(struct vm_phys_seg *seg, vm_paddr_t boundary); static void _vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end, int domain); static void vm_phys_create_seg(vm_paddr_t start, vm_paddr_t end); -static int vm_phys_paddr_to_segind(vm_paddr_t pa); static void vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order); @@ -731,35 +730,6 @@ vm_phys_split_pages(vm_page_t m, int oind, struct vm_freelist *fl, int order) } /* - * Initialize a physical page and add it to the free lists. - */ -void -vm_phys_add_page(vm_paddr_t pa) -{ - vm_page_t m; - struct vm_domain *vmd; - - vm_cnt.v_page_count++; - m = vm_phys_paddr_to_vm_page(pa); - m->busy_lock = VPB_UNBUSIED; - m->phys_addr = pa; - m->queue = PQ_NONE; - m->segind = vm_phys_paddr_to_segind(pa); - vmd = vm_phys_domain(m); - vmd->vmd_page_count++; - vmd->vmd_segs |= 1UL << m->segind; - KASSERT(m->order == VM_NFREEORDER, - ("vm_phys_add_page: page %p has unexpected order %d", - m, m->order)); - m->pool = VM_FREEPOOL_DEFAULT; - pmap_page_init(m); - mtx_lock(&vm_page_queue_free_mtx); - vm_phys_freecnt_adj(m, 1); - vm_phys_free_pages(m, 0); - mtx_unlock(&vm_page_queue_free_mtx); -} - -/* * Allocate a contiguous, power of two-sized set of physical pages * from the free lists. 
* @@ -912,6 +882,7 @@ vm_phys_fictitious_init_range(vm_page_t range, vm_paddr_t start, { long i; + bzero(range, page_count * sizeof(*range)); for (i = 0; i < page_count; i++) { vm_page_initfake(&range[i], start + PAGE_SIZE * i, memattr); range[i].oflags &= ~VPO_UNMANAGED; @@ -986,7 +957,7 @@ vm_phys_fictitious_reg_range(vm_paddr_t start, vm_paddr_t end, alloc: #endif fp = malloc(page_count * sizeof(struct vm_page), M_FICT_PAGES, - M_WAITOK | M_ZERO); + M_WAITOK); #ifdef VM_PHYSSEG_DENSE } #endif @@ -1067,24 +1038,6 @@ vm_phys_fictitious_unreg_range(vm_paddr_t start, vm_paddr_t end) } /* - * Find the segment containing the given physical address. - */ -static int -vm_phys_paddr_to_segind(vm_paddr_t pa) -{ - struct vm_phys_seg *seg; - int segind; - - for (segind = 0; segind < vm_phys_nsegs; segind++) { - seg = &vm_phys_segs[segind]; - if (pa >= seg->start && pa < seg->end) - return (segind); - } - panic("vm_phys_paddr_to_segind: paddr %#jx is not in any segment" , - (uintmax_t)pa); -} - -/* * Free a contiguous, power of two-sized set of physical pages. * * The free page queues must be locked. diff --git a/sys/vm/vm_phys.h b/sys/vm/vm_phys.h index ee4aa2d..c5dd58d 100644 --- a/sys/vm/vm_phys.h +++ b/sys/vm/vm_phys.h @@ -69,7 +69,6 @@ extern int vm_phys_nsegs; /* * The following functions are only to be used by the virtual memory system. */ -void vm_phys_add_page(vm_paddr_t pa); void vm_phys_add_seg(vm_paddr_t start, vm_paddr_t end); vm_page_t vm_phys_alloc_contig(u_long npages, vm_paddr_t low, vm_paddr_t high, u_long alignment, vm_paddr_t boundary); diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c index 7e2bfb6..ce3289e 100644 --- a/sys/vm/vm_reserv.c +++ b/sys/vm/vm_reserv.c @@ -1120,4 +1120,18 @@ vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water) return (new_end); } +/* + * Returns the superpage containing the given page. + */ +vm_page_t +vm_reserv_to_superpage(vm_page_t m) +{ + vm_reserv_t rv; + + VM_OBJECT_ASSERT_LOCKED(m->object); + rv = vm_reserv_from_page(m); + return (rv->object == m->object && rv->popcnt == VM_LEVEL_0_NPAGES ? + rv->pages : NULL); +} + #endif /* VM_NRESERVLEVEL > 0 */ diff --git a/sys/vm/vm_reserv.h b/sys/vm/vm_reserv.h index 8b33b48..3d9472d 100644 --- a/sys/vm/vm_reserv.h +++ b/sys/vm/vm_reserv.h @@ -64,6 +64,7 @@ void vm_reserv_rename(vm_page_t m, vm_object_t new_object, int vm_reserv_size(int level); vm_paddr_t vm_reserv_startup(vm_offset_t *vaddr, vm_paddr_t end, vm_paddr_t high_water); +vm_page_t vm_reserv_to_superpage(vm_page_t m); #endif /* VM_NRESERVLEVEL > 0 */ #endif /* _KERNEL */ diff --git a/sys/vm/vm_swapout.c b/sys/vm/vm_swapout.c new file mode 100644 index 0000000..91b6422 --- /dev/null +++ b/sys/vm/vm_swapout.c @@ -0,0 +1,975 @@ +/*- + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * Copyright (c) 1994 John S. Dyson + * All rights reserved. + * Copyright (c) 1994 David Greenman + * All rights reserved. + * Copyright (c) 2005 Yahoo! Technologies Norway AS + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Authors: Avadis Tevanian, Jr., Michael Wayne Young + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include "opt_kstack_pages.h" +#include "opt_kstack_max_pages.h" +#include "opt_vm.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/limits.h> +#include <sys/kernel.h> +#include <sys/eventhandler.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/_kstack_cache.h> +#include <sys/kthread.h> +#include <sys/ktr.h> +#include <sys/mount.h> +#include <sys/racct.h> +#include <sys/resourcevar.h> +#include <sys/sched.h> +#include <sys/sdt.h> +#include <sys/signalvar.h> +#include <sys/smp.h> +#include <sys/time.h> +#include <sys/vnode.h> +#include <sys/vmmeter.h> +#include <sys/rwlock.h> +#include <sys/sx.h> +#include <sys/sysctl.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/vm_map.h> +#include <vm/vm_pageout.h> +#include <vm/vm_pager.h> +#include <vm/vm_phys.h> +#include <vm/swap_pager.h> +#include <vm/vm_extern.h> +#include <vm/uma.h> + +/* the kernel process "vm_daemon" */ +static void vm_daemon(void); +static struct proc *vmproc; + +static struct kproc_desc vm_kp = { + "vmdaemon", + vm_daemon, + &vmproc +}; +SYSINIT(vmdaemon, SI_SUB_KTHREAD_VM, SI_ORDER_FIRST, kproc_start, &vm_kp); + +static int vm_swap_enabled = 1; +static int vm_swap_idle_enabled = 0; + +SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RW, + &vm_swap_enabled, 0, + "Enable entire process swapout"); +SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RW, + &vm_swap_idle_enabled, 0, + "Allow swapout on idle criteria"); + +/* + * Swap_idle_threshold1 is the guaranteed swapped in time for a process + */ +static int swap_idle_threshold1 = 2; +SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold1, CTLFLAG_RW, + &swap_idle_threshold1, 0, + "Guaranteed swapped in time for a process"); + +/* + * Swap_idle_threshold2 is the time that a process can be idle before + * it will be swapped out, if idle swapping is enabled. + */ +static int swap_idle_threshold2 = 10; +SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2, CTLFLAG_RW, + &swap_idle_threshold2, 0, + "Time before a process will be swapped out"); + +static int vm_pageout_req_swapout; /* XXX */ +static int vm_daemon_needed; +static struct mtx vm_daemon_mtx; +/* Allow for use by vm_pageout before vm_daemon is initialized. */ +MTX_SYSINIT(vm_daemon, &vm_daemon_mtx, "vm daemon", MTX_DEF); + +static void swapclear(struct proc *); +static int swapout(struct proc *); +static void vm_swapout_map_deactivate_pages(vm_map_t, long); +static void vm_swapout_object_deactivate_pages(pmap_t, vm_object_t, long); +static void swapout_procs(int action); +static void vm_req_vmdaemon(int req); +static void vm_thread_swapin(struct thread *td); +static void vm_thread_swapout(struct thread *td); + +/* + * vm_swapout_object_deactivate_pages + * + * Deactivate enough pages to satisfy the inactive target + * requirements. + * + * The object and map must be locked. 
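The process-swapout policy knobs move into this file but keep their sysctl names, so existing tooling keeps working. A small, self-contained FreeBSD userland sketch that reads them (illustrative, not part of the change):

	#include <sys/types.h>
	#include <sys/sysctl.h>
	#include <stdio.h>

	int
	main(void)
	{
		int enabled, idle;
		size_t len;

		len = sizeof(enabled);
		if (sysctlbyname("vm.swap_enabled", &enabled, &len, NULL, 0) == 0)
			printf("vm.swap_enabled=%d\n", enabled);
		len = sizeof(idle);
		if (sysctlbyname("vm.swap_idle_enabled", &idle, &len, NULL, 0) == 0)
			printf("vm.swap_idle_enabled=%d\n", idle);
		return (0);
	}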
+ */ +static void +vm_swapout_object_deactivate_pages(pmap_t pmap, vm_object_t first_object, + long desired) +{ + vm_object_t backing_object, object; + vm_page_t p; + int act_delta, remove_mode; + + VM_OBJECT_ASSERT_LOCKED(first_object); + if ((first_object->flags & OBJ_FICTITIOUS) != 0) + return; + for (object = first_object;; object = backing_object) { + if (pmap_resident_count(pmap) <= desired) + goto unlock_return; + VM_OBJECT_ASSERT_LOCKED(object); + if ((object->flags & OBJ_UNMANAGED) != 0 || + object->paging_in_progress != 0) + goto unlock_return; + + remove_mode = 0; + if (object->shadow_count > 1) + remove_mode = 1; + /* + * Scan the object's entire memory queue. + */ + TAILQ_FOREACH(p, &object->memq, listq) { + if (pmap_resident_count(pmap) <= desired) + goto unlock_return; + if (vm_page_busied(p)) + continue; + PCPU_INC(cnt.v_pdpages); + vm_page_lock(p); + if (p->wire_count != 0 || p->hold_count != 0 || + !pmap_page_exists_quick(pmap, p)) { + vm_page_unlock(p); + continue; + } + act_delta = pmap_ts_referenced(p); + if ((p->aflags & PGA_REFERENCED) != 0) { + if (act_delta == 0) + act_delta = 1; + vm_page_aflag_clear(p, PGA_REFERENCED); + } + if (!vm_page_active(p) && act_delta != 0) { + vm_page_activate(p); + p->act_count += act_delta; + } else if (vm_page_active(p)) { + if (act_delta == 0) { + p->act_count -= min(p->act_count, + ACT_DECLINE); + if (!remove_mode && p->act_count == 0) { + pmap_remove_all(p); + vm_page_deactivate(p); + } else + vm_page_requeue(p); + } else { + vm_page_activate(p); + if (p->act_count < ACT_MAX - + ACT_ADVANCE) + p->act_count += ACT_ADVANCE; + vm_page_requeue(p); + } + } else if (vm_page_inactive(p)) + pmap_remove_all(p); + vm_page_unlock(p); + } + if ((backing_object = object->backing_object) == NULL) + goto unlock_return; + VM_OBJECT_RLOCK(backing_object); + if (object != first_object) + VM_OBJECT_RUNLOCK(object); + } +unlock_return: + if (object != first_object) + VM_OBJECT_RUNLOCK(object); +} + +/* + * deactivate some number of pages in a map, try to do it fairly, but + * that is really hard to do. + */ +static void +vm_swapout_map_deactivate_pages(vm_map_t map, long desired) +{ + vm_map_entry_t tmpe; + vm_object_t obj, bigobj; + int nothingwired; + + if (!vm_map_trylock(map)) + return; + + bigobj = NULL; + nothingwired = TRUE; + + /* + * first, search out the biggest object, and try to free pages from + * that. + */ + tmpe = map->header.next; + while (tmpe != &map->header) { + if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { + obj = tmpe->object.vm_object; + if (obj != NULL && VM_OBJECT_TRYRLOCK(obj)) { + if (obj->shadow_count <= 1 && + (bigobj == NULL || + bigobj->resident_page_count < + obj->resident_page_count)) { + if (bigobj != NULL) + VM_OBJECT_RUNLOCK(bigobj); + bigobj = obj; + } else + VM_OBJECT_RUNLOCK(obj); + } + } + if (tmpe->wired_count > 0) + nothingwired = FALSE; + tmpe = tmpe->next; + } + + if (bigobj != NULL) { + vm_swapout_object_deactivate_pages(map->pmap, bigobj, desired); + VM_OBJECT_RUNLOCK(bigobj); + } + /* + * Next, hunt around for other pages to deactivate. We actually + * do this search sort of wrong -- .text first is not the best idea. 
+ */ + tmpe = map->header.next; + while (tmpe != &map->header) { + if (pmap_resident_count(vm_map_pmap(map)) <= desired) + break; + if ((tmpe->eflags & MAP_ENTRY_IS_SUB_MAP) == 0) { + obj = tmpe->object.vm_object; + if (obj != NULL) { + VM_OBJECT_RLOCK(obj); + vm_swapout_object_deactivate_pages(map->pmap, + obj, desired); + VM_OBJECT_RUNLOCK(obj); + } + } + tmpe = tmpe->next; + } + + /* + * Remove all mappings if a process is swapped out, this will free page + * table pages. + */ + if (desired == 0 && nothingwired) { + pmap_remove(vm_map_pmap(map), vm_map_min(map), + vm_map_max(map)); + } + + vm_map_unlock(map); +} + +/* + * Swap out requests + */ +#define VM_SWAP_NORMAL 1 +#define VM_SWAP_IDLE 2 + +void +vm_swapout_run(void) +{ + + if (vm_swap_enabled) + vm_req_vmdaemon(VM_SWAP_NORMAL); +} + +/* + * Idle process swapout -- run once per second when pagedaemons are + * reclaiming pages. + */ +void +vm_swapout_run_idle(void) +{ + static long lsec; + + if (!vm_swap_idle_enabled || time_second == lsec) + return; + vm_req_vmdaemon(VM_SWAP_IDLE); + lsec = time_second; +} + +static void +vm_req_vmdaemon(int req) +{ + static int lastrun = 0; + + mtx_lock(&vm_daemon_mtx); + vm_pageout_req_swapout |= req; + if ((ticks > (lastrun + hz)) || (ticks < lastrun)) { + wakeup(&vm_daemon_needed); + lastrun = ticks; + } + mtx_unlock(&vm_daemon_mtx); +} + +static void +vm_daemon(void) +{ + struct rlimit rsslim; + struct proc *p; + struct thread *td; + struct vmspace *vm; + int breakout, swapout_flags, tryagain, attempts; +#ifdef RACCT + uint64_t rsize, ravailable; +#endif + + while (TRUE) { + mtx_lock(&vm_daemon_mtx); + msleep(&vm_daemon_needed, &vm_daemon_mtx, PPAUSE, "psleep", +#ifdef RACCT + racct_enable ? hz : 0 +#else + 0 +#endif + ); + swapout_flags = vm_pageout_req_swapout; + vm_pageout_req_swapout = 0; + mtx_unlock(&vm_daemon_mtx); + if (swapout_flags) + swapout_procs(swapout_flags); + + /* + * scan the processes for exceeding their rlimits or if + * process is swapped out -- deactivate pages + */ + tryagain = 0; + attempts = 0; +again: + attempts++; + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + vm_pindex_t limit, size; + + /* + * if this is a system process or if we have already + * looked at this process, skip it. + */ + PROC_LOCK(p); + if (p->p_state != PRS_NORMAL || + p->p_flag & (P_INEXEC | P_SYSTEM | P_WEXIT)) { + PROC_UNLOCK(p); + continue; + } + /* + * if the process is in a non-running type state, + * don't touch it. + */ + breakout = 0; + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + if (!TD_ON_RUNQ(td) && + !TD_IS_RUNNING(td) && + !TD_IS_SLEEPING(td) && + !TD_IS_SUSPENDED(td)) { + thread_unlock(td); + breakout = 1; + break; + } + thread_unlock(td); + } + if (breakout) { + PROC_UNLOCK(p); + continue; + } + /* + * get a limit + */ + lim_rlimit_proc(p, RLIMIT_RSS, &rsslim); + limit = OFF_TO_IDX( + qmin(rsslim.rlim_cur, rsslim.rlim_max)); + + /* + * let processes that are swapped out really be + * swapped out set the limit to nothing (will force a + * swap-out.) 
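+ * With a zero limit every resident page of an already swapped-out
+ * process becomes a candidate for deactivation below.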
+ */ + if ((p->p_flag & P_INMEM) == 0) + limit = 0; /* XXX */ + vm = vmspace_acquire_ref(p); + _PHOLD_LITE(p); + PROC_UNLOCK(p); + if (vm == NULL) { + PRELE(p); + continue; + } + sx_sunlock(&allproc_lock); + + size = vmspace_resident_count(vm); + if (size >= limit) { + vm_swapout_map_deactivate_pages( + &vm->vm_map, limit); + size = vmspace_resident_count(vm); + } +#ifdef RACCT + if (racct_enable) { + rsize = IDX_TO_OFF(size); + PROC_LOCK(p); + if (p->p_state == PRS_NORMAL) + racct_set(p, RACCT_RSS, rsize); + ravailable = racct_get_available(p, RACCT_RSS); + PROC_UNLOCK(p); + if (rsize > ravailable) { + /* + * Don't be overly aggressive; this + * might be an innocent process, + * and the limit could've been exceeded + * by some memory hog. Don't try + * to deactivate more than 1/4th + * of process' resident set size. + */ + if (attempts <= 8) { + if (ravailable < rsize - + (rsize / 4)) { + ravailable = rsize - + (rsize / 4); + } + } + vm_swapout_map_deactivate_pages( + &vm->vm_map, + OFF_TO_IDX(ravailable)); + /* Update RSS usage after paging out. */ + size = vmspace_resident_count(vm); + rsize = IDX_TO_OFF(size); + PROC_LOCK(p); + if (p->p_state == PRS_NORMAL) + racct_set(p, RACCT_RSS, rsize); + PROC_UNLOCK(p); + if (rsize > ravailable) + tryagain = 1; + } + } +#endif + vmspace_free(vm); + sx_slock(&allproc_lock); + PRELE(p); + } + sx_sunlock(&allproc_lock); + if (tryagain != 0 && attempts <= 10) + goto again; + } +} + +/* + * Allow a thread's kernel stack to be paged out. + */ +static void +vm_thread_swapout(struct thread *td) +{ + vm_object_t ksobj; + vm_page_t m; + int i, pages; + + cpu_thread_swapout(td); + pages = td->td_kstack_pages; + ksobj = td->td_kstack_obj; + pmap_qremove(td->td_kstack, pages); + VM_OBJECT_WLOCK(ksobj); + for (i = 0; i < pages; i++) { + m = vm_page_lookup(ksobj, i); + if (m == NULL) + panic("vm_thread_swapout: kstack already missing?"); + vm_page_dirty(m); + vm_page_lock(m); + vm_page_unwire(m, PQ_INACTIVE); + vm_page_unlock(m); + } + VM_OBJECT_WUNLOCK(ksobj); +} + +/* + * Bring the kernel stack for a specified thread back in. + */ +static void +vm_thread_swapin(struct thread *td) +{ + vm_object_t ksobj; + vm_page_t ma[KSTACK_MAX_PAGES]; + int pages; + + pages = td->td_kstack_pages; + ksobj = td->td_kstack_obj; + VM_OBJECT_WLOCK(ksobj); + (void)vm_page_grab_pages(ksobj, 0, VM_ALLOC_NORMAL | VM_ALLOC_WIRED, ma, + pages); + for (int i = 0; i < pages;) { + int j, a, count, rv; + + vm_page_assert_xbusied(ma[i]); + if (ma[i]->valid == VM_PAGE_BITS_ALL) { + vm_page_xunbusy(ma[i]); + i++; + continue; + } + vm_object_pip_add(ksobj, 1); + for (j = i + 1; j < pages; j++) + if (ma[j]->valid == VM_PAGE_BITS_ALL) + break; + rv = vm_pager_has_page(ksobj, ma[i]->pindex, NULL, &a); + KASSERT(rv == 1, ("%s: missing page %p", __func__, ma[i])); + count = min(a + 1, j - i); + rv = vm_pager_get_pages(ksobj, ma + i, count, NULL, NULL); + KASSERT(rv == VM_PAGER_OK, ("%s: cannot get kstack for proc %d", + __func__, td->td_proc->p_pid)); + vm_object_pip_wakeup(ksobj); + for (j = i; j < i + count; j++) + vm_page_xunbusy(ma[j]); + i += count; + } + VM_OBJECT_WUNLOCK(ksobj); + pmap_qenter(td->td_kstack, ma, pages); + cpu_thread_swapin(td); +} + +void +faultin(struct proc *p) +{ + struct thread *td; + + PROC_LOCK_ASSERT(p, MA_OWNED); + /* + * If another process is swapping in this process, + * just wait until it finishes. 
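+ * swapclear() clears P_SWAPPINGIN once that thread is done, and the
+ * sleeper below is woken via wakeup(&p->p_flag).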
+ */ + if (p->p_flag & P_SWAPPINGIN) { + while (p->p_flag & P_SWAPPINGIN) + msleep(&p->p_flag, &p->p_mtx, PVM, "faultin", 0); + return; + } + if ((p->p_flag & P_INMEM) == 0) { + /* + * Don't let another thread swap process p out while we are + * busy swapping it in. + */ + ++p->p_lock; + p->p_flag |= P_SWAPPINGIN; + PROC_UNLOCK(p); + + /* + * We hold no lock here because the list of threads + * can not change while all threads in the process are + * swapped out. + */ + FOREACH_THREAD_IN_PROC(p, td) + vm_thread_swapin(td); + PROC_LOCK(p); + swapclear(p); + p->p_swtick = ticks; + + wakeup(&p->p_flag); + + /* Allow other threads to swap p out now. */ + --p->p_lock; + } +} + +/* + * This swapin algorithm attempts to swap-in processes only if there + * is enough space for them. Of course, if a process waits for a long + * time, it will be swapped in anyway. + */ +void +swapper(void) +{ + struct proc *p; + struct thread *td; + struct proc *pp; + int slptime; + int swtime; + int ppri; + int pri; + +loop: + if (vm_page_count_min()) { + VM_WAIT; + goto loop; + } + + pp = NULL; + ppri = INT_MIN; + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + PROC_LOCK(p); + if (p->p_state == PRS_NEW || + p->p_flag & (P_SWAPPINGOUT | P_SWAPPINGIN | P_INMEM)) { + PROC_UNLOCK(p); + continue; + } + swtime = (ticks - p->p_swtick) / hz; + FOREACH_THREAD_IN_PROC(p, td) { + /* + * An otherwise runnable thread of a process + * swapped out has only the TDI_SWAPPED bit set. + */ + thread_lock(td); + if (td->td_inhibitors == TDI_SWAPPED) { + slptime = (ticks - td->td_slptick) / hz; + pri = swtime + slptime; + if ((td->td_flags & TDF_SWAPINREQ) == 0) + pri -= p->p_nice * 8; + /* + * if this thread is higher priority + * and there is enough space, then select + * this process instead of the previous + * selection. + */ + if (pri > ppri) { + pp = p; + ppri = pri; + } + } + thread_unlock(td); + } + PROC_UNLOCK(p); + } + sx_sunlock(&allproc_lock); + + /* + * Nothing to do, back to sleep. + */ + if ((p = pp) == NULL) { + tsleep(&proc0, PVM, "swapin", MAXSLP * hz / 2); + goto loop; + } + PROC_LOCK(p); + + /* + * Another process may be bringing or may have already + * brought this process in while we traverse all threads. + * Or, this process may even be being swapped out again. + */ + if (p->p_flag & (P_INMEM | P_SWAPPINGOUT | P_SWAPPINGIN)) { + PROC_UNLOCK(p); + goto loop; + } + + /* + * We would like to bring someone in. (only if there is space). + * [What checks the space? ] + */ + faultin(p); + PROC_UNLOCK(p); + goto loop; +} + +/* + * First, if any processes have been sleeping or stopped for at least + * "swap_idle_threshold1" seconds, they are swapped out. If, however, + * no such processes exist, then the longest-sleeping or stopped + * process is swapped out. Finally, and only as a last resort, if + * there are no sleeping or stopped processes, the longest-resident + * process is swapped out. + */ +static void +swapout_procs(int action) +{ + struct proc *p; + struct thread *td; + int didswap = 0; + +retry: + sx_slock(&allproc_lock); + FOREACH_PROC_IN_SYSTEM(p) { + struct vmspace *vm; + int minslptime = 100000; + int slptime; + + PROC_LOCK(p); + /* + * Watch out for a process in + * creation. It may have no + * address space or lock yet. + */ + if (p->p_state == PRS_NEW) { + PROC_UNLOCK(p); + continue; + } + /* + * An aio daemon switches its + * address space while running. + * Perform a quick check whether + * a process has P_SYSTEM. + * Filter out exiting processes. 
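+ * Both flags are tested under the process lock, before the process
+ * is held and the allproc lock is dropped.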
+ */ + if ((p->p_flag & (P_SYSTEM | P_WEXIT)) != 0) { + PROC_UNLOCK(p); + continue; + } + _PHOLD_LITE(p); + PROC_UNLOCK(p); + sx_sunlock(&allproc_lock); + + /* + * Do not swapout a process that + * is waiting for VM data + * structures as there is a possible + * deadlock. Test this first as + * this may block. + * + * Lock the map until swapout + * finishes, or a thread of this + * process may attempt to alter + * the map. + */ + vm = vmspace_acquire_ref(p); + if (vm == NULL) + goto nextproc2; + if (!vm_map_trylock(&vm->vm_map)) + goto nextproc1; + + PROC_LOCK(p); + if (p->p_lock != 1 || (p->p_flag & (P_STOPPED_SINGLE | + P_TRACED | P_SYSTEM)) != 0) + goto nextproc; + + /* + * only aiod changes vmspace, however it will be + * skipped because of the if statement above checking + * for P_SYSTEM + */ + if ((p->p_flag & (P_INMEM|P_SWAPPINGOUT|P_SWAPPINGIN)) != P_INMEM) + goto nextproc; + + switch (p->p_state) { + default: + /* Don't swap out processes in any sort + * of 'special' state. */ + break; + + case PRS_NORMAL: + /* + * do not swapout a realtime process + * Check all the thread groups.. + */ + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + if (PRI_IS_REALTIME(td->td_pri_class)) { + thread_unlock(td); + goto nextproc; + } + slptime = (ticks - td->td_slptick) / hz; + /* + * Guarantee swap_idle_threshold1 + * time in memory. + */ + if (slptime < swap_idle_threshold1) { + thread_unlock(td); + goto nextproc; + } + + /* + * Do not swapout a process if it is + * waiting on a critical event of some + * kind or there is a thread whose + * pageable memory may be accessed. + * + * This could be refined to support + * swapping out a thread. + */ + if (!thread_safetoswapout(td)) { + thread_unlock(td); + goto nextproc; + } + /* + * If the system is under memory stress, + * or if we are swapping + * idle processes >= swap_idle_threshold2, + * then swap the process out. + */ + if (((action & VM_SWAP_NORMAL) == 0) && + (((action & VM_SWAP_IDLE) == 0) || + (slptime < swap_idle_threshold2))) { + thread_unlock(td); + goto nextproc; + } + + if (minslptime > slptime) + minslptime = slptime; + thread_unlock(td); + } + + /* + * If the pageout daemon didn't free enough pages, + * or if this process is idle and the system is + * configured to swap proactively, swap it out. + */ + if ((action & VM_SWAP_NORMAL) || + ((action & VM_SWAP_IDLE) && + (minslptime > swap_idle_threshold2))) { + _PRELE(p); + if (swapout(p) == 0) + didswap++; + PROC_UNLOCK(p); + vm_map_unlock(&vm->vm_map); + vmspace_free(vm); + goto retry; + } + } +nextproc: + PROC_UNLOCK(p); + vm_map_unlock(&vm->vm_map); +nextproc1: + vmspace_free(vm); +nextproc2: + sx_slock(&allproc_lock); + PRELE(p); + } + sx_sunlock(&allproc_lock); + /* + * If we swapped something out, and another process needed memory, + * then wakeup the sched process. + */ + if (didswap) + wakeup(&proc0); +} + +static void +swapclear(struct proc *p) +{ + struct thread *td; + + PROC_LOCK_ASSERT(p, MA_OWNED); + + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + td->td_flags |= TDF_INMEM; + td->td_flags &= ~TDF_SWAPINREQ; + TD_CLR_SWAPPED(td); + if (TD_CAN_RUN(td)) + if (setrunnable(td)) { +#ifdef INVARIANTS + /* + * XXX: We just cleared TDI_SWAPPED + * above and set TDF_INMEM, so this + * should never happen. 
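+ * A non-zero return from setrunnable() would ask for the swapper
+ * to be woken for an already-resident thread.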
+ */ + panic("not waking up swapper"); +#endif + } + thread_unlock(td); + } + p->p_flag &= ~(P_SWAPPINGIN | P_SWAPPINGOUT); + p->p_flag |= P_INMEM; +} + +static int +swapout(struct proc *p) +{ + struct thread *td; + + PROC_LOCK_ASSERT(p, MA_OWNED); + + /* + * The states of this process and its threads may have changed + * by now. Assuming that there is only one pageout daemon thread, + * this process should still be in memory. + */ + KASSERT((p->p_flag & (P_INMEM | P_SWAPPINGOUT | P_SWAPPINGIN)) == + P_INMEM, ("swapout: lost a swapout race?")); + + /* + * remember the process resident count + */ + p->p_vmspace->vm_swrss = vmspace_resident_count(p->p_vmspace); + /* + * Check and mark all threads before we proceed. + */ + p->p_flag &= ~P_INMEM; + p->p_flag |= P_SWAPPINGOUT; + FOREACH_THREAD_IN_PROC(p, td) { + thread_lock(td); + if (!thread_safetoswapout(td)) { + thread_unlock(td); + swapclear(p); + return (EBUSY); + } + td->td_flags &= ~TDF_INMEM; + TD_SET_SWAPPED(td); + thread_unlock(td); + } + td = FIRST_THREAD_IN_PROC(p); + ++td->td_ru.ru_nswap; + PROC_UNLOCK(p); + + /* + * This list is stable because all threads are now prevented from + * running. The list is only modified in the context of a running + * thread in this process. + */ + FOREACH_THREAD_IN_PROC(p, td) + vm_thread_swapout(td); + + PROC_LOCK(p); + p->p_flag &= ~P_SWAPPINGOUT; + p->p_swtick = ticks; + return (0); +} diff --git a/sys/vm/vm_swapout_dummy.c b/sys/vm/vm_swapout_dummy.c new file mode 100644 index 0000000..9cdfcb8 --- /dev/null +++ b/sys/vm/vm_swapout_dummy.c @@ -0,0 +1,122 @@ +/*- + * Copyright (c) 1991 Regents of the University of California. + * All rights reserved. + * Copyright (c) 1994 John S. Dyson + * All rights reserved. + * Copyright (c) 1994 David Greenman + * All rights reserved. + * Copyright (c) 2005 Yahoo! Technologies Norway AS + * All rights reserved. + * + * This code is derived from software contributed to Berkeley by + * The Mach Operating System project at Carnegie-Mellon University. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * from: @(#)vm_pageout.c 7.4 (Berkeley) 5/7/91 + * + * + * Copyright (c) 1987, 1990 Carnegie-Mellon University. + * All rights reserved. + * + * Authors: Avadis Tevanian, Jr., Michael Wayne Young + * + * Permission to use, copy, modify and distribute this software and + * its documentation is hereby granted, provided that both the copyright + * notice and this permission notice appear in all copies of the + * software, derivative works or modified versions, and any portions + * thereof, and that both notices appear in supporting documentation. + * + * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS" + * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND + * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. + * + * Carnegie Mellon requests users of this software to return to + * + * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU + * School of Computer Science + * Carnegie Mellon University + * Pittsburgh PA 15213-3890 + * + * any improvements or extensions that they make and grant Carnegie the + * rights to redistribute these changes. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/sysctl.h> +#include <sys/vmmeter.h> + +#include <vm/vm.h> +#include <vm/vm_param.h> +#include <vm/vm_pageout.h> + +static int vm_swap_enabled = 0; +SYSCTL_INT(_vm, VM_SWAPPING_ENABLED, swap_enabled, CTLFLAG_RD, + &vm_swap_enabled, 0, + "Enable entire process swapout"); + +static int vm_swap_idle_enabled = 0; +SYSCTL_INT(_vm, OID_AUTO, swap_idle_enabled, CTLFLAG_RD, + &vm_swap_idle_enabled, 0, + "Allow swapout on idle criteria"); + +void +vm_swapout_run(void) +{ +} + +void +vm_swapout_run_idle(void) +{ +} + +void +faultin(struct proc *p) +{ + + PROC_LOCK_ASSERT(p, MA_OWNED); + if ((p->p_flag & P_INMEM) == 0) + panic("faultin: proc %p swapped out with NO_SWAPPING", p); +} + +void +swapper(void) +{ + + for (;;) + tsleep(&proc0, PVM, "swapin", MAXSLP * hz); +} diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c index 4c0dae9..30b02c4 100644 --- a/sys/vm/vnode_pager.c +++ b/sys/vm/vnode_pager.c @@ -1138,6 +1138,23 @@ vnode_pager_putpages(vm_object_t object, vm_page_t *m, int count, VM_OBJECT_WLOCK(object); } +static int +vn_off2bidx(vm_ooffset_t offset) +{ + + return ((offset & PAGE_MASK) / DEV_BSIZE); +} + +static bool +vn_dirty_blk(vm_page_t m, vm_ooffset_t offset) +{ + + KASSERT(IDX_TO_OFF(m->pindex) <= offset && + offset < IDX_TO_OFF(m->pindex + 1), + ("page %p pidx %ju offset %ju", m, (uintmax_t)m->pindex, + (uintmax_t)offset)); + return ((m->dirty & ((vm_page_bits_t)1 << vn_off2bidx(offset))) != 0); +} /* * This is now called from local media FS's to operate against their @@ -1154,10 +1171,12 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount, { vm_object_t object; vm_page_t m; - vm_ooffset_t poffset; + vm_ooffset_t 
maxblksz, next_offset, poffset, prev_offset; struct uio auio; struct iovec aiov; + off_t prev_resid, wrsz; int count, error, i, maxsize, ncount, pgoff, ppscheck; + bool in_hole; static struct timeval lastfail; static int curfail; @@ -1192,8 +1211,14 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount, * We do not under any circumstances truncate the valid bits, as * this will screw up bogus page replacement. */ - VM_OBJECT_WLOCK(object); + VM_OBJECT_RLOCK(object); if (maxsize + poffset > object->un_pager.vnp.vnp_size) { + if (!VM_OBJECT_TRYUPGRADE(object)) { + VM_OBJECT_RUNLOCK(object); + VM_OBJECT_WLOCK(object); + if (maxsize + poffset <= object->un_pager.vnp.vnp_size) + goto downgrade; + } if (object->un_pager.vnp.vnp_size > poffset) { maxsize = object->un_pager.vnp.vnp_size - poffset; ncount = btoc(maxsize); @@ -1218,35 +1243,105 @@ vnode_pager_generic_putpages(struct vnode *vp, vm_page_t *ma, int bytecount, } for (i = ncount; i < count; i++) rtvals[i] = VM_PAGER_BAD; +downgrade: + VM_OBJECT_LOCK_DOWNGRADE(object); } - for (i = 0; i < ncount - ((btoc(maxsize) & PAGE_MASK) != 0); i++) - MPASS(ma[i]->dirty == VM_PAGE_BITS_ALL); - VM_OBJECT_WUNLOCK(object); - aiov.iov_base = NULL; - aiov.iov_len = maxsize; auio.uio_iov = &aiov; - auio.uio_iovcnt = 1; - auio.uio_offset = poffset; auio.uio_segflg = UIO_NOCOPY; auio.uio_rw = UIO_WRITE; - auio.uio_resid = maxsize; auio.uio_td = NULL; - error = VOP_WRITE(vp, &auio, vnode_pager_putpages_ioflags(flags), - curthread->td_ucred); - PCPU_INC(cnt.v_vnodeout); - PCPU_ADD(cnt.v_vnodepgsout, ncount); - - ppscheck = 0; - if (error != 0 && (ppscheck = ppsratecheck(&lastfail, &curfail, 1)) - != 0) - printf("vnode_pager_putpages: I/O error %d\n", error); - if (auio.uio_resid != 0 && (ppscheck != 0 || - ppsratecheck(&lastfail, &curfail, 1) != 0)) - printf("vnode_pager_putpages: residual I/O %zd at %ju\n", - auio.uio_resid, (uintmax_t)ma[0]->pindex); - for (i = 0; i < ncount; i++) + maxblksz = roundup2(poffset + maxsize, DEV_BSIZE); + + for (prev_offset = poffset; prev_offset < maxblksz;) { + /* Skip clean blocks. */ + for (in_hole = true; in_hole && prev_offset < maxblksz;) { + m = ma[OFF_TO_IDX(prev_offset - poffset)]; + for (i = vn_off2bidx(prev_offset); + i < sizeof(vm_page_bits_t) * NBBY && + prev_offset < maxblksz; i++) { + if (vn_dirty_blk(m, prev_offset)) { + in_hole = false; + break; + } + prev_offset += DEV_BSIZE; + } + } + if (in_hole) + goto write_done; + + /* Find longest run of dirty blocks. */ + for (next_offset = prev_offset; next_offset < maxblksz;) { + m = ma[OFF_TO_IDX(next_offset - poffset)]; + for (i = vn_off2bidx(next_offset); + i < sizeof(vm_page_bits_t) * NBBY && + next_offset < maxblksz; i++) { + if (!vn_dirty_blk(m, next_offset)) + goto start_write; + next_offset += DEV_BSIZE; + } + } +start_write: + if (next_offset > poffset + maxsize) + next_offset = poffset + maxsize; + + /* + * Getting here requires finding a dirty block in the + * 'skip clean blocks' loop. 
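+ * At this point prev_offset names the first dirty block of the run
+ * and next_offset the first clean block (or the end of the request)
+ * following it.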
+ */ + MPASS(prev_offset < next_offset); + + VM_OBJECT_RUNLOCK(object); + aiov.iov_base = NULL; + auio.uio_iovcnt = 1; + auio.uio_offset = prev_offset; + prev_resid = auio.uio_resid = aiov.iov_len = next_offset - + prev_offset; + error = VOP_WRITE(vp, &auio, + vnode_pager_putpages_ioflags(flags), curthread->td_ucred); + + wrsz = prev_resid - auio.uio_resid; + if (wrsz == 0) { + if (ppsratecheck(&lastfail, &curfail, 1) != 0) { + vn_printf(vp, "vnode_pager_putpages: " + "zero-length write at %ju resid %zd\n", + auio.uio_offset, auio.uio_resid); + } + VM_OBJECT_RLOCK(object); + break; + } + + /* Adjust the starting offset for next iteration. */ + prev_offset += wrsz; + MPASS(auio.uio_offset == prev_offset); + + ppscheck = 0; + if (error != 0 && (ppscheck = ppsratecheck(&lastfail, + &curfail, 1)) != 0) + vn_printf(vp, "vnode_pager_putpages: I/O error %d\n", + error); + if (auio.uio_resid != 0 && (ppscheck != 0 || + ppsratecheck(&lastfail, &curfail, 1) != 0)) + vn_printf(vp, "vnode_pager_putpages: residual I/O %zd " + "at %ju\n", auio.uio_resid, + (uintmax_t)ma[0]->pindex); + VM_OBJECT_RLOCK(object); + if (error != 0 || auio.uio_resid != 0) + break; + } +write_done: + /* Mark completely processed pages. */ + for (i = 0; i < OFF_TO_IDX(prev_offset - poffset); i++) rtvals[i] = VM_PAGER_OK; + /* Mark partial EOF page. */ + if (prev_offset == poffset + maxsize && (prev_offset & PAGE_MASK) != 0) + rtvals[i++] = VM_PAGER_OK; + /* Unwritten pages in range, free bonus if the page is clean. */ + for (; i < ncount; i++) + rtvals[i] = ma[i]->dirty == 0 ? VM_PAGER_OK : VM_PAGER_ERROR; + VM_OBJECT_RUNLOCK(object); + PCPU_ADD(cnt.v_vnodepgsout, i); + PCPU_INC(cnt.v_vnodeout); return (rtvals[0]); } |
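
Editor's note: the new putpages loop writes one run of dirty DEV_BSIZE blocks per VOP_WRITE() instead of the whole request. Below is a minimal, editor-added userspace sketch of that idea only; the names (emit_dirty_runs, page_bits_t) and the small BSIZE/NBLKS constants are hypothetical stand-ins and are not part of the change above.

#include <stdint.h>
#include <stdio.h>

#define BSIZE	512		/* stand-in for DEV_BSIZE */
#define NBLKS	8		/* blocks per (small) page in this model */

/* One bit per BSIZE block; bit i set means block i is dirty. */
typedef uint8_t page_bits_t;

/* Report each maximal run of dirty blocks as one [offset, length) write. */
static void
emit_dirty_runs(page_bits_t dirty, long pgoff)
{
	int i, start;

	for (i = 0; i < NBLKS;) {
		/* Skip clean blocks. */
		while (i < NBLKS && (dirty & ((page_bits_t)1 << i)) == 0)
			i++;
		if (i == NBLKS)
			break;
		/* Extend the run while blocks stay dirty. */
		start = i;
		while (i < NBLKS && (dirty & ((page_bits_t)1 << i)) != 0)
			i++;
		printf("write offset %ld length %d\n",
		    pgoff + (long)start * BSIZE, (i - start) * BSIZE);
	}
}

int
main(void)
{
	/* Blocks 1, 2 and 5 dirty: expect two separate writes. */
	emit_dirty_runs(0x26, 0);
	return (0);
}

With the 0x26 bitmap this prints a 1024-byte write at offset 512 and a 512-byte write at offset 2560, mirroring how the kernel loop issues one VOP_WRITE() per dirty run and leaves clean blocks untouched.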