path: root/sys
author      alc <alc@FreeBSD.org>   2007-06-16 04:57:06 +0000
committer   alc <alc@FreeBSD.org>   2007-06-16 04:57:06 +0000
commit      a8415c5a0d7d39fe501a93e2f31b2532cf6dfd91 (patch)
tree        d4adea7a0cd4062bbcbfbb6bc1a5a30eecf59d97 /sys
parent      98cd3abe8b02f0ae3d389eb9e6ecbf4bf39032f6 (diff)
download    FreeBSD-src-a8415c5a0d7d39fe501a93e2f31b2532cf6dfd91.zip
            FreeBSD-src-a8415c5a0d7d39fe501a93e2f31b2532cf6dfd91.tar.gz
Enable the new physical memory allocator.
This allocator uses a binary buddy system with a twist. First and foremost, this allocator is required to support the implementation of superpages. As a side effect, it enables a more robust implementation of contigmalloc(9). Moreover, this reimplementation of contigmalloc(9) eliminates the acquisition of Giant by contigmalloc(..., M_NOWAIT, ...).

The twist is that this allocator tries to reduce the number of TLB misses incurred by accesses through a direct map to small, UMA-managed objects and page table pages. Roughly speaking, the physical pages that are allocated for such purposes are clustered together in the physical address space. The performance benefits vary. In the most extreme case, a uniprocessor kernel running on an Opteron, I measured an 18% reduction in system time during a buildworld.

This allocator does not implement page coloring. The reason is that superpages have much the same effect: the contiguous physical memory allocation necessary for a superpage is inherently colored.

Finally, the one caveat is that this allocator does not effectively support prezeroed pages. I hope this is temporary. On i386, this is a slight pessimization. However, on amd64, the beneficial effects of the direct-map optimization outweigh the ill effects. I speculate that this is true in general of machines with a direct map.

Approved by: re
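The contigmalloc(9) interface itself is unchanged by this commit; only its implementation now sits on top of vm_phys_alloc_contig(). As a minimal sketch of a caller, using the argument list visible in the diff below (the buffer size, bounds, M_EXAMPLEBUF malloc type, and helper names are hypothetical, chosen only to show the parameters; the M_NOWAIT case is the one that no longer acquires Giant):

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/malloc.h>

static MALLOC_DEFINE(M_EXAMPLEBUF, "examplebuf",
    "illustrative contigmalloc buffer");

static void *
example_alloc_dma_buffer(void)
{
	void *buf;

	/*
	 * Ask for 64KB of physically contiguous, zeroed memory anywhere
	 * below 4GB, page-aligned, with no boundary restriction
	 * (boundary 0 means "may cross any boundary").
	 */
	buf = contigmalloc(64 * 1024, M_EXAMPLEBUF, M_NOWAIT | M_ZERO,
	    0,			/* low */
	    0xffffffffUL,	/* high */
	    PAGE_SIZE,		/* alignment */
	    0);			/* boundary */
	return (buf);
}

static void
example_free_dma_buffer(void *buf)
{

	if (buf != NULL)
		contigfree(buf, 64 * 1024, M_EXAMPLEBUF);
}
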
Diffstat (limited to 'sys')
-rw-r--r--  sys/conf/NOTES                    4
-rw-r--r--  sys/conf/files                    1
-rw-r--r--  sys/conf/options                  1
-rw-r--r--  sys/powerpc/include/vmparam.h    18
-rw-r--r--  sys/sun4v/sun4v/pmap.c            5
-rw-r--r--  sys/vm/vm_contig.c              416
-rw-r--r--  sys/vm/vm_object.c               36
-rw-r--r--  sys/vm/vm_page.c                 97
-rw-r--r--  sys/vm/vm_page.h                 66
-rw-r--r--  sys/vm/vm_pageout.c              19
-rw-r--r--  sys/vm/vm_pageq.c               192
-rw-r--r--  sys/vm/vm_zeroidle.c             21
12 files changed, 142 insertions, 734 deletions
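Note that the new allocator itself, vm/vm_phys.c, shows up in this diffstat only as a one-line addition to sys/conf/files, so its buddy bookkeeping is not visible here. Purely as an illustration of the order arithmetic implied by the VM_NFREEORDER definition added to the powerpc vmparam.h below (the helper name and constant are hypothetical, not code from this commit), a request for npages contiguous pages maps to a buddy order roughly like this:

/*
 * Illustrative only: mirrors the powerpc constant added in this diff
 * (VM_NFREEORDER == 11), i.e. buddy blocks of 1, 2, 4, ..., 1024 pages.
 */
#define	EXAMPLE_NFREEORDER	11

static int
example_order_for_pages(unsigned long npages)
{
	int order;

	/*
	 * Return the smallest buddy order whose block size, 1 << order
	 * pages, covers the request, or -1 if the request exceeds the
	 * largest block.
	 */
	for (order = 0; order < EXAMPLE_NFREEORDER; order++)
		if ((1UL << order) >= npages)
			return (order);
	return (-1);
}

With 4KB pages, the largest order block, 1 << (11 - 1) = 1024 pages, is exactly the 4MB mentioned in the new vmparam.h comment.
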
diff --git a/sys/conf/NOTES b/sys/conf/NOTES
index e2494b6..5c58f76 100644
--- a/sys/conf/NOTES
+++ b/sys/conf/NOTES
@@ -125,10 +125,6 @@ options DFLTPHYS=(64*1024)
options MAXPHYS=(128*1024)
-# Options for the VM subsystem
-# Deprecated options supported for backwards compatibility
-#options PQ_NOOPT # No coloring
-
# This allows you to actually store this configuration file into
# the kernel binary itself, where it may be later read by saying:
# strings -n 3 /boot/kernel/kernel | sed -n 's/^___//p' > MYKERNEL
diff --git a/sys/conf/files b/sys/conf/files
index a5fafa5..4dd1bf0 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -2078,6 +2078,7 @@ vm/vm_page.c standard
vm/vm_pageout.c standard
vm/vm_pageq.c standard
vm/vm_pager.c standard
+vm/vm_phys.c standard
vm/vm_unix.c standard
vm/vm_zeroidle.c standard
vm/vnode_pager.c standard
diff --git a/sys/conf/options b/sys/conf/options
index d225037..9b376b2 100644
--- a/sys/conf/options
+++ b/sys/conf/options
@@ -555,7 +555,6 @@ VM_KMEM_SIZE_MAX opt_vm.h
NO_SWAPPING opt_vm.h
MALLOC_MAKE_FAILURES opt_vm.h
MALLOC_PROFILE opt_vm.h
-PQ_NOOPT opt_vmpage.h
# The MemGuard replacement allocator used for tamper-after-free detection
DEBUG_MEMGUARD opt_vm.h
diff --git a/sys/powerpc/include/vmparam.h b/sys/powerpc/include/vmparam.h
index 01a29b2..60e240c 100644
--- a/sys/powerpc/include/vmparam.h
+++ b/sys/powerpc/include/vmparam.h
@@ -109,9 +109,27 @@ struct pmap_physseg {
*/
#define VM_PHYSSEG_DENSE
+/*
+ * Create two free page pools: VM_FREEPOOL_DEFAULT is the default pool
+ * from which physical pages are allocated and VM_FREEPOOL_DIRECT is
+ * the pool from which physical pages for small UMA objects are
+ * allocated.
+ */
+#define VM_NFREEPOOL 2
+#define VM_FREEPOOL_DEFAULT 0
+#define VM_FREEPOOL_DIRECT 1
+
+/*
+ * Create one free page list.
+ */
#define VM_NFREELIST 1
#define VM_FREELIST_DEFAULT 0
+/*
+ * The largest allocation size is 4MB.
+ */
+#define VM_NFREEORDER 11
+
#ifndef VM_INITIAL_PAGEIN
#define VM_INITIAL_PAGEIN 16
#endif
diff --git a/sys/sun4v/sun4v/pmap.c b/sys/sun4v/sun4v/pmap.c
index f2fa3ea..917eb1a 100644
--- a/sys/sun4v/sun4v/pmap.c
+++ b/sys/sun4v/sun4v/pmap.c
@@ -58,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_extern.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
#include <vm/uma.h>
#include <machine/cpu.h>
@@ -1286,13 +1287,13 @@ pmap_alloc_zeroed_contig_pages(int npages, uint64_t alignment)
m = NULL;
while (m == NULL) {
for (i = 0; phys_avail[i + 1] != 0; i += 2) {
- m = vm_page_alloc_contig(npages, phys_avail[i],
+ m = vm_phys_alloc_contig(npages, phys_avail[i],
phys_avail[i + 1], alignment, (1UL<<34));
if (m)
goto found;
}
if (m == NULL) {
- printf("vm_page_alloc_contig failed - waiting to retry\n");
+ printf("vm_phys_alloc_contig failed - waiting to retry\n");
VM_WAIT;
}
}
diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c
index bd9233b..955df30 100644
--- a/sys/vm/vm_contig.c
+++ b/sys/vm/vm_contig.c
@@ -84,6 +84,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
#include <vm/vm_extern.h>
static int
@@ -165,191 +166,6 @@ vm_contig_launder(int queue)
return (FALSE);
}
-/*
- * This interface is for merging with malloc() someday.
- * Even if we never implement compaction so that contiguous allocation
- * works after initialization time, malloc()'s data structures are good
- * for statistics and for allocations of less than a page.
- */
-static void *
-contigmalloc1(
- unsigned long size, /* should be size_t here and for malloc() */
- struct malloc_type *type,
- int flags,
- vm_paddr_t low,
- vm_paddr_t high,
- unsigned long alignment,
- unsigned long boundary,
- vm_map_t map)
-{
- int i, start;
- vm_paddr_t phys;
- vm_object_t object;
- vm_offset_t addr, tmp_addr;
- int pass, pqtype;
- int inactl, actl, inactmax, actmax;
- vm_page_t pga = vm_page_array;
-
- size = round_page(size);
- if (size == 0)
- panic("contigmalloc1: size must not be 0");
- if ((alignment & (alignment - 1)) != 0)
- panic("contigmalloc1: alignment must be a power of 2");
- if ((boundary & (boundary - 1)) != 0)
- panic("contigmalloc1: boundary must be a power of 2");
-
- start = 0;
- for (pass = 2; pass >= 0; pass--) {
- vm_page_lock_queues();
-again0:
- mtx_lock(&vm_page_queue_free_mtx);
-again:
- /*
- * Find first page in array that is free, within range,
- * aligned, and such that the boundary won't be crossed.
- */
- for (i = start; i < cnt.v_page_count; i++) {
- phys = VM_PAGE_TO_PHYS(&pga[i]);
- pqtype = pga[i].queue - pga[i].pc;
- if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
- (phys >= low) && (phys < high) &&
- ((phys & (alignment - 1)) == 0) &&
- (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
- break;
- }
-
- /*
- * If the above failed or we will exceed the upper bound, fail.
- */
- if ((i == cnt.v_page_count) ||
- ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
- mtx_unlock(&vm_page_queue_free_mtx);
- /*
- * Instead of racing to empty the inactive/active
- * queues, give up, even with more left to free,
- * if we try more than the initial amount of pages.
- *
- * There's no point attempting this on the last pass.
- */
- if (pass > 0) {
- inactl = actl = 0;
- inactmax = vm_page_queues[PQ_INACTIVE].lcnt;
- actmax = vm_page_queues[PQ_ACTIVE].lcnt;
-again1:
- if (inactl < inactmax &&
- vm_contig_launder(PQ_INACTIVE)) {
- inactl++;
- goto again1;
- }
- if (actl < actmax &&
- vm_contig_launder(PQ_ACTIVE)) {
- actl++;
- goto again1;
- }
- }
- vm_page_unlock_queues();
- continue;
- }
- start = i;
-
- /*
- * Check successive pages for contiguous and free.
- */
- for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
- pqtype = pga[i].queue - pga[i].pc;
- if ((VM_PAGE_TO_PHYS(&pga[i]) !=
- (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
- ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
- start++;
- goto again;
- }
- }
- mtx_unlock(&vm_page_queue_free_mtx);
- for (i = start; i < (start + size / PAGE_SIZE); i++) {
- vm_page_t m = &pga[i];
-
- if (VM_PAGE_INQUEUE1(m, PQ_CACHE)) {
- if (m->hold_count != 0) {
- start++;
- goto again0;
- }
- object = m->object;
- if (!VM_OBJECT_TRYLOCK(object)) {
- start++;
- goto again0;
- }
- if ((m->oflags & VPO_BUSY) || m->busy != 0) {
- VM_OBJECT_UNLOCK(object);
- start++;
- goto again0;
- }
- vm_page_free(m);
- VM_OBJECT_UNLOCK(object);
- }
- }
- mtx_lock(&vm_page_queue_free_mtx);
- for (i = start; i < (start + size / PAGE_SIZE); i++) {
- pqtype = pga[i].queue - pga[i].pc;
- if (pqtype != PQ_FREE) {
- start++;
- goto again;
- }
- }
- for (i = start; i < (start + size / PAGE_SIZE); i++) {
- vm_page_t m = &pga[i];
- vm_pageq_remove_nowakeup(m);
- m->valid = VM_PAGE_BITS_ALL;
- if (m->flags & PG_ZERO)
- vm_page_zero_count--;
- /* Don't clear the PG_ZERO flag, we'll need it later. */
- m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
- KASSERT(m->dirty == 0,
- ("contigmalloc1: page %p was dirty", m));
- m->wire_count = 0;
- m->busy = 0;
- }
- mtx_unlock(&vm_page_queue_free_mtx);
- vm_page_unlock_queues();
- /*
- * We've found a contiguous chunk that meets are requirements.
- * Allocate kernel VM, unfree and assign the physical pages to
- * it and return kernel VM pointer.
- */
- vm_map_lock(map);
- if (vm_map_findspace(map, vm_map_min(map), size, &addr) !=
- KERN_SUCCESS) {
- /*
- * XXX We almost never run out of kernel virtual
- * space, so we don't make the allocated memory
- * above available.
- */
- vm_map_unlock(map);
- return (NULL);
- }
- vm_object_reference(kernel_object);
- vm_map_insert(map, kernel_object, addr - VM_MIN_KERNEL_ADDRESS,
- addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
- vm_map_unlock(map);
-
- tmp_addr = addr;
- VM_OBJECT_LOCK(kernel_object);
- for (i = start; i < (start + size / PAGE_SIZE); i++) {
- vm_page_t m = &pga[i];
- vm_page_insert(m, kernel_object,
- OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
- if ((flags & M_ZERO) && !(m->flags & PG_ZERO))
- pmap_zero_page(m);
- tmp_addr += PAGE_SIZE;
- }
- VM_OBJECT_UNLOCK(kernel_object);
- vm_map_wire(map, addr, addr + size,
- VM_MAP_WIRE_SYSTEM|VM_MAP_WIRE_NOHOLES);
-
- return ((void *)addr);
- }
- return (NULL);
-}
-
static void
vm_page_release_contigl(vm_page_t m, vm_pindex_t count)
{
@@ -367,173 +183,6 @@ vm_page_release_contig(vm_page_t m, vm_pindex_t count)
vm_page_unlock_queues();
}
-static int
-vm_contig_unqueue_free(vm_page_t m)
-{
- int error = 0;
-
- mtx_lock(&vm_page_queue_free_mtx);
- if ((m->queue - m->pc) == PQ_FREE)
- vm_pageq_remove_nowakeup(m);
- else
- error = EAGAIN;
- mtx_unlock(&vm_page_queue_free_mtx);
- if (error)
- return (error);
- m->valid = VM_PAGE_BITS_ALL;
- if (m->flags & PG_ZERO)
- vm_page_zero_count--;
- /* Don't clear the PG_ZERO flag; we'll need it later. */
- m->flags = PG_UNMANAGED | (m->flags & PG_ZERO);
- m->oflags = 0;
- KASSERT(m->dirty == 0,
- ("contigmalloc2: page %p was dirty", m));
- m->wire_count = 0;
- m->busy = 0;
- return (error);
-}
-
-vm_page_t
-vm_page_alloc_contig(vm_pindex_t npages, vm_paddr_t low, vm_paddr_t high,
- vm_offset_t alignment, vm_offset_t boundary)
-{
- vm_object_t object;
- vm_offset_t size;
- vm_paddr_t phys;
- vm_page_t pga = vm_page_array;
- static vm_pindex_t np = 0;
- static vm_pindex_t start = 0;
- vm_pindex_t startl = 0;
- int i, pass, pqtype;
-
- size = npages << PAGE_SHIFT;
- if (size == 0)
- panic("vm_page_alloc_contig: size must not be 0");
- if ((alignment & (alignment - 1)) != 0)
- panic("vm_page_alloc_contig: alignment must be a power of 2");
- if ((boundary & (boundary - 1)) != 0)
- panic("vm_page_alloc_contig: boundary must be a power of 2");
-
- /*
- * Two simple optimizations. First, don't scan high ordered pages
- * if they are outside of the requested address range. Second, cache
- * the starting page index across calls and reuse it instead of
- * restarting the scan from the top. This is conditional on the
- * requested number of pages being the same or greater than the
- * cached amount.
- */
- for (pass = 0; pass < 2; pass++) {
- vm_page_lock_queues();
- if ((np == 0) || (np > npages)) {
- if (atop(high) < vm_page_array_size)
- start = atop(high) - npages + 1;
- else
- start = vm_page_array_size - npages + 1;
- }
- np = 0;
-retry:
- start--;
- /*
- * Find last page in array that is free, within range,
- * aligned, and such that the boundary won't be crossed.
- */
- for (i = start; i >= 0; i--) {
- phys = VM_PAGE_TO_PHYS(&pga[i]);
- pqtype = pga[i].queue - pga[i].pc;
- if (pass == 0) {
- if (pqtype != PQ_FREE && pqtype != PQ_CACHE)
- continue;
- } else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
- pga[i].queue != PQ_ACTIVE &&
- pga[i].queue != PQ_INACTIVE)
- continue;
- if (phys >= low && phys + size <= high &&
- ((phys & (alignment - 1)) == 0) &&
- ((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0)
- break;
- }
- /* There are no candidates at all. */
- if (i < 0) {
- vm_page_unlock_queues();
- continue;
- }
- start = i;
- /*
- * Check successive pages for contiguous and free.
- */
- for (i = start + npages - 1; i > start; i--) {
- pqtype = pga[i].queue - pga[i].pc;
- if (VM_PAGE_TO_PHYS(&pga[i]) !=
- VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE) {
- start = i - npages + 1;
- goto retry;
- }
- if (pass == 0) {
- if (pqtype != PQ_FREE && pqtype != PQ_CACHE) {
- start = i - npages + 1;
- goto retry;
- }
- } else if (pqtype != PQ_FREE && pqtype != PQ_CACHE &&
- pga[i].queue != PQ_ACTIVE &&
- pga[i].queue != PQ_INACTIVE) {
- start = i - npages + 1;
- goto retry;
- }
- }
- for (i = start + npages - 1; i >= start; i--) {
- vm_page_t m = &pga[i];
-
-retry_page:
- pqtype = m->queue - m->pc;
- if (pass != 0 && pqtype != PQ_FREE &&
- pqtype != PQ_CACHE) {
- if (m->queue == PQ_ACTIVE ||
- m->queue == PQ_INACTIVE) {
- if (vm_contig_launder_page(m) != 0)
- goto cleanup_freed;
- pqtype = m->queue - m->pc;
- if (pqtype != PQ_FREE &&
- pqtype != PQ_CACHE)
- goto cleanup_freed;
- } else {
-cleanup_freed:
- vm_page_release_contigl(&pga[i + 1],
- start + npages - 1 - i);
- start = i - npages + 1;
- goto retry;
- }
- }
- if (pqtype == PQ_CACHE) {
- if (m->hold_count != 0)
- goto cleanup_freed;
- object = m->object;
- if (!VM_OBJECT_TRYLOCK(object))
- goto cleanup_freed;
- if ((m->oflags & VPO_BUSY) || m->busy != 0) {
- VM_OBJECT_UNLOCK(object);
- goto cleanup_freed;
- }
- vm_page_free(m);
- VM_OBJECT_UNLOCK(object);
- }
- /*
- * There is no good API for freeing a page
- * directly to PQ_NONE on our behalf, so spin.
- */
- if (vm_contig_unqueue_free(m) != 0)
- goto retry_page;
- }
- /*
- * We've found a contiguous chunk that meets are requirements.
- */
- np = npages;
- startl = start;
- vm_page_unlock_queues();
- return (&pga[startl]);
- }
- return (NULL);
-}
-
static void *
contigmalloc2(vm_page_t m, vm_pindex_t npages, int flags)
{
@@ -571,11 +220,6 @@ contigmalloc2(vm_page_t m, vm_pindex_t npages, int flags)
return ((void *)addr);
}
-static int vm_old_contigmalloc = 0;
-SYSCTL_INT(_vm, OID_AUTO, old_contigmalloc,
- CTLFLAG_RW, &vm_old_contigmalloc, 0, "Use the old contigmalloc algorithm");
-TUNABLE_INT("vm.old_contigmalloc", &vm_old_contigmalloc);
-
void *
contigmalloc(
unsigned long size, /* should be size_t here and for malloc() */
@@ -587,27 +231,51 @@ contigmalloc(
unsigned long boundary)
{
void * ret;
- vm_page_t pages;
- vm_pindex_t npgs;
+ vm_object_t object;
+ vm_page_t m, m_next, pages;
+ unsigned long npgs;
+ int actl, actmax, inactl, inactmax, tries;
npgs = round_page(size) >> PAGE_SHIFT;
- mtx_lock(&Giant);
- if (vm_old_contigmalloc) {
- ret = contigmalloc1(size, type, flags, low, high, alignment,
- boundary, kernel_map);
- } else {
- pages = vm_page_alloc_contig(npgs, low, high,
- alignment, boundary);
- if (pages == NULL) {
- ret = NULL;
- } else {
- ret = contigmalloc2(pages, npgs, flags);
- if (ret == NULL)
- vm_page_release_contig(pages, npgs);
+ tries = 0;
+retry:
+ pages = vm_phys_alloc_contig(npgs, low, high, alignment, boundary);
+ if (pages == NULL) {
+ if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) {
+ vm_page_lock_queues();
+ inactl = 0;
+ inactmax = tries < 1 ? 0 : cnt.v_inactive_count;
+ actl = 0;
+ actmax = tries < 2 ? 0 : cnt.v_active_count;
+again:
+ if (inactl < inactmax &&
+ vm_contig_launder(PQ_INACTIVE)) {
+ inactl++;
+ goto again;
+ }
+ if (actl < actmax &&
+ vm_contig_launder(PQ_ACTIVE)) {
+ actl++;
+ goto again;
+ }
+ TAILQ_FOREACH_SAFE(m, &vm_page_queues[PQ_CACHE].pl,
+ pageq, m_next) {
+ if (m->hold_count == 0 &&
+ VM_OBJECT_TRYLOCK(object = m->object)) {
+ vm_page_free(m);
+ VM_OBJECT_UNLOCK(object);
+ }
+ }
+ vm_page_unlock_queues();
+ tries++;
+ goto retry;
}
-
+ ret = NULL;
+ } else {
+ ret = contigmalloc2(pages, npgs, flags);
+ if (ret == NULL)
+ vm_page_release_contig(pages, npgs);
}
- mtx_unlock(&Giant);
malloc_type_allocated(type, ret == NULL ? 0 : npgs << PAGE_SHIFT);
return (ret);
}
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index dfcade1..4741638 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -154,15 +154,6 @@ static long object_bypasses;
SYSCTL_LONG(_vm_stats_object, OID_AUTO, bypasses, CTLFLAG_RD,
&object_bypasses, 0, "VM object bypasses");
-/*
- * next_index determines the page color that is assigned to the next
- * allocated object. Accesses to next_index are not synchronized
- * because the effects of two or more object allocations using
- * next_index simultaneously are inconsequential. At any given time,
- * numerous objects have the same page color.
- */
-static int next_index;
-
static uma_zone_t obj_zone;
static int vm_object_zinit(void *mem, int size, int flags);
@@ -210,7 +201,6 @@ vm_object_zinit(void *mem, int size, int flags)
void
_vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
{
- int incr;
TAILQ_INIT(&object->memq);
LIST_INIT(&object->shadow_head);
@@ -223,11 +213,7 @@ _vm_object_allocate(objtype_t type, vm_pindex_t size, vm_object_t object)
object->flags = 0;
if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
object->flags = OBJ_ONEMAPPING;
- incr = PQ_MAXLENGTH;
- if (size <= incr)
- incr = size;
- object->pg_color = next_index;
- next_index = (object->pg_color + incr) & PQ_COLORMASK;
+ object->pg_color = 0;
object->handle = NULL;
object->backing_object = NULL;
object->backing_object_offset = (vm_ooffset_t) 0;
@@ -1258,15 +1244,8 @@ vm_object_shadow(
LIST_INSERT_HEAD(&source->shadow_head, result, shadow_list);
source->shadow_count++;
source->generation++;
- if (length < source->size)
- length = source->size;
- if (length > PQ_MAXLENGTH || source->generation > 1)
- length = PQ_MAXLENGTH;
- result->pg_color = (source->pg_color +
- length * source->generation) & PQ_COLORMASK;
result->flags |= source->flags & OBJ_NEEDGIANT;
VM_OBJECT_UNLOCK(source);
- next_index = (result->pg_color + PQ_MAXLENGTH) & PQ_COLORMASK;
}
@@ -2129,7 +2108,7 @@ DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
TAILQ_FOREACH(object, &vm_object_list, object_list) {
vm_pindex_t idx, fidx;
vm_pindex_t osize;
- vm_paddr_t pa = -1, padiff;
+ vm_paddr_t pa = -1;
int rcount;
vm_page_t m;
@@ -2171,17 +2150,8 @@ DB_SHOW_COMMAND(vmopag, vm_object_print_pages)
continue;
}
if (rcount) {
- padiff = pa + rcount * PAGE_SIZE - VM_PAGE_TO_PHYS(m);
- padiff >>= PAGE_SHIFT;
- padiff &= PQ_COLORMASK;
- if (padiff == 0) {
- pa = VM_PAGE_TO_PHYS(m) - rcount * PAGE_SIZE;
- ++rcount;
- continue;
- }
- db_printf(" index(%ld)run(%d)pa(0x%lx)",
+ db_printf(" index(%ld)run(%d)pa(0x%lx)\n",
(long)fidx, rcount, (long)pa);
- db_printf("pd(%ld)\n", (long)padiff);
if (nl > 18) {
c = cngetc();
if (c != ' ')
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index e1ef640..a3c89fc 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -117,6 +117,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
#include <vm/vm_extern.h>
#include <vm/uma.h>
#include <vm/uma_int.h>
@@ -339,6 +340,8 @@ vm_page_startup(vm_offset_t vaddr)
* Clear all of the page structures
*/
bzero((caddr_t) vm_page_array, page_range * sizeof(struct vm_page));
+ for (i = 0; i < page_range; i++)
+ vm_page_array[i].order = VM_NFREEORDER;
vm_page_array_size = page_range;
/*
@@ -352,10 +355,13 @@ vm_page_startup(vm_offset_t vaddr)
("vm_page_startup: inconsistent page counts"));
/*
- * Construct the free queue(s) in descending order (by physical
- * address) so that the first 16MB of physical memory is allocated
- * last rather than first. On large-memory machines, this avoids
- * the exhaustion of low physical memory before isa_dma_init has run.
+ * Initialize the physical memory allocator.
+ */
+ vm_phys_init();
+
+ /*
+ * Add every available physical page that is not blacklisted to
+ * the free lists.
*/
cnt.v_page_count = 0;
cnt.v_free_count = 0;
@@ -369,7 +375,7 @@ vm_page_startup(vm_offset_t vaddr)
printf("Skipping page with pa 0x%jx\n",
(uintmax_t)pa);
else
- vm_pageq_add_new_page(pa);
+ vm_phys_add_page(pa);
pa += PAGE_SIZE;
}
}
@@ -543,7 +549,7 @@ vm_page_dirty(vm_page_t m)
{
KASSERT(VM_PAGE_GETKNOWNQUEUE1(m) != PQ_CACHE,
("vm_page_dirty: page in cache!"));
- KASSERT(VM_PAGE_GETKNOWNQUEUE1(m) != PQ_FREE,
+ KASSERT(!VM_PAGE_IS_FREE(m),
("vm_page_dirty: page is free!"));
m->dirty = VM_PAGE_BITS_ALL;
}
@@ -799,14 +805,14 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
* This routine may not block.
*/
vm_page_t
-vm_page_select_cache(int color)
+vm_page_select_cache(void)
{
vm_object_t object;
vm_page_t m;
boolean_t was_trylocked;
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
- while ((m = vm_pageq_find(PQ_CACHE, color, FALSE)) != NULL) {
+ while ((m = TAILQ_FIRST(&vm_page_queues[PQ_CACHE].pl)) != NULL) {
KASSERT(m->dirty == 0, ("Found dirty cache page %p", m));
KASSERT(!pmap_page_is_mapped(m),
("Found mapped cache page %p", m));
@@ -850,7 +856,7 @@ vm_page_t
vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
{
vm_page_t m = NULL;
- int color, flags, page_req;
+ int flags, page_req;
page_req = req & VM_ALLOC_CLASS_MASK;
KASSERT(curthread->td_intr_nesting_level == 0 ||
@@ -861,9 +867,7 @@ vm_page_alloc(vm_object_t object, vm_pindex_t pindex, int req)
KASSERT(object != NULL,
("vm_page_alloc: NULL object."));
VM_OBJECT_LOCK_ASSERT(object, MA_OWNED);
- color = (pindex + object->pg_color) & PQ_COLORMASK;
- } else
- color = pindex & PQ_COLORMASK;
+ }
/*
* The pager is allowed to eat deeper into the free page list.
@@ -883,7 +887,8 @@ loop:
* Allocate from the free queue if the number of free pages
* exceeds the minimum for the request class.
*/
- m = vm_pageq_find(PQ_FREE, color, (req & VM_ALLOC_ZERO) != 0);
+ m = vm_phys_alloc_pages_locked(object != NULL ?
+ VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
} else if (page_req != VM_ALLOC_INTERRUPT) {
mtx_unlock(&vm_page_queue_free_mtx);
/*
@@ -892,7 +897,7 @@ loop:
* cnt.v_*_free_min counters are replenished.
*/
vm_page_lock_queues();
- if ((m = vm_page_select_cache(color)) == NULL) {
+ if ((m = vm_page_select_cache()) == NULL) {
KASSERT(cnt.v_cache_count == 0,
("vm_page_alloc: cache queue is missing %d pages",
cnt.v_cache_count));
@@ -908,7 +913,8 @@ loop:
mtx_unlock(&vm_page_queue_free_mtx);
return (NULL);
}
- m = vm_pageq_find(PQ_FREE, color, (req & VM_ALLOC_ZERO) != 0);
+ m = vm_phys_alloc_pages_locked(object != NULL ?
+ VM_FREEPOOL_DEFAULT : VM_FREEPOOL_DIRECT, 0);
} else {
vm_page_unlock_queues();
goto loop;
@@ -931,11 +937,8 @@ loop:
m != NULL,
("vm_page_alloc(): missing page on free queue")
);
-
- /*
- * Remove from free queue
- */
- vm_pageq_remove_nowakeup(m);
+ KASSERT(VM_PAGE_IS_FREE(m),
+ ("vm_page_alloc: page %p is not free", m));
/*
* Initialize structure. Only the PG_ZERO flag is inherited.
@@ -1096,7 +1099,7 @@ vm_page_free_wakeup(void)
/*
* vm_page_free_toq:
*
- * Returns the given page to the PQ_FREE list,
+ * Returns the given page to the free list,
* disassociating it with any VM object.
*
* Object and page must be locked prior to entry.
@@ -1106,7 +1109,6 @@ vm_page_free_wakeup(void)
void
vm_page_free_toq(vm_page_t m)
{
- struct vpgqueues *pq;
if (VM_PAGE_GETQUEUE(m) != PQ_NONE)
mtx_assert(&vm_page_queue_mtx, MA_OWNED);
@@ -1114,12 +1116,12 @@ vm_page_free_toq(vm_page_t m)
("vm_page_free_toq: freeing mapped page %p", m));
PCPU_INC(cnt.v_tfree);
- if (m->busy || VM_PAGE_INQUEUE1(m, PQ_FREE)) {
+ if (m->busy || VM_PAGE_IS_FREE(m)) {
printf(
"vm_page_free: pindex(%lu), busy(%d), VPO_BUSY(%d), hold(%d)\n",
(u_long)m->pindex, m->busy, (m->oflags & VPO_BUSY) ? 1 : 0,
m->hold_count);
- if (VM_PAGE_INQUEUE1(m, PQ_FREE))
+ if (VM_PAGE_IS_FREE(m))
panic("vm_page_free: freeing free page");
else
panic("vm_page_free: freeing busy page");
@@ -1155,27 +1157,19 @@ vm_page_free_toq(vm_page_t m)
if (m->hold_count != 0) {
m->flags &= ~PG_ZERO;
vm_pageq_enqueue(PQ_HOLD, m);
- return;
- }
- VM_PAGE_SETQUEUE1(m, PQ_FREE);
- mtx_lock(&vm_page_queue_free_mtx);
- pq = &vm_page_queues[VM_PAGE_GETQUEUE(m)];
- pq->lcnt++;
- ++(*pq->cnt);
-
- /*
- * Put zero'd pages on the end ( where we look for zero'd pages
- * first ) and non-zerod pages at the head.
- */
- if (m->flags & PG_ZERO) {
- TAILQ_INSERT_TAIL(&pq->pl, m, pageq);
- ++vm_page_zero_count;
} else {
- TAILQ_INSERT_HEAD(&pq->pl, m, pageq);
- vm_page_zero_idle_wakeup();
+ m->flags |= PG_FREE;
+ mtx_lock(&vm_page_queue_free_mtx);
+ if ((m->flags & PG_ZERO) != 0) {
+ vm_phys_free_pages_locked(m, 0);
+ ++vm_page_zero_count;
+ } else {
+ vm_phys_free_pages_locked(m, 0);
+ vm_page_zero_idle_wakeup();
+ }
+ vm_page_free_wakeup();
+ mtx_unlock(&vm_page_queue_free_mtx);
}
- vm_page_free_wakeup();
- mtx_unlock(&vm_page_queue_free_mtx);
}
/*
@@ -1294,7 +1288,6 @@ _vm_page_deactivate(vm_page_t m, int athead)
else
TAILQ_INSERT_TAIL(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
VM_PAGE_SETQUEUE2(m, PQ_INACTIVE);
- vm_page_queues[PQ_INACTIVE].lcnt++;
cnt.v_inactive_count++;
}
}
@@ -1382,7 +1375,7 @@ vm_page_cache(vm_page_t m)
(long)m->pindex);
}
vm_pageq_remove_nowakeup(m);
- vm_pageq_enqueue(PQ_CACHE + m->pc, m);
+ vm_pageq_enqueue(PQ_CACHE, m);
mtx_lock(&vm_page_queue_free_mtx);
vm_page_free_wakeup();
mtx_unlock(&vm_page_queue_free_mtx);
@@ -1794,21 +1787,17 @@ DB_SHOW_COMMAND(page, vm_page_print_page_info)
DB_SHOW_COMMAND(pageq, vm_page_print_pageq_info)
{
- int i;
+
db_printf("PQ_FREE:");
- for (i = 0; i < PQ_NUMCOLORS; i++) {
- db_printf(" %d", vm_page_queues[PQ_FREE + i].lcnt);
- }
+ db_printf(" %d", cnt.v_free_count);
db_printf("\n");
db_printf("PQ_CACHE:");
- for (i = 0; i < PQ_NUMCOLORS; i++) {
- db_printf(" %d", vm_page_queues[PQ_CACHE + i].lcnt);
- }
+ db_printf(" %d", *vm_page_queues[PQ_CACHE].cnt);
db_printf("\n");
db_printf("PQ_ACTIVE: %d, PQ_INACTIVE: %d\n",
- vm_page_queues[PQ_ACTIVE].lcnt,
- vm_page_queues[PQ_INACTIVE].lcnt);
+ *vm_page_queues[PQ_ACTIVE].cnt,
+ *vm_page_queues[PQ_INACTIVE].cnt);
}
#endif /* DDB */
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index 0b1803f..c238e32 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -110,9 +110,11 @@ struct vm_page {
vm_pindex_t pindex; /* offset into object (O,P) */
vm_paddr_t phys_addr; /* physical address of page */
struct md_page md; /* machine dependant stuff */
- u_short queue; /* page queue index */
- u_short flags, /* see below */
- pc; /* page color */
+ uint8_t queue; /* page queue index */
+ int8_t segind;
+ u_short flags; /* see below */
+ uint8_t order; /* index of the buddy queue */
+ uint8_t pool;
u_short wire_count; /* wired down maps refs (P) */
u_int cow; /* page cow mapping count */
short hold_count; /* page hold count */
@@ -155,62 +157,39 @@ CTASSERT(sizeof(u_long) >= 8);
#endif
#endif
-/* PQ_CACHE and PQ_FREE represents a PQ_NUMCOLORS consecutive queue. */
#define PQ_NONE 0
-#define PQ_FREE 1
-#define PQ_INACTIVE (page_queue_coloring.inactive)
-#define PQ_ACTIVE (page_queue_coloring.active)
-#define PQ_CACHE (page_queue_coloring.cache)
-#define PQ_HOLD (page_queue_coloring.hold)
-#define PQ_COUNT (page_queue_coloring.count)
-#define PQ_MAXCOLORS 1024
-#define PQ_MAXCOUNT (4 + 2 * PQ_MAXCOLORS)
-#define PQ_NUMCOLORS (page_queue_coloring.numcolors)
-#define PQ_PRIME1 (page_queue_coloring.prime1)
-#define PQ_PRIME2 (page_queue_coloring.prime2)
-#define PQ_COLORMASK (page_queue_coloring.colormask)
-#define PQ_MAXLENGTH (page_queue_coloring.maxlength)
+#define PQ_INACTIVE 1
+#define PQ_ACTIVE 2
+#define PQ_CACHE 3
+#define PQ_HOLD 4
+#define PQ_COUNT 5
+#define PQ_MAXCOUNT 5
/* Returns the real queue a page is on. */
#define VM_PAGE_GETQUEUE(m) ((m)->queue)
/* Returns the well known queue a page is on. */
-#define VM_PAGE_GETKNOWNQUEUE1(m) ((m)->queue - (m)->pc)
+#define VM_PAGE_GETKNOWNQUEUE1(m) VM_PAGE_GETQUEUE(m)
#define VM_PAGE_GETKNOWNQUEUE2(m) VM_PAGE_GETQUEUE(m)
/* Given the real queue number and a page color return the well know queue. */
-#define VM_PAGE_RESOLVEQUEUE(m, q) ((q) - (m)->pc)
+#define VM_PAGE_RESOLVEQUEUE(m, q) (q)
/* Returns true if the page is in the named well known queue. */
#define VM_PAGE_INQUEUE1(m, q) (VM_PAGE_GETKNOWNQUEUE1(m) == (q))
#define VM_PAGE_INQUEUE2(m, q) (VM_PAGE_GETKNOWNQUEUE2(m) == (q))
/* Sets the queue a page is on. */
-#define VM_PAGE_SETQUEUE1(m, q) (VM_PAGE_GETQUEUE(m) = (q) + (m)->pc)
+#define VM_PAGE_SETQUEUE1(m, q) (VM_PAGE_GETQUEUE(m) = (q))
#define VM_PAGE_SETQUEUE2(m, q) (VM_PAGE_GETQUEUE(m) = (q))
struct vpgqueues {
struct pglist pl;
int *cnt;
- int lcnt;
-};
-
-struct pq_coloring {
- int numcolors;
- int colormask;
- int prime1;
- int prime2;
- int inactive;
- int active;
- int cache;
- int hold;
- int count;
- int maxlength;
};
extern struct vpgqueues vm_page_queues[PQ_MAXCOUNT];
extern struct mtx vm_page_queue_free_mtx;
-extern struct pq_coloring page_queue_coloring;
/*
* These are the flags defined for vm_page.
@@ -222,6 +201,7 @@ extern struct pq_coloring page_queue_coloring;
* pte mappings, nor can they be removed from their objects via
* the object, and such pages are also not on any PQ queue.
*/
+#define PG_FREE 0x0002 /* page is free */
#define PG_WINATCFLS 0x0004 /* flush dirty page on inactive q */
#define PG_FICTITIOUS 0x0008 /* physical page doesn't exist (O) */
#define PG_WRITEABLE 0x0010 /* page is mapped writeable */
@@ -276,19 +256,19 @@ extern vm_page_t vm_page_array; /* First resident page in table */
extern int vm_page_array_size; /* number of vm_page_t's */
extern long first_page; /* first physical page number */
+#define VM_PAGE_IS_FREE(m) (((m)->flags & PG_FREE) != 0)
+
#define VM_PAGE_TO_PHYS(entry) ((entry)->phys_addr)
+vm_page_t vm_phys_paddr_to_vm_page(vm_paddr_t pa);
+
static __inline vm_page_t PHYS_TO_VM_PAGE(vm_paddr_t pa);
static __inline vm_page_t
PHYS_TO_VM_PAGE(vm_paddr_t pa)
{
#ifdef VM_PHYSSEG_SPARSE
- int i, j = 0;
-
- for (i = 0; phys_avail[i + 1] <= pa || phys_avail[i] > pa; i += 2)
- j += atop(phys_avail[i + 1] - phys_avail[i]);
- return (&vm_page_array[j + atop(pa - phys_avail[i])]);
+ return (vm_phys_paddr_to_vm_page(pa));
#elif defined(VM_PHYSSEG_DENSE)
return (&vm_page_array[atop(pa) - first_page]);
#else
@@ -336,17 +316,13 @@ void vm_page_dirty(vm_page_t m);
void vm_page_wakeup(vm_page_t m);
void vm_pageq_init(void);
-void vm_pageq_add_new_page(vm_paddr_t pa);
void vm_pageq_enqueue(int queue, vm_page_t m);
void vm_pageq_remove_nowakeup(vm_page_t m);
void vm_pageq_remove(vm_page_t m);
-vm_page_t vm_pageq_find(int basequeue, int index, boolean_t prefer_zero);
void vm_pageq_requeue(vm_page_t m);
void vm_page_activate (vm_page_t);
vm_page_t vm_page_alloc (vm_object_t, vm_pindex_t, int);
-vm_page_t vm_page_alloc_contig (vm_pindex_t, vm_paddr_t, vm_paddr_t,
- vm_offset_t, vm_offset_t);
vm_page_t vm_page_grab (vm_object_t, vm_pindex_t, int);
void vm_page_cache (register vm_page_t);
int vm_page_try_to_cache (vm_page_t);
@@ -357,7 +333,7 @@ void vm_page_insert (vm_page_t, vm_object_t, vm_pindex_t);
vm_page_t vm_page_lookup (vm_object_t, vm_pindex_t);
void vm_page_remove (vm_page_t);
void vm_page_rename (vm_page_t, vm_object_t, vm_pindex_t);
-vm_page_t vm_page_select_cache(int);
+vm_page_t vm_page_select_cache(void);
void vm_page_sleep(vm_page_t m, const char *msg);
vm_page_t vm_page_splay(vm_pindex_t, vm_page_t);
vm_offset_t vm_page_startup(vm_offset_t vaddr);
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 24bb7e3..1d89f7a 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -682,8 +682,7 @@ vm_pageout_scan(int pass)
struct thread *td;
vm_offset_t size, bigsize;
vm_object_t object;
- int actcount, cache_cur, cache_first_failure;
- static int cache_last_free;
+ int actcount;
int vnodes_skipped = 0;
int maxlaunder;
@@ -1145,12 +1144,8 @@ unlock_and_continue:
* are considered basically 'free', moving pages from cache to free
* does not effect other calculations.
*/
- cache_cur = cache_last_free;
- cache_first_failure = -1;
- while (cnt.v_free_count < cnt.v_free_reserved && (cache_cur =
- (cache_cur + PQ_PRIME2) & PQ_COLORMASK) != cache_first_failure) {
- TAILQ_FOREACH(m, &vm_page_queues[PQ_CACHE + cache_cur].pl,
- pageq) {
+ while (cnt.v_free_count < cnt.v_free_reserved) {
+ TAILQ_FOREACH(m, &vm_page_queues[PQ_CACHE].pl, pageq) {
KASSERT(m->dirty == 0,
("Found dirty cache page %p", m));
KASSERT(!pmap_page_is_mapped(m),
@@ -1167,13 +1162,11 @@ unlock_and_continue:
vm_page_free(m);
VM_OBJECT_UNLOCK(object);
cnt.v_dfree++;
- cache_last_free = cache_cur;
- cache_first_failure = -1;
break;
}
}
- if (m == NULL && cache_first_failure == -1)
- cache_first_failure = cache_cur;
+ if (m == NULL)
+ break;
}
vm_page_unlock_queues();
#if !defined(NO_SWAPPING)
@@ -1425,7 +1418,7 @@ vm_pageout()
cnt.v_pageout_free_min = (2*MAXBSIZE)/PAGE_SIZE +
cnt.v_interrupt_free_min;
cnt.v_free_reserved = vm_pageout_page_count +
- cnt.v_pageout_free_min + (cnt.v_page_count / 768) + PQ_NUMCOLORS;
+ cnt.v_pageout_free_min + (cnt.v_page_count / 768);
cnt.v_free_severe = cnt.v_free_min / 2;
cnt.v_free_min += cnt.v_free_reserved;
cnt.v_free_severe += cnt.v_free_reserved;
diff --git a/sys/vm/vm_pageq.c b/sys/vm/vm_pageq.c
index 2aa2177..5c10f62 100644
--- a/sys/vm/vm_pageq.c
+++ b/sys/vm/vm_pageq.c
@@ -28,8 +28,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include "opt_vmpage.h"
-
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/linker_set.h>
@@ -48,103 +46,17 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_page.h>
#include <vm/vm_pageout.h>
#include <vm/vm_pager.h>
+#include <vm/vm_phys.h>
#include <vm/vm_extern.h>
-static void vm_coloring_init(void);
-void setPQL2(int *const size, int *const ways);
-
struct vpgqueues vm_page_queues[PQ_MAXCOUNT];
-struct pq_coloring page_queue_coloring;
-
-static int pq_cachesize = 0; /* size of the cache in KB */
-static int pq_cachenways = 0; /* associativity of the cache */
-
-SYSCTL_NODE(_vm_stats, OID_AUTO, pagequeue, CTLFLAG_RW, 0, "VM meter stats");
-SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, page_colors, CTLFLAG_RD,
- &(PQ_NUMCOLORS), 0, "Number of colors in the page queue");
-SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, cachesize, CTLFLAG_RD,
- &pq_cachesize, 0, "Size of the processor cache in KB");
-SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, cachenways, CTLFLAG_RD,
- &pq_cachenways, 0, "Associativity of the processor cache");
-SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, prime1, CTLFLAG_RD,
- &(PQ_PRIME1), 0, "Cache tuning value");
-SYSCTL_INT(_vm_stats_pagequeue, OID_AUTO, prime2, CTLFLAG_RD,
- &(PQ_PRIME2), 0, "Cache tuning value");
-
-static void
-vm_coloring_init(void)
-{
-#ifdef PQ_NOOPT
- PQ_NUMCOLORS = PQ_PRIME1 = PQ_PRIME2 = 1;
-#else
-
- setPQL2(&pq_cachesize, &pq_cachenways);
-
- CTASSERT(PAGE_SIZE/1024 > 0);
-
- if (pq_cachesize > 0 && pq_cachenways > 0)
- PQ_NUMCOLORS = pq_cachesize / (PAGE_SIZE/1024) / \
- pq_cachenways;
- else
- PQ_NUMCOLORS = 32;
-
- if (PQ_MAXCOLORS < PQ_NUMCOLORS) {
- printf("VM-PQ color limit (PQ_MAXCOLORS=%u) exceeded (%u), see vm_page.h", PQ_MAXCOLORS, PQ_NUMCOLORS);
- PQ_NUMCOLORS = PQ_MAXCOLORS;
- }
-
- if (PQ_NUMCOLORS >= 128) {
- PQ_PRIME1 = 31;
- PQ_PRIME2 = 23;
- } else if (PQ_NUMCOLORS >= 64) {
- PQ_PRIME1 = 13;
- PQ_PRIME2 = 7;
- } else if (PQ_NUMCOLORS >= 32) {
- PQ_PRIME1 = 9;
- PQ_PRIME2 = 5;
- } else if (PQ_NUMCOLORS >= 16) {
- PQ_PRIME1 = 5;
- PQ_PRIME2 = 3;
- } else
- PQ_NUMCOLORS = PQ_PRIME1 = PQ_PRIME2 = 1;
-#endif
-
- /*
- * PQ_CACHE represents a
- * PQ_NUMCOLORS consecutive queue.
- */
- PQ_COLORMASK = PQ_NUMCOLORS - 1;
- PQ_INACTIVE = 1 + PQ_NUMCOLORS;
- PQ_ACTIVE = 2 + PQ_NUMCOLORS;
- PQ_CACHE = 3 + PQ_NUMCOLORS;
- PQ_HOLD = 3 + 2 * PQ_NUMCOLORS;
- PQ_COUNT = 4 + 2 * PQ_NUMCOLORS;
- PQ_MAXLENGTH = PQ_NUMCOLORS / 3 + PQ_PRIME1;
-
-#if 0
- /* XXX: is it possible to allocate vm_page_queues[PQ_COUNT] here? */
-#error XXX: vm_page_queues = malloc(PQ_COUNT * sizeof(struct vpgqueues));
-#endif
-
- if (bootverbose)
- if (PQ_NUMCOLORS > 1)
- printf("Using %d colors for the VM-PQ tuning (%d, %d)\n",
- PQ_NUMCOLORS, pq_cachesize, pq_cachenways);
-}
void
vm_pageq_init(void)
{
int i;
- vm_coloring_init();
-
- for (i = 0; i < PQ_NUMCOLORS; ++i) {
- vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
- }
- for (i = 0; i < PQ_NUMCOLORS; ++i) {
- vm_page_queues[PQ_CACHE + i].cnt = &cnt.v_cache_count;
- }
+ vm_page_queues[PQ_CACHE].cnt = &cnt.v_cache_count;
vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
vm_page_queues[PQ_HOLD].cnt = &cnt.v_active_count;
@@ -179,28 +91,6 @@ vm_pageq_enqueue(int queue, vm_page_t m)
VM_PAGE_SETQUEUE2(m, queue);
TAILQ_INSERT_TAIL(&vpq->pl, m, pageq);
++*vpq->cnt;
- ++vpq->lcnt;
-}
-
-/*
- * vm_add_new_page:
- *
- * Add a new page to the freelist for use by the system.
- */
-void
-vm_pageq_add_new_page(vm_paddr_t pa)
-{
- vm_page_t m;
-
- cnt.v_page_count++;
- m = PHYS_TO_VM_PAGE(pa);
- m->phys_addr = pa;
- m->flags = 0;
- m->pc = (pa >> PAGE_SHIFT) & PQ_COLORMASK;
- pmap_page_init(m);
- mtx_lock(&vm_page_queue_free_mtx);
- vm_pageq_enqueue(m->pc + PQ_FREE, m);
- mtx_unlock(&vm_page_queue_free_mtx);
}
/*
@@ -222,7 +112,6 @@ vm_pageq_remove_nowakeup(vm_page_t m)
VM_PAGE_SETQUEUE2(m, PQ_NONE);
TAILQ_REMOVE(&pq->pl, m, pageq);
(*pq->cnt)--;
- pq->lcnt--;
}
}
@@ -245,86 +134,9 @@ vm_pageq_remove(vm_page_t m)
pq = &vm_page_queues[queue];
TAILQ_REMOVE(&pq->pl, m, pageq);
(*pq->cnt)--;
- pq->lcnt--;
if (VM_PAGE_RESOLVEQUEUE(m, queue) == PQ_CACHE) {
if (vm_paging_needed())
pagedaemon_wakeup();
}
}
}
-
-#ifndef PQ_NOOPT
-
-/*
- * vm_pageq_find:
- *
- * Find a page on the specified queue with color optimization.
- *
- * The page coloring optimization attempts to locate a page
- * that does not overload other nearby pages in the object in
- * the cpu's L2 cache. We need this optimization because cpu
- * caches tend to be physical caches, while object spaces tend
- * to be virtual.
- *
- * The specified queue must be locked.
- * This routine may not block.
- *
- * This routine may only be called from the vm_pageq_find()
- * function in this file.
- */
-static inline vm_page_t
-_vm_pageq_find(int basequeue, int index)
-{
- int i;
- vm_page_t m = NULL;
- struct vpgqueues *pq;
-
- pq = &vm_page_queues[basequeue];
-
- /*
- * Note that for the first loop, index+i and index-i wind up at the
- * same place. Even though this is not totally optimal, we've already
- * blown it by missing the cache case so we do not care.
- */
- for (i = PQ_NUMCOLORS / 2; i > 0; --i) {
- if ((m = TAILQ_FIRST(&pq[(index + i) & PQ_COLORMASK].pl)) \
- != NULL)
- break;
-
- if ((m = TAILQ_FIRST(&pq[(index - i) & PQ_COLORMASK].pl)) \
- != NULL)
- break;
- }
- return (m);
-}
-#endif /* PQ_NOOPT */
-
-vm_page_t
-vm_pageq_find(int basequeue, int index, boolean_t prefer_zero)
-{
- vm_page_t m;
-
-#ifndef PQ_NOOPT
- if (PQ_NUMCOLORS > 1) {
- if (prefer_zero) {
- m = TAILQ_LAST(&vm_page_queues[basequeue+index].pl, \
- pglist);
- } else {
- m = TAILQ_FIRST(&vm_page_queues[basequeue+index].pl);
- }
- if (m == NULL) {
- m = _vm_pageq_find(basequeue, index);
- }
- } else {
-#endif
- if (prefer_zero) {
- m = TAILQ_LAST(&vm_page_queues[basequeue].pl, pglist);
- } else {
- m = TAILQ_FIRST(&vm_page_queues[basequeue].pl);
- }
-#ifndef PQ_NOOPT
- }
-#endif
- return (m);
-}
-
diff --git a/sys/vm/vm_zeroidle.c b/sys/vm/vm_zeroidle.c
index b21d01f..1cba318 100644
--- a/sys/vm/vm_zeroidle.c
+++ b/sys/vm/vm_zeroidle.c
@@ -51,12 +51,9 @@ __FBSDID("$FreeBSD$");
#include <vm/vm.h>
#include <vm/vm_page.h>
+#include <vm/vm_phys.h>
-static int cnt_prezero;
-SYSCTL_INT(_vm_stats_misc, OID_AUTO, cnt_prezero, CTLFLAG_RD,
- &cnt_prezero, 0, "");
-
-static int idlezero_enable_default = 1;
+static int idlezero_enable_default = 0;
TUNABLE_INT("vm.idlezero_enable", &idlezero_enable_default);
/* Defer setting the enable flag until the kthread is running. */
static int idlezero_enable = 0;
@@ -100,25 +97,13 @@ vm_page_zero_check(void)
static void
vm_page_zero_idle(void)
{
- static int free_rover;
- vm_page_t m;
mtx_assert(&vm_page_queue_free_mtx, MA_OWNED);
zero_state = 0;
- m = vm_pageq_find(PQ_FREE, free_rover, FALSE);
- if (m != NULL && (m->flags & PG_ZERO) == 0) {
- vm_pageq_remove_nowakeup(m);
- mtx_unlock(&vm_page_queue_free_mtx);
- pmap_zero_page_idle(m);
- mtx_lock(&vm_page_queue_free_mtx);
- m->flags |= PG_ZERO;
- vm_pageq_enqueue(PQ_FREE + m->pc, m);
- ++vm_page_zero_count;
- ++cnt_prezero;
+ if (vm_phys_zero_pages_idle()) {
if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
zero_state = 1;
}
- free_rover = (free_rover + PQ_PRIME2) & PQ_COLORMASK;
}
/* Called by vm_page_free to hint that a new page is available. */