author     dillon <dillon@FreeBSD.org>   2001-07-04 23:27:09 +0000
committer  dillon <dillon@FreeBSD.org>   2001-07-04 23:27:09 +0000
commit     93369f554a43c46419d56436721efe61e4b858c7 (patch)
tree       81260d55ae91eebf4632f0a755612f23148b8fb8 /sys/vm/vm_page.c
parent     62d663b9b937d23cdb2e47513623bf3834ebac8e (diff)
download   FreeBSD-src-93369f554a43c46419d56436721efe61e4b858c7.zip
           FreeBSD-src-93369f554a43c46419d56436721efe61e4b858c7.tar.gz
Reorg vm_page.c into vm_page.c, vm_pageq.c, and vm_contig.c (for contigmalloc).
Also removed some spl's and added some VM mutexes, but they are not actually used yet, so this commit does not really make any operational changes to the system.

vm_page.c relates to vm_page_t manipulation, including high level deactivation, activation, etc... vm_pageq.c relates to finding free pages and acquiring exclusive access to a page queue (exclusivity part not yet implemented).

And the world still builds... :-)
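The rename map in this commit is mechanical: vm_page_unqueue() becomes vm_pageq_remove(), vm_page_unqueue_nowakeup() becomes vm_pageq_remove_nowakeup(), vm_page_list_find() becomes vm_pageq_find(), and vm_add_new_page() becomes vm_pageq_add_new_page(), all moving to vm_pageq.c. As a minimal sketch of where this is headed, here is the removed vm_page_unqueue() body wrapped in the per-queue mutex that the new GENERAL RULES comment calls for; the pq_mtx field name is hypothetical, and as the commit message says, no mutex is actually taken yet:

/*
 * Sketch only -- not the committed code.  Queue removal as it is
 * intended to look once per-queue mutexes are wired up.  The body
 * mirrors the vm_page_unqueue() removed in the diff below; "pq_mtx"
 * is a hypothetical field on struct vpgqueues.
 */
void
vm_pageq_remove(vm_page_t m)
{
	int queue = m->queue;
	struct vpgqueues *pq;

	if (queue != PQ_NONE) {
		pq = &vm_page_queues[queue];
		mtx_lock(&pq->pq_mtx);		/* pageq mutex guards pq->pl */
		m->queue = PQ_NONE;
		TAILQ_REMOVE(&pq->pl, m, pageq);
		(*pq->cnt)--;
		pq->lcnt--;
		mtx_unlock(&pq->pq_mtx);
		if ((queue - m->pc) == PQ_CACHE && vm_paging_needed())
			pagedaemon_wakeup();
	}
}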
Diffstat (limited to 'sys/vm/vm_page.c')
-rw-r--r--  sys/vm/vm_page.c | 460
1 file changed, 47 insertions(+), 413 deletions(-)
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 2d23371..9a93ee1 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -65,6 +65,39 @@
*/
/*
+ * GENERAL RULES ON VM_PAGE MANIPULATION
+ *
+ * - a pageq mutex is required when adding or removing a page from a
+ * page queue (vm_page_queue[]), regardless of other mutexes or the
+ * busy state of a page.
+ *
+ * - a hash chain mutex is required when associating or disassociating
+ * a page from the VM PAGE CACHE hash table (vm_page_buckets),
+ * regardless of other mutexes or the busy state of a page.
+ *
+ * - either a hash chain mutex OR a busied page is required in order
+ * to modify the page flags. A hash chain mutex must be obtained in
+ * order to busy a page. A page's flags cannot be modified by a
+ * hash chain mutex if the page is marked busy.
+ *
+ * - The object memq mutex is held when inserting or removing
+ * pages from an object (vm_page_insert() or vm_page_remove()). This
+ * is different from the object's main mutex.
+ *
+ * Generally speaking, you have to be aware of side effects when running
+ * vm_page ops. A vm_page_lookup() will return with the hash chain
+ * locked, whether it was able to lookup the page or not. vm_page_free(),
+ * vm_page_cache(), vm_page_activate(), and a number of other routines
+ * will release the hash chain mutex for you. Intermediate manipulation
+ * routines such as vm_page_flag_set() expect the hash chain to be held
+ * on entry and the hash chain will remain held on return.
+ *
+ * pageq scanning can only occur with the pageq in question locked.
+ * We have a known bottleneck with the active queue, but the cache
+ * and free queues are actually arrays already.
+ */
+
+/*
* Resident memory management module.
*/
@@ -86,9 +119,6 @@
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
-static void vm_page_queue_init __P((void));
-static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t));
-
/*
* Associated with page of user-allocatable memory is a
* page structure.
@@ -98,35 +128,13 @@ static struct vm_page **vm_page_buckets; /* Array of buckets */
static int vm_page_bucket_count; /* How big is array? */
static int vm_page_hash_mask; /* Mask for hash function */
static volatile int vm_page_bucket_generation;
-
-struct vpgqueues vm_page_queues[PQ_COUNT];
-
-static void
-vm_page_queue_init(void)
-{
- int i;
-
- for (i = 0; i < PQ_L2_SIZE; i++) {
- vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
- }
- for (i = 0; i < PQ_L2_SIZE; i++) {
- vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count;
- }
- vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
- vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
-
- for (i = 0; i < PQ_COUNT; i++) {
- TAILQ_INIT(&vm_page_queues[i].pl);
- }
-}
+static struct mtx vm_buckets_mtx[BUCKET_HASH_SIZE];
vm_page_t vm_page_array = 0;
int vm_page_array_size = 0;
long first_page = 0;
int vm_page_zero_count = 0;
-static vm_page_t _vm_page_list_find(int basequeue, int index);
-
/*
* vm_set_page_size:
*
@@ -144,31 +152,6 @@ vm_set_page_size(void)
}
/*
- * vm_add_new_page:
- *
- * Add a new page to the freelist for use by the system.
- * Must be called at splhigh().
- */
-vm_page_t
-vm_add_new_page(vm_offset_t pa)
-{
- vm_page_t m;
-
- GIANT_REQUIRED;
-
- ++cnt.v_page_count;
- ++cnt.v_free_count;
- m = PHYS_TO_VM_PAGE(pa);
- m->phys_addr = pa;
- m->flags = 0;
- m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
- m->queue = m->pc + PQ_FREE;
- TAILQ_INSERT_TAIL(&vm_page_queues[m->queue].pl, m, pageq);
- vm_page_queues[m->queue].lcnt++;
- return (m);
-}
-
-/*
* vm_page_startup:
*
* Initializes the resident memory module.
@@ -225,7 +208,7 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr)
* and the inactive queue.
*/
- vm_page_queue_init();
+ vm_pageq_init();
/*
* Allocate (and initialize) the hash table buckets.
@@ -264,6 +247,8 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr)
*bucket = NULL;
bucket++;
}
+ for (i = 0; i < BUCKET_HASH_SIZE; ++i)
+ mtx_init(&vm_buckets_mtx[i], "vm buckets hash mutexes", MTX_DEF);
/*
* Compute the number of pages of memory that will be available for
@@ -309,7 +294,7 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr)
else
last_pa = phys_avail[i + 1];
while (pa < last_pa && npages-- > 0) {
- vm_add_new_page(pa);
+ vm_pageq_add_new_page(pa);
pa += PAGE_SIZE;
}
}
@@ -782,132 +767,6 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
}
/*
- * vm_page_unqueue_nowakeup:
- *
- * vm_page_unqueue() without any wakeup
- *
- * This routine must be called at splhigh().
- * This routine may not block.
- */
-
-void
-vm_page_unqueue_nowakeup(vm_page_t m)
-{
- int queue = m->queue;
- struct vpgqueues *pq;
- if (queue != PQ_NONE) {
- pq = &vm_page_queues[queue];
- m->queue = PQ_NONE;
- TAILQ_REMOVE(&pq->pl, m, pageq);
- (*pq->cnt)--;
- pq->lcnt--;
- }
-}
-
-/*
- * vm_page_unqueue:
- *
- * Remove a page from its queue.
- *
- * This routine must be called at splhigh().
- * This routine may not block.
- */
-
-void
-vm_page_unqueue(vm_page_t m)
-{
- int queue = m->queue;
- struct vpgqueues *pq;
-
- GIANT_REQUIRED;
- if (queue != PQ_NONE) {
- m->queue = PQ_NONE;
- pq = &vm_page_queues[queue];
- TAILQ_REMOVE(&pq->pl, m, pageq);
- (*pq->cnt)--;
- pq->lcnt--;
- if ((queue - m->pc) == PQ_CACHE) {
- if (vm_paging_needed())
- pagedaemon_wakeup();
- }
- }
-}
-
-vm_page_t
-vm_page_list_find(int basequeue, int index, boolean_t prefer_zero)
-{
- vm_page_t m;
-
- GIANT_REQUIRED;
-
-#if PQ_L2_SIZE > 1
- if (prefer_zero) {
- m = TAILQ_LAST(&vm_page_queues[basequeue+index].pl, pglist);
- } else {
- m = TAILQ_FIRST(&vm_page_queues[basequeue+index].pl);
- }
- if (m == NULL) {
- m = _vm_page_list_find(basequeue, index);
- }
-#else
- if (prefer_zero) {
- m = TAILQ_LAST(&vm_page_queues[basequeue].pl, pglist);
- } else {
- m = TAILQ_FIRST(&vm_page_queues[basequeue].pl);
- }
-#endif
- return(m);
-}
-
-
-#if PQ_L2_SIZE > 1
-
-/*
- * vm_page_list_find:
- *
- * Find a page on the specified queue with color optimization.
- *
- * The page coloring optimization attempts to locate a page
- * that does not overload other nearby pages in the object in
- * the cpu's L1 or L2 caches. We need this optimization because
- * cpu caches tend to be physical caches, while object spaces tend
- * to be virtual.
- *
- * This routine must be called at splvm().
- * This routine may not block.
- *
- * This routine may only be called from the vm_page_list_find() macro
- * in vm_page.h
- */
-static vm_page_t
-_vm_page_list_find(int basequeue, int index)
-{
- int i;
- vm_page_t m = NULL;
- struct vpgqueues *pq;
-
- GIANT_REQUIRED;
- pq = &vm_page_queues[basequeue];
-
- /*
- * Note that for the first loop, index+i and index-i wind up at the
- * same place. Even though this is not totally optimal, we've already
- * blown it by missing the cache case so we do not care.
- */
-
- for(i = PQ_L2_SIZE / 2; i > 0; --i) {
- if ((m = TAILQ_FIRST(&pq[(index + i) & PQ_L2_MASK].pl)) != NULL)
- break;
-
- if ((m = TAILQ_FIRST(&pq[(index - i) & PQ_L2_MASK].pl)) != NULL)
- break;
- }
- return(m);
-}
-
-#endif
-
-/*
* vm_page_select_cache:
*
* Find a page on the cache queue with color optimization. As pages
@@ -924,7 +783,7 @@ vm_page_select_cache(vm_object_t object, vm_pindex_t pindex)
GIANT_REQUIRED;
while (TRUE) {
- m = vm_page_list_find(
+ m = vm_pageq_find(
PQ_CACHE,
(pindex + object->pg_color) & PQ_L2_MASK,
FALSE
@@ -952,7 +811,7 @@ vm_page_select_free(vm_object_t object, vm_pindex_t pindex, boolean_t prefer_zer
{
vm_page_t m;
- m = vm_page_list_find(
+ m = vm_pageq_find(
PQ_FREE,
(pindex + object->pg_color) & PQ_L2_MASK,
prefer_zero
@@ -1065,7 +924,7 @@ loop:
* Remove from free queue
*/
- vm_page_unqueue_nowakeup(m);
+ vm_pageq_remove_nowakeup(m);
/*
* Initialize structure. Only the PG_ZERO flag is inherited.
@@ -1178,7 +1037,7 @@ vm_page_activate(vm_page_t m)
if ((m->queue - m->pc) == PQ_CACHE)
cnt.v_reactivated++;
- vm_page_unqueue(m);
+ vm_pageq_remove(m);
if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
m->queue = PQ_ACTIVE;
@@ -1269,7 +1128,7 @@ vm_page_free_toq(vm_page_t m)
* appropriate free queue.
*/
- vm_page_unqueue_nowakeup(m);
+ vm_pageq_remove_nowakeup(m);
vm_page_remove(m);
/*
@@ -1369,7 +1228,7 @@ vm_page_unmanage(vm_page_t m)
s = splvm();
if ((m->flags & PG_UNMANAGED) == 0) {
if (m->wire_count == 0)
- vm_page_unqueue(m);
+ vm_pageq_remove(m);
}
vm_page_flag_set(m, PG_UNMANAGED);
splx(s);
@@ -1398,7 +1257,7 @@ vm_page_wire(vm_page_t m)
s = splvm();
if (m->wire_count == 0) {
if ((m->flags & PG_UNMANAGED) == 0)
- vm_page_unqueue(m);
+ vm_pageq_remove(m);
cnt.v_wire_count++;
}
m->wire_count++;
@@ -1494,7 +1353,7 @@ _vm_page_deactivate(vm_page_t m, int athead)
if ((m->queue - m->pc) == PQ_CACHE)
cnt.v_reactivated++;
vm_page_flag_clear(m, PG_WINATCFLS);
- vm_page_unqueue(m);
+ vm_pageq_remove(m);
if (athead)
TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
else
@@ -1586,7 +1445,7 @@ vm_page_cache(vm_page_t m)
(long)m->pindex);
}
s = splvm();
- vm_page_unqueue_nowakeup(m);
+ vm_pageq_remove_nowakeup(m);
m->queue = PQ_CACHE + m->pc;
vm_page_queues[m->queue].lcnt++;
TAILQ_INSERT_TAIL(&vm_page_queues[m->queue].pl, m, pageq);
@@ -1928,231 +1787,6 @@ vm_page_test_dirty(vm_page_t m)
}
}
-/*
- * This interface is for merging with malloc() someday.
- * Even if we never implement compaction so that contiguous allocation
- * works after initialization time, malloc()'s data structures are good
- * for statistics and for allocations of less than a page.
- */
-void *
-contigmalloc1(
- unsigned long size, /* should be size_t here and for malloc() */
- struct malloc_type *type,
- int flags,
- unsigned long low,
- unsigned long high,
- unsigned long alignment,
- unsigned long boundary,
- vm_map_t map)
-{
- int i, s, start;
- vm_offset_t addr, phys, tmp_addr;
- int pass;
- vm_page_t pga = vm_page_array;
-
- size = round_page(size);
- if (size == 0)
- panic("contigmalloc1: size must not be 0");
- if ((alignment & (alignment - 1)) != 0)
- panic("contigmalloc1: alignment must be a power of 2");
- if ((boundary & (boundary - 1)) != 0)
- panic("contigmalloc1: boundary must be a power of 2");
-
- start = 0;
- for (pass = 0; pass <= 1; pass++) {
- s = splvm();
-again:
- /*
- * Find first page in array that is free, within range, aligned, and
- * such that the boundary won't be crossed.
- */
- for (i = start; i < cnt.v_page_count; i++) {
- int pqtype;
- phys = VM_PAGE_TO_PHYS(&pga[i]);
- pqtype = pga[i].queue - pga[i].pc;
- if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
- (phys >= low) && (phys < high) &&
- ((phys & (alignment - 1)) == 0) &&
- (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
- break;
- }
-
- /*
- * If the above failed or we will exceed the upper bound, fail.
- */
- if ((i == cnt.v_page_count) ||
- ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
- vm_page_t m, next;
-
-again1:
- for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
- m != NULL;
- m = next) {
-
- KASSERT(m->queue == PQ_INACTIVE,
- ("contigmalloc1: page %p is not PQ_INACTIVE", m));
-
- next = TAILQ_NEXT(m, pageq);
- if (vm_page_sleep_busy(m, TRUE, "vpctw0"))
- goto again1;
- vm_page_test_dirty(m);
- if (m->dirty) {
- if (m->object->type == OBJT_VNODE) {
- vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
- vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
- VOP_UNLOCK(m->object->handle, 0, curproc);
- goto again1;
- } else if (m->object->type == OBJT_SWAP ||
- m->object->type == OBJT_DEFAULT) {
- vm_pageout_flush(&m, 1, 0);
- goto again1;
- }
- }
- if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
- vm_page_cache(m);
- }
-
- for (m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
- m != NULL;
- m = next) {
-
- KASSERT(m->queue == PQ_ACTIVE,
- ("contigmalloc1: page %p is not PQ_ACTIVE", m));
-
- next = TAILQ_NEXT(m, pageq);
- if (vm_page_sleep_busy(m, TRUE, "vpctw1"))
- goto again1;
- vm_page_test_dirty(m);
- if (m->dirty) {
- if (m->object->type == OBJT_VNODE) {
- vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
- vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
- VOP_UNLOCK(m->object->handle, 0, curproc);
- goto again1;
- } else if (m->object->type == OBJT_SWAP ||
- m->object->type == OBJT_DEFAULT) {
- vm_pageout_flush(&m, 1, 0);
- goto again1;
- }
- }
- if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
- vm_page_cache(m);
- }
-
- splx(s);
- continue;
- }
- start = i;
-
- /*
- * Check successive pages for contiguous and free.
- */
- for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
- int pqtype;
- pqtype = pga[i].queue - pga[i].pc;
- if ((VM_PAGE_TO_PHYS(&pga[i]) !=
- (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
- ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
- start++;
- goto again;
- }
- }
-
- for (i = start; i < (start + size / PAGE_SIZE); i++) {
- int pqtype;
- vm_page_t m = &pga[i];
-
- pqtype = m->queue - m->pc;
- if (pqtype == PQ_CACHE) {
- vm_page_busy(m);
- vm_page_free(m);
- }
-
- TAILQ_REMOVE(&vm_page_queues[m->queue].pl, m, pageq);
- vm_page_queues[m->queue].lcnt--;
- cnt.v_free_count--;
- m->valid = VM_PAGE_BITS_ALL;
- m->flags = 0;
- KASSERT(m->dirty == 0, ("contigmalloc1: page %p was dirty", m));
- m->wire_count = 0;
- m->busy = 0;
- m->queue = PQ_NONE;
- m->object = NULL;
- vm_page_wire(m);
- }
-
- /*
- * We've found a contiguous chunk that meets are requirements.
- * Allocate kernel VM, unfree and assign the physical pages to it and
- * return kernel VM pointer.
- */
- tmp_addr = addr = kmem_alloc_pageable(map, size);
- if (addr == 0) {
- /*
- * XXX We almost never run out of kernel virtual
- * space, so we don't make the allocated memory
- * above available.
- */
- splx(s);
- return (NULL);
- }
-
- for (i = start; i < (start + size / PAGE_SIZE); i++) {
- vm_page_t m = &pga[i];
- vm_page_insert(m, kernel_object,
- OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
- pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
- tmp_addr += PAGE_SIZE;
- }
-
- splx(s);
- return ((void *)addr);
- }
- return NULL;
-}
-
-void *
-contigmalloc(
- unsigned long size, /* should be size_t here and for malloc() */
- struct malloc_type *type,
- int flags,
- unsigned long low,
- unsigned long high,
- unsigned long alignment,
- unsigned long boundary)
-{
- void * ret;
-
- GIANT_REQUIRED;
- ret = contigmalloc1(size, type, flags, low, high, alignment, boundary,
- kernel_map);
- return (ret);
-
-}
-
-void
-contigfree(void *addr, unsigned long size, struct malloc_type *type)
-{
- GIANT_REQUIRED;
- kmem_free(kernel_map, (vm_offset_t)addr, size);
-}
-
-vm_offset_t
-vm_page_alloc_contig(
- vm_offset_t size,
- vm_offset_t low,
- vm_offset_t high,
- vm_offset_t alignment)
-{
- vm_offset_t ret;
-
- GIANT_REQUIRED;
- ret = ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
- alignment, 0ul, kernel_map));
- return (ret);
-
-}
-
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>
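A side note on the contigmalloc1() code relocated to vm_contig.c: the one subtle test in the scan is the boundary check, which accepts a candidate physical address only if the entire run stays inside a single boundary-aligned window. Restated as a standalone, illustrative userland function -- the name run_fits is ours, not the kernel's:

#include <stdio.h>

/*
 * Restatement of the candidate test from contigmalloc1().  XORing the
 * first and last byte addresses leaves only the bits in which they
 * differ; masking with ~(boundary - 1) keeps just the bits that select
 * a boundary-aligned window, so the result is zero exactly when both
 * ends land in the same window.  boundary == 0 turns the mask into 0,
 * i.e. no boundary constraint, which is how vm_page_alloc_contig()
 * calls it.  alignment and boundary must be powers of two.
 */
static int
run_fits(unsigned long phys, unsigned long size,
    unsigned long alignment, unsigned long boundary)
{
	if ((phys & (alignment - 1)) != 0)
		return (0);
	return (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0);
}

int
main(void)
{
	/* 8KB at 0x3f000 crosses the 64KB boundary at 0x40000: rejected. */
	printf("%d\n", run_fits(0x3f000, 0x2000, 0x1000, 0x10000));	/* 0 */
	/* 8KB at 0x30000 stays inside one 64KB window: accepted. */
	printf("%d\n", run_fits(0x30000, 0x2000, 0x1000, 0x10000));	/* 1 */
	return (0);
}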