summaryrefslogtreecommitdiffstats
path: root/sys/vm/vm_page.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/vm/vm_page.c')
-rw-r--r--sys/vm/vm_page.c460
1 files changed, 47 insertions, 413 deletions
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 2d23371..9a93ee1 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -65,6 +65,39 @@
*/
/*
+ * GENERAL RULES ON VM_PAGE MANIPULATION
+ *
+ * - a pageq mutex is required when adding or removing a page from a
+ * page queue (vm_page_queue[]), regardless of other mutexes or the
+ * busy state of a page.
+ *
+ * - a hash chain mutex is required when associating or disassociating
+ * a page from the VM PAGE CACHE hash table (vm_page_buckets),
+ * regardless of other mutexes or the busy state of a page.
+ *
+ * - either a hash chain mutex OR a busied page is required in order
+ * to modify the page flags. A hash chain mutex must be obtained in
+ * order to busy a page. A page's flags cannot be modified by a
+ * hash chain mutex if the page is marked busy.
+ *
+ * - The object memq mutex is held when inserting or removing
+ * pages from an object (vm_page_insert() or vm_page_remove()). This
+ * is different from the object's main mutex.
+ *
+ * Generally speaking, you have to be aware of side effects when running
+ * vm_page ops. A vm_page_lookup() will return with the hash chain
+ * locked, whether it was able to lookup the page or not. vm_page_free(),
+ * vm_page_cache(), vm_page_activate(), and a number of other routines
+ * will release the hash chain mutex for you. Intermediate manipulation
+ * routines such as vm_page_flag_set() expect the hash chain to be held
+ * on entry and the hash chain will remain held on return.
+ *
+ * pageq scanning can only occur with the pageq in question locked.
+ * We have a known bottleneck with the active queue, but the cache
+ * and free queues are actually arrays already.
+ */
+
+/*
* Resident memory management module.
*/
@@ -86,9 +119,6 @@
#include <vm/vm_pager.h>
#include <vm/vm_extern.h>
-static void vm_page_queue_init __P((void));
-static vm_page_t vm_page_select_cache __P((vm_object_t, vm_pindex_t));
-
/*
* Associated with page of user-allocatable memory is a
* page structure.
@@ -98,35 +128,13 @@ static struct vm_page **vm_page_buckets; /* Array of buckets */
static int vm_page_bucket_count; /* How big is array? */
static int vm_page_hash_mask; /* Mask for hash function */
static volatile int vm_page_bucket_generation;
-
-struct vpgqueues vm_page_queues[PQ_COUNT];
-
-static void
-vm_page_queue_init(void)
-{
- int i;
-
- for (i = 0; i < PQ_L2_SIZE; i++) {
- vm_page_queues[PQ_FREE+i].cnt = &cnt.v_free_count;
- }
- for (i = 0; i < PQ_L2_SIZE; i++) {
- vm_page_queues[PQ_CACHE+i].cnt = &cnt.v_cache_count;
- }
- vm_page_queues[PQ_INACTIVE].cnt = &cnt.v_inactive_count;
- vm_page_queues[PQ_ACTIVE].cnt = &cnt.v_active_count;
-
- for (i = 0; i < PQ_COUNT; i++) {
- TAILQ_INIT(&vm_page_queues[i].pl);
- }
-}
+static struct mtx vm_buckets_mtx[BUCKET_HASH_SIZE];
vm_page_t vm_page_array = 0;
int vm_page_array_size = 0;
long first_page = 0;
int vm_page_zero_count = 0;
-static vm_page_t _vm_page_list_find(int basequeue, int index);
-
/*
* vm_set_page_size:
*
@@ -144,31 +152,6 @@ vm_set_page_size(void)
}
/*
- * vm_add_new_page:
- *
- * Add a new page to the freelist for use by the system.
- * Must be called at splhigh().
- */
-vm_page_t
-vm_add_new_page(vm_offset_t pa)
-{
- vm_page_t m;
-
- GIANT_REQUIRED;
-
- ++cnt.v_page_count;
- ++cnt.v_free_count;
- m = PHYS_TO_VM_PAGE(pa);
- m->phys_addr = pa;
- m->flags = 0;
- m->pc = (pa >> PAGE_SHIFT) & PQ_L2_MASK;
- m->queue = m->pc + PQ_FREE;
- TAILQ_INSERT_TAIL(&vm_page_queues[m->queue].pl, m, pageq);
- vm_page_queues[m->queue].lcnt++;
- return (m);
-}
-
-/*
* vm_page_startup:
*
* Initializes the resident memory module.
@@ -225,7 +208,7 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr)
* and the inactive queue.
*/
- vm_page_queue_init();
+ vm_pageq_init();
/*
* Allocate (and initialize) the hash table buckets.
@@ -264,6 +247,8 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr)
*bucket = NULL;
bucket++;
}
+ for (i = 0; i < BUCKET_HASH_SIZE; ++i)
+ mtx_init(&vm_buckets_mtx[i], "vm buckets hash mutexes", MTX_DEF);
/*
* Compute the number of pages of memory that will be available for
@@ -309,7 +294,7 @@ vm_page_startup(vm_offset_t starta, vm_offset_t enda, vm_offset_t vaddr)
else
last_pa = phys_avail[i + 1];
while (pa < last_pa && npages-- > 0) {
- vm_add_new_page(pa);
+ vm_pageq_add_new_page(pa);
pa += PAGE_SIZE;
}
}
@@ -782,132 +767,6 @@ vm_page_rename(vm_page_t m, vm_object_t new_object, vm_pindex_t new_pindex)
}
/*
- * vm_page_unqueue_nowakeup:
- *
- * vm_page_unqueue() without any wakeup
- *
- * This routine must be called at splhigh().
- * This routine may not block.
- */
-
-void
-vm_page_unqueue_nowakeup(vm_page_t m)
-{
- int queue = m->queue;
- struct vpgqueues *pq;
- if (queue != PQ_NONE) {
- pq = &vm_page_queues[queue];
- m->queue = PQ_NONE;
- TAILQ_REMOVE(&pq->pl, m, pageq);
- (*pq->cnt)--;
- pq->lcnt--;
- }
-}
-
-/*
- * vm_page_unqueue:
- *
- * Remove a page from its queue.
- *
- * This routine must be called at splhigh().
- * This routine may not block.
- */
-
-void
-vm_page_unqueue(vm_page_t m)
-{
- int queue = m->queue;
- struct vpgqueues *pq;
-
- GIANT_REQUIRED;
- if (queue != PQ_NONE) {
- m->queue = PQ_NONE;
- pq = &vm_page_queues[queue];
- TAILQ_REMOVE(&pq->pl, m, pageq);
- (*pq->cnt)--;
- pq->lcnt--;
- if ((queue - m->pc) == PQ_CACHE) {
- if (vm_paging_needed())
- pagedaemon_wakeup();
- }
- }
-}
-
-vm_page_t
-vm_page_list_find(int basequeue, int index, boolean_t prefer_zero)
-{
- vm_page_t m;
-
- GIANT_REQUIRED;
-
-#if PQ_L2_SIZE > 1
- if (prefer_zero) {
- m = TAILQ_LAST(&vm_page_queues[basequeue+index].pl, pglist);
- } else {
- m = TAILQ_FIRST(&vm_page_queues[basequeue+index].pl);
- }
- if (m == NULL) {
- m = _vm_page_list_find(basequeue, index);
- }
-#else
- if (prefer_zero) {
- m = TAILQ_LAST(&vm_page_queues[basequeue].pl, pglist);
- } else {
- m = TAILQ_FIRST(&vm_page_queues[basequeue].pl);
- }
-#endif
- return(m);
-}
-
-
-#if PQ_L2_SIZE > 1
-
-/*
- * vm_page_list_find:
- *
- * Find a page on the specified queue with color optimization.
- *
- * The page coloring optimization attempts to locate a page
- * that does not overload other nearby pages in the object in
- * the cpu's L1 or L2 caches. We need this optimization because
- * cpu caches tend to be physical caches, while object spaces tend
- * to be virtual.
- *
- * This routine must be called at splvm().
- * This routine may not block.
- *
- * This routine may only be called from the vm_page_list_find() macro
- * in vm_page.h
- */
-static vm_page_t
-_vm_page_list_find(int basequeue, int index)
-{
- int i;
- vm_page_t m = NULL;
- struct vpgqueues *pq;
-
- GIANT_REQUIRED;
- pq = &vm_page_queues[basequeue];
-
- /*
- * Note that for the first loop, index+i and index-i wind up at the
- * same place. Even though this is not totally optimal, we've already
- * blown it by missing the cache case so we do not care.
- */
-
- for(i = PQ_L2_SIZE / 2; i > 0; --i) {
- if ((m = TAILQ_FIRST(&pq[(index + i) & PQ_L2_MASK].pl)) != NULL)
- break;
-
- if ((m = TAILQ_FIRST(&pq[(index - i) & PQ_L2_MASK].pl)) != NULL)
- break;
- }
- return(m);
-}
-
-#endif
-
-/*
* vm_page_select_cache:
*
* Find a page on the cache queue with color optimization. As pages
@@ -924,7 +783,7 @@ vm_page_select_cache(vm_object_t object, vm_pindex_t pindex)
GIANT_REQUIRED;
while (TRUE) {
- m = vm_page_list_find(
+ m = vm_pageq_find(
PQ_CACHE,
(pindex + object->pg_color) & PQ_L2_MASK,
FALSE
@@ -952,7 +811,7 @@ vm_page_select_free(vm_object_t object, vm_pindex_t pindex, boolean_t prefer_zer
{
vm_page_t m;
- m = vm_page_list_find(
+ m = vm_pageq_find(
PQ_FREE,
(pindex + object->pg_color) & PQ_L2_MASK,
prefer_zero
@@ -1065,7 +924,7 @@ loop:
* Remove from free queue
*/
- vm_page_unqueue_nowakeup(m);
+ vm_pageq_remove_nowakeup(m);
/*
* Initialize structure. Only the PG_ZERO flag is inherited.
@@ -1178,7 +1037,7 @@ vm_page_activate(vm_page_t m)
if ((m->queue - m->pc) == PQ_CACHE)
cnt.v_reactivated++;
- vm_page_unqueue(m);
+ vm_pageq_remove(m);
if (m->wire_count == 0 && (m->flags & PG_UNMANAGED) == 0) {
m->queue = PQ_ACTIVE;
@@ -1269,7 +1128,7 @@ vm_page_free_toq(vm_page_t m)
* appropriate free queue.
*/
- vm_page_unqueue_nowakeup(m);
+ vm_pageq_remove_nowakeup(m);
vm_page_remove(m);
/*
@@ -1369,7 +1228,7 @@ vm_page_unmanage(vm_page_t m)
s = splvm();
if ((m->flags & PG_UNMANAGED) == 0) {
if (m->wire_count == 0)
- vm_page_unqueue(m);
+ vm_pageq_remove(m);
}
vm_page_flag_set(m, PG_UNMANAGED);
splx(s);
@@ -1398,7 +1257,7 @@ vm_page_wire(vm_page_t m)
s = splvm();
if (m->wire_count == 0) {
if ((m->flags & PG_UNMANAGED) == 0)
- vm_page_unqueue(m);
+ vm_pageq_remove(m);
cnt.v_wire_count++;
}
m->wire_count++;
@@ -1494,7 +1353,7 @@ _vm_page_deactivate(vm_page_t m, int athead)
if ((m->queue - m->pc) == PQ_CACHE)
cnt.v_reactivated++;
vm_page_flag_clear(m, PG_WINATCFLS);
- vm_page_unqueue(m);
+ vm_pageq_remove(m);
if (athead)
TAILQ_INSERT_HEAD(&vm_page_queues[PQ_INACTIVE].pl, m, pageq);
else
@@ -1586,7 +1445,7 @@ vm_page_cache(vm_page_t m)
(long)m->pindex);
}
s = splvm();
- vm_page_unqueue_nowakeup(m);
+ vm_pageq_remove_nowakeup(m);
m->queue = PQ_CACHE + m->pc;
vm_page_queues[m->queue].lcnt++;
TAILQ_INSERT_TAIL(&vm_page_queues[m->queue].pl, m, pageq);
@@ -1928,231 +1787,6 @@ vm_page_test_dirty(vm_page_t m)
}
}
-/*
- * This interface is for merging with malloc() someday.
- * Even if we never implement compaction so that contiguous allocation
- * works after initialization time, malloc()'s data structures are good
- * for statistics and for allocations of less than a page.
- */
-void *
-contigmalloc1(
- unsigned long size, /* should be size_t here and for malloc() */
- struct malloc_type *type,
- int flags,
- unsigned long low,
- unsigned long high,
- unsigned long alignment,
- unsigned long boundary,
- vm_map_t map)
-{
- int i, s, start;
- vm_offset_t addr, phys, tmp_addr;
- int pass;
- vm_page_t pga = vm_page_array;
-
- size = round_page(size);
- if (size == 0)
- panic("contigmalloc1: size must not be 0");
- if ((alignment & (alignment - 1)) != 0)
- panic("contigmalloc1: alignment must be a power of 2");
- if ((boundary & (boundary - 1)) != 0)
- panic("contigmalloc1: boundary must be a power of 2");
-
- start = 0;
- for (pass = 0; pass <= 1; pass++) {
- s = splvm();
-again:
- /*
- * Find first page in array that is free, within range, aligned, and
- * such that the boundary won't be crossed.
- */
- for (i = start; i < cnt.v_page_count; i++) {
- int pqtype;
- phys = VM_PAGE_TO_PHYS(&pga[i]);
- pqtype = pga[i].queue - pga[i].pc;
- if (((pqtype == PQ_FREE) || (pqtype == PQ_CACHE)) &&
- (phys >= low) && (phys < high) &&
- ((phys & (alignment - 1)) == 0) &&
- (((phys ^ (phys + size - 1)) & ~(boundary - 1)) == 0))
- break;
- }
-
- /*
- * If the above failed or we will exceed the upper bound, fail.
- */
- if ((i == cnt.v_page_count) ||
- ((VM_PAGE_TO_PHYS(&pga[i]) + size) > high)) {
- vm_page_t m, next;
-
-again1:
- for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
- m != NULL;
- m = next) {
-
- KASSERT(m->queue == PQ_INACTIVE,
- ("contigmalloc1: page %p is not PQ_INACTIVE", m));
-
- next = TAILQ_NEXT(m, pageq);
- if (vm_page_sleep_busy(m, TRUE, "vpctw0"))
- goto again1;
- vm_page_test_dirty(m);
- if (m->dirty) {
- if (m->object->type == OBJT_VNODE) {
- vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
- vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
- VOP_UNLOCK(m->object->handle, 0, curproc);
- goto again1;
- } else if (m->object->type == OBJT_SWAP ||
- m->object->type == OBJT_DEFAULT) {
- vm_pageout_flush(&m, 1, 0);
- goto again1;
- }
- }
- if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
- vm_page_cache(m);
- }
-
- for (m = TAILQ_FIRST(&vm_page_queues[PQ_ACTIVE].pl);
- m != NULL;
- m = next) {
-
- KASSERT(m->queue == PQ_ACTIVE,
- ("contigmalloc1: page %p is not PQ_ACTIVE", m));
-
- next = TAILQ_NEXT(m, pageq);
- if (vm_page_sleep_busy(m, TRUE, "vpctw1"))
- goto again1;
- vm_page_test_dirty(m);
- if (m->dirty) {
- if (m->object->type == OBJT_VNODE) {
- vn_lock(m->object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
- vm_object_page_clean(m->object, 0, 0, OBJPC_SYNC);
- VOP_UNLOCK(m->object->handle, 0, curproc);
- goto again1;
- } else if (m->object->type == OBJT_SWAP ||
- m->object->type == OBJT_DEFAULT) {
- vm_pageout_flush(&m, 1, 0);
- goto again1;
- }
- }
- if ((m->dirty == 0) && (m->busy == 0) && (m->hold_count == 0))
- vm_page_cache(m);
- }
-
- splx(s);
- continue;
- }
- start = i;
-
- /*
- * Check successive pages for contiguous and free.
- */
- for (i = start + 1; i < (start + size / PAGE_SIZE); i++) {
- int pqtype;
- pqtype = pga[i].queue - pga[i].pc;
- if ((VM_PAGE_TO_PHYS(&pga[i]) !=
- (VM_PAGE_TO_PHYS(&pga[i - 1]) + PAGE_SIZE)) ||
- ((pqtype != PQ_FREE) && (pqtype != PQ_CACHE))) {
- start++;
- goto again;
- }
- }
-
- for (i = start; i < (start + size / PAGE_SIZE); i++) {
- int pqtype;
- vm_page_t m = &pga[i];
-
- pqtype = m->queue - m->pc;
- if (pqtype == PQ_CACHE) {
- vm_page_busy(m);
- vm_page_free(m);
- }
-
- TAILQ_REMOVE(&vm_page_queues[m->queue].pl, m, pageq);
- vm_page_queues[m->queue].lcnt--;
- cnt.v_free_count--;
- m->valid = VM_PAGE_BITS_ALL;
- m->flags = 0;
- KASSERT(m->dirty == 0, ("contigmalloc1: page %p was dirty", m));
- m->wire_count = 0;
- m->busy = 0;
- m->queue = PQ_NONE;
- m->object = NULL;
- vm_page_wire(m);
- }
-
- /*
- * We've found a contiguous chunk that meets are requirements.
- * Allocate kernel VM, unfree and assign the physical pages to it and
- * return kernel VM pointer.
- */
- tmp_addr = addr = kmem_alloc_pageable(map, size);
- if (addr == 0) {
- /*
- * XXX We almost never run out of kernel virtual
- * space, so we don't make the allocated memory
- * above available.
- */
- splx(s);
- return (NULL);
- }
-
- for (i = start; i < (start + size / PAGE_SIZE); i++) {
- vm_page_t m = &pga[i];
- vm_page_insert(m, kernel_object,
- OFF_TO_IDX(tmp_addr - VM_MIN_KERNEL_ADDRESS));
- pmap_kenter(tmp_addr, VM_PAGE_TO_PHYS(m));
- tmp_addr += PAGE_SIZE;
- }
-
- splx(s);
- return ((void *)addr);
- }
- return NULL;
-}
-
-void *
-contigmalloc(
- unsigned long size, /* should be size_t here and for malloc() */
- struct malloc_type *type,
- int flags,
- unsigned long low,
- unsigned long high,
- unsigned long alignment,
- unsigned long boundary)
-{
- void * ret;
-
- GIANT_REQUIRED;
- ret = contigmalloc1(size, type, flags, low, high, alignment, boundary,
- kernel_map);
- return (ret);
-
-}
-
-void
-contigfree(void *addr, unsigned long size, struct malloc_type *type)
-{
- GIANT_REQUIRED;
- kmem_free(kernel_map, (vm_offset_t)addr, size);
-}
-
-vm_offset_t
-vm_page_alloc_contig(
- vm_offset_t size,
- vm_offset_t low,
- vm_offset_t high,
- vm_offset_t alignment)
-{
- vm_offset_t ret;
-
- GIANT_REQUIRED;
- ret = ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
- alignment, 0ul, kernel_map));
- return (ret);
-
-}
-
#include "opt_ddb.h"
#ifdef DDB
#include <sys/kernel.h>
OpenPOWER on IntegriCloud