| field | value | date |
|---|---|---|
| author | attilio <attilio@FreeBSD.org> | 2012-08-03 15:58:05 +0000 |
| committer | attilio <attilio@FreeBSD.org> | 2012-08-03 15:58:05 +0000 |
| commit | c52a057b1917918a0faad18ccf6666c4cdcb5c8c (patch) | |
| tree | 21862df57c9f0eef84636c11cb5c3ecf3d578893 /sys/vm | |
| parent | 675a214708d3e1fb79cbcf6bd8a46e0f1cc4823d (diff) | |
| parent | 8916b8f9033faa7d822c84a2bdd638979f11fab1 (diff) | |
| download | FreeBSD-src-c52a057b1917918a0faad18ccf6666c4cdcb5c8c.zip, FreeBSD-src-c52a057b1917918a0faad18ccf6666c4cdcb5c8c.tar.gz | |
MFC
Diffstat (limited to 'sys/vm')
| mode | path | lines changed |
|---|---|---|
| -rw-r--r-- | sys/vm/memguard.c | 26 |
| -rw-r--r-- | sys/vm/memguard.h | 2 |
| -rw-r--r-- | sys/vm/vm_contig.c | 351 |
| -rw-r--r-- | sys/vm/vm_kern.c | 142 |
| -rw-r--r-- | sys/vm/vm_map.h | 4 |
| -rw-r--r-- | sys/vm/vm_page.c | 59 |
| -rw-r--r-- | sys/vm/vm_page.h | 87 |
| -rw-r--r-- | sys/vm/vm_pageout.c | 208 |
| -rw-r--r-- | sys/vm/vm_pageout.h | 4 |
| -rw-r--r-- | sys/vm/vm_reserv.c | 2 |
10 files changed, 416 insertions, 469 deletions
diff --git a/sys/vm/memguard.c b/sys/vm/memguard.c
index 5a690e7..b1740c3 100644
--- a/sys/vm/memguard.c
+++ b/sys/vm/memguard.c
@@ -159,16 +159,18 @@ SYSCTL_ULONG(_vm_memguard, OID_AUTO, frequency_hits, CTLFLAG_RD,
  * the kmem_map.  The memguard memory will be a submap.
  */
 unsigned long
-memguard_fudge(unsigned long km_size, unsigned long km_max)
+memguard_fudge(unsigned long km_size, const struct vm_map *parent_map)
 {
-	u_long mem_pgs = cnt.v_page_count;
+	u_long mem_pgs, parent_size;
 
 	vm_memguard_divisor = 10;
 	TUNABLE_INT_FETCH("vm.memguard.divisor", &vm_memguard_divisor);
 
+	parent_size = vm_map_max(parent_map) - vm_map_min(parent_map) +
+	    PAGE_SIZE;
 	/* Pick a conservative value if provided value sucks. */
 	if ((vm_memguard_divisor <= 0) ||
-	    ((km_size / vm_memguard_divisor) == 0))
+	    ((parent_size / vm_memguard_divisor) == 0))
 		vm_memguard_divisor = 10;
 	/*
 	 * Limit consumption of physical pages to
@@ -177,21 +179,19 @@ memguard_fudge(unsigned long km_size, unsigned long km_max)
 	 * This prevents memguard's page promotions from completely
 	 * using up memory, since most malloc(9) calls are sub-page.
 	 */
+	mem_pgs = cnt.v_page_count;
 	memguard_physlimit = (mem_pgs / vm_memguard_divisor) * PAGE_SIZE;
 	/*
 	 * We want as much KVA as we can take safely.  Use at most our
-	 * allotted fraction of kmem_max.  Limit this to twice the
-	 * physical memory to avoid using too much memory as pagetable
-	 * pages.
+	 * allotted fraction of the parent map's size.  Limit this to
+	 * twice the physical memory to avoid using too much memory as
+	 * pagetable pages (size must be multiple of PAGE_SIZE).
 	 */
-	memguard_mapsize = km_max / vm_memguard_divisor;
-	/* size must be multiple of PAGE_SIZE */
-	memguard_mapsize = round_page(memguard_mapsize);
-	if (memguard_mapsize == 0 ||
-	    memguard_mapsize / (2 * PAGE_SIZE) > mem_pgs)
+	memguard_mapsize = round_page(parent_size / vm_memguard_divisor);
+	if (memguard_mapsize / (2 * PAGE_SIZE) > mem_pgs)
 		memguard_mapsize = mem_pgs * 2 * PAGE_SIZE;
-	if (km_max > 0 && km_size + memguard_mapsize > km_max)
-		return (km_max);
+	if (km_size + memguard_mapsize > parent_size)
+		memguard_mapsize = 0;
 	return (km_size + memguard_mapsize);
 }
diff --git a/sys/vm/memguard.h b/sys/vm/memguard.h
index 335e237..9ec4ffd 100644
--- a/sys/vm/memguard.h
+++ b/sys/vm/memguard.h
@@ -35,7 +35,7 @@ struct malloc_type;
 struct vm_map;
 
 #ifdef DEBUG_MEMGUARD
-unsigned long	memguard_fudge(unsigned long, unsigned long);
+unsigned long	memguard_fudge(unsigned long, const struct vm_map *);
 void	memguard_init(struct vm_map *);
 void	*memguard_alloc(unsigned long, int);
 void	*memguard_realloc(void *, unsigned long, struct malloc_type *, int);
diff --git a/sys/vm/vm_contig.c b/sys/vm/vm_contig.c
deleted file mode 100644
index e29aaf7..0000000
--- a/sys/vm/vm_contig.c
+++ /dev/null
@@ -1,351 +0,0 @@
-/*-
- * Copyright (c) 1991 Regents of the University of California.
- * All rights reserved.
- *
- * This code is derived from software contributed to Berkeley by
- * The Mach Operating System project at Carnegie-Mellon University.
- *
- * Redistribution and use in source and binary forms, with or without
- * modification, are permitted provided that the following conditions
- * are met:
- * 1. Redistributions of source code must retain the above copyright
- *    notice, this list of conditions and the following disclaimer.
- * 2. Redistributions in binary form must reproduce the above copyright
- *    notice, this list of conditions and the following disclaimer in the
- *    documentation and/or other materials provided with the distribution.
- * 4. Neither the name of the University nor the names of its contributors
- *    may be used to endorse or promote products derived from this software
- *    without specific prior written permission.
- *
- * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
- * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
- * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
- * ARE DISCLAIMED.  IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
- * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
- * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
- * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
- * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
- * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
- * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
- * SUCH DAMAGE.
- *
- * from: @(#)vm_page.c	7.4 (Berkeley) 5/7/91
- */
-
-/*-
- * Copyright (c) 1987, 1990 Carnegie-Mellon University.
- * All rights reserved.
- *
- * Authors: Avadis Tevanian, Jr., Michael Wayne Young
- *
- * Permission to use, copy, modify and distribute this software and
- * its documentation is hereby granted, provided that both the copyright
- * notice and this permission notice appear in all copies of the
- * software, derivative works or modified versions, and any portions
- * thereof, and that both notices appear in supporting documentation.
- *
- * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS "AS IS"
- * CONDITION.  CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND
- * FOR ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE.
- *
- * Carnegie Mellon requests users of this software to return to
- *
- *  Software Distribution Coordinator  or  Software.Distribution@CS.CMU.EDU
- *  School of Computer Science
- *  Carnegie Mellon University
- *  Pittsburgh PA 15213-3890
- *
- * any improvements or extensions that they make and grant Carnegie the
- * rights to redistribute these changes.
- */ - -#include <sys/cdefs.h> -__FBSDID("$FreeBSD$"); - -#include <sys/param.h> -#include <sys/systm.h> -#include <sys/lock.h> -#include <sys/mount.h> -#include <sys/mutex.h> -#include <sys/proc.h> -#include <sys/kernel.h> -#include <sys/sysctl.h> -#include <sys/vmmeter.h> -#include <sys/vnode.h> - -#include <vm/vm.h> -#include <vm/vm_param.h> -#include <vm/vm_kern.h> -#include <vm/pmap.h> -#include <vm/vm_map.h> -#include <vm/vm_object.h> -#include <vm/vm_page.h> -#include <vm/vm_pageout.h> -#include <vm/vm_pager.h> -#include <vm/vm_extern.h> - -static int -vm_contig_launder_page(vm_page_t m, vm_page_t *next) -{ - vm_object_t object; - vm_page_t m_tmp; - struct vnode *vp; - struct mount *mp; - int vfslocked; - - mtx_assert(&vm_page_queue_mtx, MA_OWNED); - vm_page_lock_assert(m, MA_OWNED); - object = m->object; - if (!VM_OBJECT_TRYLOCK(object) && - (!vm_pageout_fallback_object_lock(m, next) || m->hold_count != 0)) { - vm_page_unlock(m); - VM_OBJECT_UNLOCK(object); - return (EAGAIN); - } - if (vm_page_sleep_if_busy(m, TRUE, "vpctw0")) { - VM_OBJECT_UNLOCK(object); - vm_page_lock_queues(); - return (EBUSY); - } - vm_page_test_dirty(m); - if (m->dirty == 0) - pmap_remove_all(m); - if (m->dirty != 0) { - vm_page_unlock(m); - if ((object->flags & OBJ_DEAD) != 0) { - VM_OBJECT_UNLOCK(object); - return (EAGAIN); - } - if (object->type == OBJT_VNODE) { - vm_page_unlock_queues(); - vp = object->handle; - vm_object_reference_locked(object); - VM_OBJECT_UNLOCK(object); - (void) vn_start_write(vp, &mp, V_WAIT); - vfslocked = VFS_LOCK_GIANT(vp->v_mount); - vn_lock(vp, LK_EXCLUSIVE | LK_RETRY); - VM_OBJECT_LOCK(object); - vm_object_page_clean(object, 0, 0, OBJPC_SYNC); - VM_OBJECT_UNLOCK(object); - VOP_UNLOCK(vp, 0); - VFS_UNLOCK_GIANT(vfslocked); - vm_object_deallocate(object); - vn_finished_write(mp); - vm_page_lock_queues(); - return (0); - } else if (object->type == OBJT_SWAP || - object->type == OBJT_DEFAULT) { - vm_page_unlock_queues(); - m_tmp = m; - vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC, 0, - NULL, NULL); - VM_OBJECT_UNLOCK(object); - vm_page_lock_queues(); - return (0); - } - } else { - vm_page_cache(m); - vm_page_unlock(m); - } - VM_OBJECT_UNLOCK(object); - return (0); -} - -static int -vm_contig_launder(int queue, vm_paddr_t low, vm_paddr_t high) -{ - vm_page_t m, next; - vm_paddr_t pa; - int error; - - TAILQ_FOREACH_SAFE(m, &vm_page_queues[queue].pl, pageq, next) { - - /* Skip marker pages */ - if ((m->flags & PG_MARKER) != 0) - continue; - - pa = VM_PAGE_TO_PHYS(m); - if (pa < low || pa + PAGE_SIZE > high) - continue; - - if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) { - vm_page_unlock(m); - continue; - } - KASSERT(m->queue == queue, - ("vm_contig_launder: page %p's queue is not %d", m, queue)); - error = vm_contig_launder_page(m, &next); - vm_page_lock_assert(m, MA_NOTOWNED); - if (error == 0) - return (TRUE); - if (error == EBUSY) - return (FALSE); - } - return (FALSE); -} - -/* - * Increase the number of cached pages. - */ -void -vm_contig_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high) -{ - int actl, actmax, inactl, inactmax; - - vm_page_lock_queues(); - inactl = 0; - inactmax = tries < 1 ? 0 : cnt.v_inactive_count; - actl = 0; - actmax = tries < 2 ? 
0 : cnt.v_active_count; -again: - if (inactl < inactmax && vm_contig_launder(PQ_INACTIVE, low, high)) { - inactl++; - goto again; - } - if (actl < actmax && vm_contig_launder(PQ_ACTIVE, low, high)) { - actl++; - goto again; - } - vm_page_unlock_queues(); -} - -/* - * Allocates a region from the kernel address map and pages within the - * specified physical address range to the kernel object, creates a wired - * mapping from the region to these pages, and returns the region's starting - * virtual address. The allocated pages are not necessarily physically - * contiguous. If M_ZERO is specified through the given flags, then the pages - * are zeroed before they are mapped. - */ -vm_offset_t -kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, - vm_paddr_t high, vm_memattr_t memattr) -{ - vm_object_t object = kernel_object; - vm_offset_t addr; - vm_ooffset_t end_offset, offset; - vm_page_t m; - int pflags, tries; - - size = round_page(size); - vm_map_lock(map); - if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { - vm_map_unlock(map); - return (0); - } - offset = addr - VM_MIN_KERNEL_ADDRESS; - vm_object_reference(object); - vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, - VM_PROT_ALL, 0); - if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT) - pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; - else - pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY; - if (flags & M_ZERO) - pflags |= VM_ALLOC_ZERO; - VM_OBJECT_LOCK(object); - end_offset = offset + size; - for (; offset < end_offset; offset += PAGE_SIZE) { - tries = 0; -retry: - m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1, - low, high, PAGE_SIZE, 0, memattr); - if (m == NULL) { - VM_OBJECT_UNLOCK(object); - if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) { - vm_map_unlock(map); - vm_contig_grow_cache(tries, low, high); - vm_map_lock(map); - VM_OBJECT_LOCK(object); - tries++; - goto retry; - } - /* - * Since the pages that were allocated by any previous - * iterations of this loop are not busy, they can be - * freed by vm_object_page_remove(), which is called - * by vm_map_delete(). - */ - vm_map_delete(map, addr, addr + size); - vm_map_unlock(map); - return (0); - } - if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) - pmap_zero_page(m); - m->valid = VM_PAGE_BITS_ALL; - } - VM_OBJECT_UNLOCK(object); - vm_map_unlock(map); - vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM | - VM_MAP_WIRE_NOHOLES); - return (addr); -} - -/* - * Allocates a region from the kernel address map, inserts the - * given physically contiguous pages into the kernel object, - * creates a wired mapping from the region to the pages, and - * returns the region's starting virtual address. If M_ZERO is - * specified through the given flags, then the pages are zeroed - * before they are mapped. 
- */ -vm_offset_t -kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, - vm_paddr_t high, u_long alignment, vm_paddr_t boundary, - vm_memattr_t memattr) -{ - vm_object_t object = kernel_object; - vm_offset_t addr; - vm_ooffset_t offset; - vm_page_t end_m, m; - int pflags, tries; - - size = round_page(size); - vm_map_lock(map); - if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { - vm_map_unlock(map); - return (0); - } - offset = addr - VM_MIN_KERNEL_ADDRESS; - vm_object_reference(object); - vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, - VM_PROT_ALL, 0); - if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT) - pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; - else - pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY; - if (flags & M_ZERO) - pflags |= VM_ALLOC_ZERO; - if (flags & M_NODUMP) - pflags |= VM_ALLOC_NODUMP; - VM_OBJECT_LOCK(object); - tries = 0; -retry: - m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, - atop(size), low, high, alignment, boundary, memattr); - if (m == NULL) { - VM_OBJECT_UNLOCK(object); - if (tries < ((flags & M_NOWAIT) != 0 ? 1 : 3)) { - vm_map_unlock(map); - vm_contig_grow_cache(tries, low, high); - vm_map_lock(map); - VM_OBJECT_LOCK(object); - tries++; - goto retry; - } - vm_map_delete(map, addr, addr + size); - vm_map_unlock(map); - return (0); - } - end_m = m + atop(size); - for (; m < end_m; m++) { - if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) - pmap_zero_page(m); - m->valid = VM_PAGE_BITS_ALL; - } - VM_OBJECT_UNLOCK(object); - vm_map_unlock(map); - vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM | - VM_MAP_WIRE_NOHOLES); - return (addr); -} diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c index 5e157a6..46e7f1c 100644 --- a/sys/vm/vm_kern.c +++ b/sys/vm/vm_kern.c @@ -195,6 +195,148 @@ kmem_alloc(map, size) } /* + * Allocates a region from the kernel address map and physical pages + * within the specified address range to the kernel object. Creates a + * wired mapping from this region to these pages, and returns the + * region's starting virtual address. The allocated pages are not + * necessarily physically contiguous. If M_ZERO is specified through the + * given flags, then the pages are zeroed before they are mapped. + */ +vm_offset_t +kmem_alloc_attr(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, + vm_paddr_t high, vm_memattr_t memattr) +{ + vm_object_t object = kernel_object; + vm_offset_t addr; + vm_ooffset_t end_offset, offset; + vm_page_t m; + int pflags, tries; + + size = round_page(size); + vm_map_lock(map); + if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { + vm_map_unlock(map); + return (0); + } + offset = addr - VM_MIN_KERNEL_ADDRESS; + vm_object_reference(object); + vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, + VM_PROT_ALL, 0); + if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT) + pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; + else + pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY; + if (flags & M_ZERO) + pflags |= VM_ALLOC_ZERO; + VM_OBJECT_LOCK(object); + end_offset = offset + size; + for (; offset < end_offset; offset += PAGE_SIZE) { + tries = 0; +retry: + m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, 1, + low, high, PAGE_SIZE, 0, memattr); + if (m == NULL) { + VM_OBJECT_UNLOCK(object); + if (tries < ((flags & M_NOWAIT) != 0 ? 
1 : 3)) { + vm_map_unlock(map); + vm_pageout_grow_cache(tries, low, high); + vm_map_lock(map); + VM_OBJECT_LOCK(object); + tries++; + goto retry; + } + + /* + * Since the pages that were allocated by any previous + * iterations of this loop are not busy, they can be + * freed by vm_object_page_remove(), which is called + * by vm_map_delete(). + */ + vm_map_delete(map, addr, addr + size); + vm_map_unlock(map); + return (0); + } + if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + m->valid = VM_PAGE_BITS_ALL; + } + VM_OBJECT_UNLOCK(object); + vm_map_unlock(map); + vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM | + VM_MAP_WIRE_NOHOLES); + return (addr); +} + +/* + * Allocates a region from the kernel address map and physically + * contiguous pages within the specified address range to the kernel + * object. Creates a wired mapping from this region to these pages, and + * returns the region's starting virtual address. If M_ZERO is specified + * through the given flags, then the pages are zeroed before they are + * mapped. + */ +vm_offset_t +kmem_alloc_contig(vm_map_t map, vm_size_t size, int flags, vm_paddr_t low, + vm_paddr_t high, u_long alignment, vm_paddr_t boundary, + vm_memattr_t memattr) +{ + vm_object_t object = kernel_object; + vm_offset_t addr; + vm_ooffset_t offset; + vm_page_t end_m, m; + int pflags, tries; + + size = round_page(size); + vm_map_lock(map); + if (vm_map_findspace(map, vm_map_min(map), size, &addr)) { + vm_map_unlock(map); + return (0); + } + offset = addr - VM_MIN_KERNEL_ADDRESS; + vm_object_reference(object); + vm_map_insert(map, object, offset, addr, addr + size, VM_PROT_ALL, + VM_PROT_ALL, 0); + if ((flags & (M_NOWAIT | M_USE_RESERVE)) == M_NOWAIT) + pflags = VM_ALLOC_INTERRUPT | VM_ALLOC_NOBUSY; + else + pflags = VM_ALLOC_SYSTEM | VM_ALLOC_NOBUSY; + if (flags & M_ZERO) + pflags |= VM_ALLOC_ZERO; + if (flags & M_NODUMP) + pflags |= VM_ALLOC_NODUMP; + VM_OBJECT_LOCK(object); + tries = 0; +retry: + m = vm_page_alloc_contig(object, OFF_TO_IDX(offset), pflags, + atop(size), low, high, alignment, boundary, memattr); + if (m == NULL) { + VM_OBJECT_UNLOCK(object); + if (tries < ((flags & M_NOWAIT) != 0 ? 
1 : 3)) { + vm_map_unlock(map); + vm_pageout_grow_cache(tries, low, high); + vm_map_lock(map); + VM_OBJECT_LOCK(object); + tries++; + goto retry; + } + vm_map_delete(map, addr, addr + size); + vm_map_unlock(map); + return (0); + } + end_m = m + atop(size); + for (; m < end_m; m++) { + if ((flags & M_ZERO) && (m->flags & PG_ZERO) == 0) + pmap_zero_page(m); + m->valid = VM_PAGE_BITS_ALL; + } + VM_OBJECT_UNLOCK(object); + vm_map_unlock(map); + vm_map_wire(map, addr, addr + size, VM_MAP_WIRE_SYSTEM | + VM_MAP_WIRE_NOHOLES); + return (addr); +} + +/* * kmem_free: * * Release a region of kernel virtual memory allocated diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h index 056eac5..b3b1ad4 100644 --- a/sys/vm/vm_map.h +++ b/sys/vm/vm_map.h @@ -200,13 +200,13 @@ struct vm_map { #ifdef _KERNEL static __inline vm_offset_t -vm_map_max(vm_map_t map) +vm_map_max(const struct vm_map *map) { return (map->max_offset); } static __inline vm_offset_t -vm_map_min(vm_map_t map) +vm_map_min(const struct vm_map *map) { return (map->min_offset); } diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c index ec96135..ccadd8d 100644 --- a/sys/vm/vm_page.c +++ b/sys/vm/vm_page.c @@ -451,63 +451,6 @@ vm_page_startup(vm_offset_t vaddr) return (vaddr); } - -CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0); - -void -vm_page_aflag_set(vm_page_t m, uint8_t bits) -{ - uint32_t *addr, val; - - /* - * The PGA_WRITEABLE flag can only be set if the page is managed and - * VPO_BUSY. Currently, this flag is only set by pmap_enter(). - */ - KASSERT((bits & PGA_WRITEABLE) == 0 || - (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY, - ("PGA_WRITEABLE and !VPO_BUSY")); - - /* - * We want to use atomic updates for m->aflags, which is a - * byte wide. Not all architectures provide atomic operations - * on the single-byte destination. Punt and access the whole - * 4-byte word with an atomic update. Parallel non-atomic - * updates to the fields included in the update by proximity - * are handled properly by atomics. - */ - addr = (void *)&m->aflags; - MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0); - val = bits; -#if BYTE_ORDER == BIG_ENDIAN - val <<= 24; -#endif - atomic_set_32(addr, val); -} - -void -vm_page_aflag_clear(vm_page_t m, uint8_t bits) -{ - uint32_t *addr, val; - - /* - * The PGA_REFERENCED flag can only be cleared if the object - * containing the page is locked. - */ - KASSERT((bits & PGA_REFERENCED) == 0 || VM_OBJECT_LOCKED(m->object), - ("PGA_REFERENCED and !VM_OBJECT_LOCKED")); - - /* - * See the comment in vm_page_aflag_set(). - */ - addr = (void *)&m->aflags; - MPASS(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0); - val = bits; -#if BYTE_ORDER == BIG_ENDIAN - val <<= 24; -#endif - atomic_clear_32(addr, val); -} - void vm_page_reference(vm_page_t m) { @@ -1480,7 +1423,7 @@ retry: cpindex = pindex; for (m = m_ret; m < &m_ret[npages]; m++) { m->aflags = 0; - m->flags &= flags; + m->flags = (m->flags | PG_NODUMP) & flags; if ((req & VM_ALLOC_WIRED) != 0) m->wire_count = 1; /* Unmanaged pages don't use "act_count". */ diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h index da7fd89..e95f173 100644 --- a/sys/vm/vm_page.h +++ b/sys/vm/vm_page.h @@ -237,13 +237,14 @@ extern struct vpglocks pa_lock[]; #define vm_page_queue_free_mtx vm_page_queue_free_lock.data /* - * These are the flags defined for vm_page. - * - * aflags are updated by atomic accesses. Use the vm_page_aflag_set() - * and vm_page_aflag_clear() functions to set and clear the flags. 
+ * The vm_page's aflags are updated using atomic operations.  To set or clear
+ * these flags, the functions vm_page_aflag_set() and vm_page_aflag_clear()
+ * must be used.  Neither these flags nor these functions are part of the KBI.
  *
  * PGA_REFERENCED may be cleared only if the object containing the page is
- * locked.  It is set by both the MI and MD VM layers.
+ * locked.  It is set by both the MI and MD VM layers.  However, kernel
+ * loadable modules should not directly set this flag.  They should call
+ * vm_page_reference() instead.
  *
  * PGA_WRITEABLE is set exclusively on managed pages by pmap_enter().  When it
  * does so, the page must be VPO_BUSY.  The MI VM layer must never access this
@@ -279,8 +280,12 @@ extern struct vpglocks pa_lock[];
 
 #ifdef _KERNEL
 
+#include <sys/systm.h>
+
 #include <vm/vm_param.h>
 
+#include <machine/atomic.h>
+
 /*
  *	Each pageable resident page falls into one of five lists:
  *
@@ -308,7 +313,6 @@ extern struct vpglocks pa_lock[];
  *
  */
 
-struct vnode;
 extern int vm_page_zero_count;
 
 extern vm_page_t vm_page_array;		/* First resident page in table */
@@ -348,8 +352,6 @@ extern struct vpglocks vm_page_queue_lock;
 #define	VM_ALLOC_COUNT_SHIFT	16
 #define	VM_ALLOC_COUNT(count)	((count) << VM_ALLOC_COUNT_SHIFT)
 
-void vm_page_aflag_set(vm_page_t m, uint8_t bits);
-void vm_page_aflag_clear(vm_page_t m, uint8_t bits);
 void vm_page_busy(vm_page_t m);
 void vm_page_flash(vm_page_t m);
 void vm_page_io_start(vm_page_t m);
@@ -425,6 +427,75 @@ void vm_page_object_lock_assert(vm_page_t m);
 #endif
 
 /*
+ * We want to use atomic updates for the aflags field, which is 8 bits wide.
+ * However, not all architectures support atomic operations on 8-bit
+ * destinations.  In order that we can easily use a 32-bit operation, we
+ * require that the aflags field be 32-bit aligned.
+ */
+CTASSERT(offsetof(struct vm_page, aflags) % sizeof(uint32_t) == 0);
+
+/*
+ *	Clear the given bits in the specified page.
+ */
+static inline void
+vm_page_aflag_clear(vm_page_t m, uint8_t bits)
+{
+	uint32_t *addr, val;
+
+	/*
+	 * The PGA_REFERENCED flag can only be cleared if the object
+	 * containing the page is locked.
+	 */
+	if ((bits & PGA_REFERENCED) != 0)
+		VM_PAGE_OBJECT_LOCK_ASSERT(m);
+
+	/*
+	 * Access the whole 32-bit word containing the aflags field with an
+	 * atomic update.  Parallel non-atomic updates to the other fields
+	 * within this word are handled properly by the atomic update.
+	 */
+	addr = (void *)&m->aflags;
+	KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0,
+	    ("vm_page_aflag_clear: aflags is misaligned"));
+	val = bits;
+#if BYTE_ORDER == BIG_ENDIAN
+	val <<= 24;
+#endif
+	atomic_clear_32(addr, val);
+}
+
+/*
+ *	Set the given bits in the specified page.
+ */
+static inline void
+vm_page_aflag_set(vm_page_t m, uint8_t bits)
+{
+	uint32_t *addr, val;
+
+	/*
+	 * The PGA_WRITEABLE flag can only be set if the page is managed and
+	 * VPO_BUSY.  Currently, this flag is only set by pmap_enter().
+	 */
+	KASSERT((bits & PGA_WRITEABLE) == 0 ||
+	    (m->oflags & (VPO_UNMANAGED | VPO_BUSY)) == VPO_BUSY,
+	    ("vm_page_aflag_set: PGA_WRITEABLE and !VPO_BUSY"));
+
+	/*
+	 * Access the whole 32-bit word containing the aflags field with an
+	 * atomic update.  Parallel non-atomic updates to the other fields
+	 * within this word are handled properly by the atomic update.
+	 */
+	addr = (void *)&m->aflags;
+	KASSERT(((uintptr_t)addr & (sizeof(uint32_t) - 1)) == 0,
+	    ("vm_page_aflag_set: aflags is misaligned"));
+	val = bits;
+#if BYTE_ORDER == BIG_ENDIAN
+	val <<= 24;
+#endif
+	atomic_set_32(addr, val);
+}
+
+/*
  *	vm_page_dirty:
  *
  *	Set all bits in the page's dirty field.
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index 11d040d..3994ce1 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -209,11 +209,14 @@ int vm_page_max_wired;		/* XXX max # of wired pages system-wide */
 SYSCTL_INT(_vm, OID_AUTO, max_wired,
 	CTLFLAG_RW, &vm_page_max_wired, 0, "System-wide limit to wired page count");
 
+static boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
+static boolean_t vm_pageout_launder(int, int, vm_paddr_t, vm_paddr_t);
 #if !defined(NO_SWAPPING)
 static void vm_pageout_map_deactivate_pages(vm_map_t, long);
 static void vm_pageout_object_deactivate_pages(pmap_t, vm_object_t, long);
 static void vm_req_vmdaemon(int req);
 #endif
+static boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
 static void vm_pageout_page_stats(void);
 
 /*
@@ -247,7 +250,7 @@ vm_pageout_init_marker(vm_page_t marker, u_short queue)
 * This function depends on both the lock portion of struct vm_object
 * and normal struct vm_page being type stable.
 */
-boolean_t
+static boolean_t
 vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
 {
	struct vm_page marker;
@@ -286,7 +289,7 @@ vm_pageout_fallback_object_lock(vm_page_t m, vm_page_t *next)
 *
 * This function depends on normal struct vm_page being type stable.
 */
-boolean_t
+static boolean_t
 vm_pageout_page_lock(vm_page_t m, vm_page_t *next)
 {
	struct vm_page marker;
@@ -558,6 +561,138 @@ vm_pageout_flush(vm_page_t *mc, int count, int flags, int mreq, int *prunlen,
	return (numpagedout);
 }
 
+static boolean_t
+vm_pageout_launder(int queue, int tries, vm_paddr_t low, vm_paddr_t high)
+{
+	struct mount *mp;
+	struct vnode *vp;
+	vm_object_t object;
+	vm_paddr_t pa;
+	vm_page_t m, m_tmp, next;
+	int vfslocked;
+
+	vm_page_lock_queues();
+	TAILQ_FOREACH_SAFE(m, &vm_page_queues[queue].pl, pageq, next) {
+		KASSERT(m->queue == queue,
+		    ("vm_pageout_launder: page %p's queue is not %d", m,
+		    queue));
+		if ((m->flags & PG_MARKER) != 0)
+			continue;
+		pa = VM_PAGE_TO_PHYS(m);
+		if (pa < low || pa + PAGE_SIZE > high)
+			continue;
+		if (!vm_pageout_page_lock(m, &next) || m->hold_count != 0) {
+			vm_page_unlock(m);
+			continue;
+		}
+		object = m->object;
+		if (!VM_OBJECT_TRYLOCK(object) &&
+		    (!vm_pageout_fallback_object_lock(m, &next) ||
+		    m->hold_count != 0)) {
+			vm_page_unlock(m);
+			VM_OBJECT_UNLOCK(object);
+			continue;
+		}
+		if ((m->oflags & VPO_BUSY) != 0 || m->busy != 0) {
+			if (tries == 0) {
+				vm_page_unlock(m);
+				VM_OBJECT_UNLOCK(object);
+				continue;
+			}
+			vm_page_sleep(m, "vpctw0");
+			VM_OBJECT_UNLOCK(object);
+			return (FALSE);
+		}
+		vm_page_test_dirty(m);
+		if (m->dirty == 0)
+			pmap_remove_all(m);
+		if (m->dirty != 0) {
+			vm_page_unlock(m);
+			if (tries == 0 || (object->flags & OBJ_DEAD) != 0) {
+				VM_OBJECT_UNLOCK(object);
+				continue;
+			}
+			if (object->type == OBJT_VNODE) {
+				vm_page_unlock_queues();
+				vp = object->handle;
+				vm_object_reference_locked(object);
+				VM_OBJECT_UNLOCK(object);
+				(void)vn_start_write(vp, &mp, V_WAIT);
+				vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+				vn_lock(vp, LK_EXCLUSIVE | LK_RETRY);
+				VM_OBJECT_LOCK(object);
+				vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
+				VM_OBJECT_UNLOCK(object);
+				VOP_UNLOCK(vp, 0);
+				VFS_UNLOCK_GIANT(vfslocked);
+				vm_object_deallocate(object);
+				vn_finished_write(mp);
+				return (TRUE);
+			} else if (object->type == OBJT_SWAP ||
+			    object->type == OBJT_DEFAULT) {
+				vm_page_unlock_queues();
+				m_tmp = m;
+				vm_pageout_flush(&m_tmp, 1, VM_PAGER_PUT_SYNC,
+				    0, NULL, NULL);
+				VM_OBJECT_UNLOCK(object);
+				return (TRUE);
+			}
+		} else {
+			vm_page_cache(m);
+			vm_page_unlock(m);
+		}
+		VM_OBJECT_UNLOCK(object);
+	}
+	vm_page_unlock_queues();
+	return (FALSE);
+}
+
+/*
+ * Increase the number of cached pages.  The specified value, "tries",
+ * determines which categories of pages are cached:
+ *
+ *  0: All clean, inactive pages within the specified physical address range
+ *     are cached.  Will not sleep.
+ *  1: The vm_lowmem handlers are called.  All inactive pages within
+ *     the specified physical address range are cached.  May sleep.
+ *  2: The vm_lowmem handlers are called.  All inactive and active pages
+ *     within the specified physical address range are cached.  May sleep.
+ */
+void
+vm_pageout_grow_cache(int tries, vm_paddr_t low, vm_paddr_t high)
+{
+	int actl, actmax, inactl, inactmax;
+
+	if (tries > 0) {
+		/*
+		 * Decrease registered cache sizes.  The vm_lowmem handlers
+		 * may acquire locks and/or sleep, so they can only be invoked
+		 * when "tries" is greater than zero.
+		 */
+		EVENTHANDLER_INVOKE(vm_lowmem, 0);
+
+		/*
+		 * We do this explicitly after the caches have been drained
+		 * above.
+		 */
+		uma_reclaim();
+	}
+	inactl = 0;
+	inactmax = cnt.v_inactive_count;
+	actl = 0;
+	actmax = tries < 2 ? 0 : cnt.v_active_count;
+again:
+	if (inactl < inactmax && vm_pageout_launder(PQ_INACTIVE, tries, low,
+	    high)) {
+		inactl++;
+		goto again;
+	}
+	if (actl < actmax && vm_pageout_launder(PQ_ACTIVE, tries, low, high)) {
+		actl++;
+		goto again;
+	}
+}
+
 #if !defined(NO_SWAPPING)
 /*
 *	vm_pageout_object_deactivate_pages
@@ -738,7 +873,7 @@ vm_pageout_scan(int pass)
	vm_page_t m, next;
	struct vm_page marker;
	int page_shortage, maxscan, pcount;
-	int addl_page_shortage, addl_page_shortage_init;
+	int addl_page_shortage;
	vm_object_t object;
	int actcount;
	int vnodes_skipped = 0;
@@ -754,13 +889,19 @@ vm_pageout_scan(int pass)
	 */
	uma_reclaim();
 
-	addl_page_shortage_init = atomic_readandclear_int(&vm_pageout_deficit);
+	/*
+	 * The addl_page_shortage is the the number of temporarily
+	 * stuck pages in the inactive queue.  In other words, the
+	 * number of pages from cnt.v_inactive_count that should be
+	 * discounted in setting the target for the active queue scan.
+	 */
+	addl_page_shortage = atomic_readandclear_int(&vm_pageout_deficit);
 
	/*
	 * Calculate the number of pages we want to either free or move
	 * to the cache.
	 */
-	page_shortage = vm_paging_target() + addl_page_shortage_init;
+	page_shortage = vm_paging_target() + addl_page_shortage;
 
	vm_pageout_init_marker(&marker, PQ_INACTIVE);
 
@@ -786,8 +927,6 @@ vm_pageout_scan(int pass)
		maxlaunder = 10000;
	vm_page_lock_queues();
	queues_locked = TRUE;
-rescan0:
-	addl_page_shortage = addl_page_shortage_init;
	maxscan = cnt.v_inactive_count;
 
	for (m = TAILQ_FIRST(&vm_page_queues[PQ_INACTIVE].pl);
@@ -795,12 +934,9 @@ rescan0:
	    m = next) {
		KASSERT(queues_locked, ("unlocked queues"));
		mtx_assert(&vm_page_queue_mtx, MA_OWNED);
+		KASSERT(m->queue == PQ_INACTIVE, ("Inactive queue %p", m));
		cnt.v_pdpages++;
-
-		if (m->queue != PQ_INACTIVE)
-			goto rescan0;
-
		next = TAILQ_NEXT(m, pageq);
 
		/*
@@ -815,38 +951,31 @@ rescan0:
		    ("Unmanaged page %p cannot be in inactive queue", m));
 
		/*
-		 * Lock the page.
+		 * The page or object lock acquisitions fail if the
+		 * page was removed from the queue or moved to a
+		 * different position within the queue.  In either
+		 * case, addl_page_shortage should not be incremented.
		 */
		if (!vm_pageout_page_lock(m, &next)) {
			vm_page_unlock(m);
-			addl_page_shortage++;
			continue;
		}
-
-		/*
-		 * A held page may be undergoing I/O, so skip it.
-		 */
-		if (m->hold_count) {
+		object = m->object;
+		if (!VM_OBJECT_TRYLOCK(object) &&
+		    !vm_pageout_fallback_object_lock(m, &next)) {
			vm_page_unlock(m);
-			vm_page_requeue(m);
-			addl_page_shortage++;
+			VM_OBJECT_UNLOCK(object);
			continue;
		}
 
		/*
-		 * Don't mess with busy pages, keep in the front of the
-		 * queue, most likely are being paged out.
+		 * Don't mess with busy pages, keep them at at the
+		 * front of the queue, most likely they are being
+		 * paged out.  Increment addl_page_shortage for busy
+		 * pages, because they may leave the inactive queue
+		 * shortly after page scan is finished.
		 */
-		object = m->object;
-		if (!VM_OBJECT_TRYLOCK(object) &&
-		    (!vm_pageout_fallback_object_lock(m, &next) ||
-		    m->hold_count != 0)) {
-			VM_OBJECT_UNLOCK(object);
-			vm_page_unlock(m);
-			addl_page_shortage++;
-			continue;
-		}
-		if (m->busy || (m->oflags & VPO_BUSY)) {
+		if (m->busy != 0 || (m->oflags & VPO_BUSY) != 0) {
			vm_page_unlock(m);
			VM_OBJECT_UNLOCK(object);
			addl_page_shortage++;
@@ -906,6 +1035,21 @@ rescan0:
			goto relock_queues;
		}
 
+		if (m->hold_count != 0) {
+			vm_page_unlock(m);
+			VM_OBJECT_UNLOCK(object);
+
+			/*
+			 * Held pages are essentially stuck in the
+			 * queue.  So, they ought to be discounted
+			 * from cnt.v_inactive_count.  See the
+			 * calculation of the page_shortage for the
+			 * loop over the active queue below.
+			 */
+			addl_page_shortage++;
+			goto relock_queues;
+		}
+
		/*
		 * If the upper level VM system does not believe that the page
		 * is fully dirty, but it is mapped for write access, then we
diff --git a/sys/vm/vm_pageout.h b/sys/vm/vm_pageout.h
index 6897bbb..c7b4e90 100644
--- a/sys/vm/vm_pageout.h
+++ b/sys/vm/vm_pageout.h
@@ -101,10 +101,8 @@ extern void vm_wait(void);
 extern void vm_waitpfault(void);
 
 #ifdef _KERNEL
-boolean_t vm_pageout_fallback_object_lock(vm_page_t, vm_page_t *);
 int vm_pageout_flush(vm_page_t *, int, int, int, int *, boolean_t *);
+void vm_pageout_grow_cache(int, vm_paddr_t, vm_paddr_t);
 void vm_pageout_oom(int shortage);
-boolean_t vm_pageout_page_lock(vm_page_t, vm_page_t *);
-void vm_contig_grow_cache(int, vm_paddr_t, vm_paddr_t);
 #endif
 #endif	/* _VM_VM_PAGEOUT_H_ */
diff --git a/sys/vm/vm_reserv.c b/sys/vm/vm_reserv.c
index 10db93c..549e710 100644
--- a/sys/vm/vm_reserv.c
+++ b/sys/vm/vm_reserv.c
@@ -445,7 +445,7 @@ vm_reserv_alloc_contig(vm_object_t object, vm_pindex_t pindex, u_long npages,
		m += VM_LEVEL_0_NPAGES;
		first += VM_LEVEL_0_NPAGES;
		allocpages -= VM_LEVEL_0_NPAGES;
-	} while (allocpages > VM_LEVEL_0_NPAGES);
+	} while (allocpages > 0);
	return (m_ret);
 
	/*