author     alfred <alfred@FreeBSD.org>    2001-05-19 01:28:09 +0000
committer  alfred <alfred@FreeBSD.org>    2001-05-19 01:28:09 +0000
commit     a3f0842419d98da211706f921fc626e160cd960b (patch)
tree       e86922a5639c32e1242d4f3088fc487f3be5b236 /sys/vm
parent     9eda9187f024233436e6a743f13bd938b1a0f19c (diff)
download   FreeBSD-src-a3f0842419d98da211706f921fc626e160cd960b.zip
           FreeBSD-src-a3f0842419d98da211706f921fc626e160cd960b.tar.gz
Introduce a global lock for the vm subsystem (vm_mtx).
vm_mtx does not recurse and is required for most low level vm operations.

Faults cannot be taken without holding Giant.

Memory subsystems can now call the base page allocators safely.

Almost all atomic ops were removed as they are covered under the vm mutex.

Alpha and ia64 now need to catch up to i386's trap handlers.

FFS and NFS have been tested, other filesystems will need minor changes
(grabbing the vm lock when twiddling page properties).

Reviewed (partially) by: jake, jhb
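Throughout the diff, entry points that may be reached with or without vm_mtx
already held use a conditional-acquire idiom (see kmem_alloc(), kmem_free(),
contigmalloc() and vm_map_growstack() below). A minimal sketch of that idiom
follows; the function name and body are illustrative only and do not appear
in the patch:

	#include <sys/param.h>
	#include <sys/lock.h>
	#include <sys/mutex.h>

	extern struct mtx vm_mtx;	/* global VM lock introduced by this patch */

	/*
	 * Illustrative only: take vm_mtx unless the caller already holds it.
	 * vm_mtx does not recurse, so the ownership test is required.
	 */
	static void
	example_vm_op(void)
	{
		int hadvmlock;

		hadvmlock = mtx_owned(&vm_mtx);
		if (!hadvmlock)
			mtx_lock(&vm_mtx);

		/* ... page, map, or object manipulation under vm_mtx ... */

		if (!hadvmlock)
			mtx_unlock(&vm_mtx);
	}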
Diffstat (limited to 'sys/vm')
-rw-r--r--  sys/vm/default_pager.c |   2
-rw-r--r--  sys/vm/phys_pager.c    |  16
-rw-r--r--  sys/vm/swap_pager.c    |  77
-rw-r--r--  sys/vm/vm.h            |   4
-rw-r--r--  sys/vm/vm_fault.c      |  65
-rw-r--r--  sys/vm/vm_glue.c       |  38
-rw-r--r--  sys/vm/vm_init.c       |   8
-rw-r--r--  sys/vm/vm_kern.c       |  74
-rw-r--r--  sys/vm/vm_map.c        |  41
-rw-r--r--  sys/vm/vm_map.h        |  22
-rw-r--r--  sys/vm/vm_meter.c      |   6
-rw-r--r--  sys/vm/vm_mmap.c       |  60
-rw-r--r--  sys/vm/vm_object.c     |  58
-rw-r--r--  sys/vm/vm_object.h     |  35
-rw-r--r--  sys/vm/vm_page.c       | 123
-rw-r--r--  sys/vm/vm_page.h       |  31
-rw-r--r--  sys/vm/vm_pageout.c    |  19
-rw-r--r--  sys/vm/vm_pager.c      |  27
-rw-r--r--  sys/vm/vm_pager.h      |  16
-rw-r--r--  sys/vm/vm_unix.c       |   7
-rw-r--r--  sys/vm/vm_zone.c       |  32
-rw-r--r--  sys/vm/vnode_pager.c   |  53
22 files changed, 643 insertions, 171 deletions
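In the phys_pager.c and swap_pager.c hunks that follow, the sx(9) locks that
serialized pager creation are replaced by a plain integer interlock slept on
with msleep() under vm_mtx, presumably because msleep() can atomically drop
and re-take vm_mtx around the wait. A hedged sketch of that interlock pattern,
using illustrative names rather than the ones in the patch:

	/*
	 * Illustrative sketch of the msleep()-based interlock used by
	 * phys_pager_alloc() and swap_pager_alloc() below; these names are
	 * made up.  vm_mtx must be held on entry and is held on return.
	 */
	static int example_alloc_interlock;	/* 0 idle, 1 held, -1 held w/ waiters */

	static void
	example_interlock_enter(void)
	{

		mtx_assert(&vm_mtx, MA_OWNED);
		while (example_alloc_interlock != 0) {
			example_alloc_interlock = -1;	/* record that we are waiting */
			msleep(&example_alloc_interlock, &vm_mtx, PVM, "exalck", 0);
		}
		example_alloc_interlock = 1;
	}

	static void
	example_interlock_exit(void)
	{

		mtx_assert(&vm_mtx, MA_OWNED);
		if (example_alloc_interlock < 0)
			wakeup(&example_alloc_interlock);
		example_alloc_interlock = 0;
	}

The patch itself folds the enter and exit steps directly into the pager
allocation routines rather than using helper functions.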
diff --git a/sys/vm/default_pager.c b/sys/vm/default_pager.c
index f5d88a5..0fb4896 100644
--- a/sys/vm/default_pager.c
+++ b/sys/vm/default_pager.c
@@ -41,6 +41,8 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c
index 1f00ea0..d34672b 100644
--- a/sys/vm/phys_pager.c
+++ b/sys/vm/phys_pager.c
@@ -34,7 +34,6 @@
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
-#include <sys/sx.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -43,7 +42,7 @@
#include <vm/vm_zone.h>
/* prevent concurrant creation races */
-static struct sx phys_pager_sx;
+static int phys_pager_alloc_lock;
/* list of device pager objects */
static struct pagerlst phys_pager_object_list;
/* protect access to phys_pager_object_list */
@@ -54,7 +53,6 @@ phys_pager_init(void)
{
TAILQ_INIT(&phys_pager_object_list);
- sx_init(&phys_pager_sx, "phys_pager create");
mtx_init(&phys_pager_mtx, "phys_pager list", MTX_DEF);
}
@@ -76,8 +74,11 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
/*
* Lock to prevent object creation race condition.
*/
- sx_xlock(&phys_pager_sx);
-
+ while (phys_pager_alloc_lock) {
+ phys_pager_alloc_lock = -1;
+ msleep(&phys_pager_alloc_lock, &vm_mtx, PVM, "swpalc", 0);
+ }
+
/*
* Look up pager, creating as necessary.
*/
@@ -101,7 +102,10 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
if (OFF_TO_IDX(foff + size) > object->size)
object->size = OFF_TO_IDX(foff + size);
}
- sx_xunlock(&phys_pager_sx);
+ if (phys_pager_alloc_lock)
+ wakeup(&phys_pager_alloc_lock);
+ phys_pager_alloc_lock = 0;
+
} else {
object = vm_object_allocate(OBJT_PHYS,
OFF_TO_IDX(foff + size));
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 8d343f4..44f4465 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -80,7 +80,6 @@
#include <sys/sysctl.h>
#include <sys/blist.h>
#include <sys/lock.h>
-#include <sys/sx.h>
#include <sys/vmmeter.h>
#ifndef MAX_PAGEOUT_CLUSTER
@@ -119,6 +118,7 @@ static int nsw_wcount_sync; /* limit write buffers / synchronous */
static int nsw_wcount_async; /* limit write buffers / asynchronous */
static int nsw_wcount_async_max;/* assigned maximum */
static int nsw_cluster_max; /* maximum VOP I/O allowed */
+static int sw_alloc_interlock; /* swap pager allocation interlock */
struct blist *swapblist;
static struct swblock **swhash;
@@ -145,7 +145,6 @@ SYSCTL_INT(_vm, OID_AUTO, swap_async_max,
#define NOBJLIST(handle) \
(&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)])
-static struct sx sw_alloc_sx; /* prevent concurrant creation */
static struct mtx sw_alloc_mtx; /* protect list manipulation */
static struct pagerlst swap_pager_object_list[NOBJLISTS];
struct pagerlst swap_pager_un_object_list;
@@ -233,6 +232,8 @@ static daddr_t swp_pager_meta_ctl __P((vm_object_t, vm_pindex_t, int));
static __inline void
swp_sizecheck()
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
if (vm_swap_size < nswap_lowat) {
if (swap_pager_almost_full == 0) {
printf("swap_pager: out of swap space\n");
@@ -264,7 +265,6 @@ swap_pager_init()
for (i = 0; i < NOBJLISTS; ++i)
TAILQ_INIT(&swap_pager_object_list[i]);
TAILQ_INIT(&swap_pager_un_object_list);
- sx_init(&sw_alloc_sx, "swap_pager create");
mtx_init(&sw_alloc_mtx, "swap_pager list", MTX_DEF);
/*
@@ -389,7 +389,10 @@ swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
* of the handle.
*/
- sx_xlock(&sw_alloc_sx);
+ while (sw_alloc_interlock) {
+ sw_alloc_interlock = -1;
+ msleep(&sw_alloc_interlock, &vm_mtx, PVM, "swpalc", 0);
+ }
object = vm_pager_object_lookup(NOBJLIST(handle), handle);
@@ -403,7 +406,9 @@ swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
swp_pager_meta_build(object, 0, SWAPBLK_NONE);
}
- sx_xunlock(&sw_alloc_sx);
+ if (sw_alloc_interlock < 0)
+ wakeup(&sw_alloc_interlock);
+ sw_alloc_interlock = 0;
} else {
object = vm_object_allocate(OBJT_DEFAULT,
OFF_TO_IDX(offset + PAGE_MASK + size));
@@ -478,6 +483,7 @@ swap_pager_dealloc(object)
*
* This routine may not block
* This routine must be called at splvm().
+ * vm_mtx should be held
*/
static __inline daddr_t
@@ -486,6 +492,7 @@ swp_pager_getswapspace(npages)
{
daddr_t blk;
+ mtx_assert(&vm_mtx, MA_OWNED);
if ((blk = blist_alloc(swapblist, npages)) == SWAPBLK_NONE) {
if (swap_pager_full != 2) {
printf("swap_pager_getswapspace: failed\n");
@@ -514,6 +521,7 @@ swp_pager_getswapspace(npages)
*
* This routine may not block
* This routine must be called at splvm().
+ * vm_mtx should be held
*/
static __inline void
@@ -521,6 +529,8 @@ swp_pager_freeswapspace(blk, npages)
daddr_t blk;
int npages;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
blist_free(swapblist, blk, npages);
vm_swap_size += npages;
/* per-swap area stats */
@@ -551,6 +561,9 @@ swap_pager_freespace(object, start, size)
vm_size_t size;
{
int s = splvm();
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+
swp_pager_meta_free(object, start, size);
splx(s);
}
@@ -635,6 +648,8 @@ swap_pager_copy(srcobject, dstobject, offset, destroysource)
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
+
/*
* If destroysource is set, we remove the source object from the
* swap_pager internal queue now.
@@ -881,7 +896,9 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
* FREE PAGE(s) - destroy underlying swap that is no longer
* needed.
*/
+ mtx_lock(&vm_mtx);
swp_pager_meta_free(object, start, count);
+ mtx_unlock(&vm_mtx);
splx(s);
bp->bio_resid = 0;
biodone(bp);
@@ -892,6 +909,7 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
* Execute read or write
*/
+ mtx_lock(&vm_mtx);
while (count > 0) {
daddr_t blk;
@@ -954,7 +972,9 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
bp->bio_resid -= PAGE_SIZE;
} else {
if (nbp == NULL) {
+ mtx_unlock(&vm_mtx);
nbp = getchainbuf(bp, swapdev_vp, B_ASYNC);
+ mtx_lock(&vm_mtx);
nbp->b_blkno = blk;
nbp->b_bcount = 0;
nbp->b_data = data;
@@ -985,6 +1005,7 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
/* nbp = NULL; */
}
+ mtx_unlock(&vm_mtx);
/*
* Wait for completion.
*/
@@ -1281,6 +1302,7 @@ swap_pager_putpages(object, m, count, sync, rtvals)
* at this time.
*/
s = splvm();
+ mtx_unlock(&vm_mtx);
mtx_lock(&pbuf_mtx);
n -= nsw_wcount_async_max;
if (nsw_wcount_async + n >= 0) {
@@ -1289,6 +1311,7 @@ swap_pager_putpages(object, m, count, sync, rtvals)
wakeup(&nsw_wcount_async);
}
mtx_unlock(&pbuf_mtx);
+ mtx_lock(&vm_mtx);
splx(s);
}
@@ -1399,6 +1422,8 @@ swap_pager_putpages(object, m, count, sync, rtvals)
swapdev_vp->v_numoutput++;
splx(s);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
/*
* asynchronous
@@ -1410,9 +1435,12 @@ swap_pager_putpages(object, m, count, sync, rtvals)
bp->b_iodone = swp_pager_async_iodone;
BUF_KERNPROC(bp);
BUF_STRATEGY(bp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
for (j = 0; j < n; ++j)
rtvals[i+j] = VM_PAGER_PEND;
+ /* restart outer loop */
continue;
}
@@ -1445,6 +1473,8 @@ swap_pager_putpages(object, m, count, sync, rtvals)
* normal async completion, which frees everything up.
*/
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
swp_pager_async_iodone(bp);
splx(s);
@@ -1732,7 +1762,8 @@ swp_pager_hash(vm_object_t object, vm_pindex_t index)
*
* This routine must be called at splvm(), except when used to convert
* an OBJT_DEFAULT object into an OBJT_SWAP object.
-
+ *
+ * Requires vm_mtx.
*/
static void
@@ -1744,6 +1775,7 @@ swp_pager_meta_build(
struct swblock *swap;
struct swblock **pswap;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Convert default object to swap object if necessary
*/
@@ -1830,12 +1862,16 @@ retry:
* out. This routine does *NOT* operate on swap metadata associated
* with resident pages.
*
+ * vm_mtx must be held
* This routine must be called at splvm()
*/
static void
swp_pager_meta_free(vm_object_t object, vm_pindex_t index, daddr_t count)
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (object->type != OBJT_SWAP)
return;
@@ -1875,6 +1911,7 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t index, daddr_t count)
* an object.
*
* This routine must be called at splvm()
+ * Requires vm_mtx.
*/
static void
@@ -1882,6 +1919,8 @@ swp_pager_meta_free_all(vm_object_t object)
{
daddr_t index = 0;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (object->type != OBJT_SWAP)
return;
@@ -1930,6 +1969,7 @@ swp_pager_meta_free_all(vm_object_t object)
* busy page.
*
* This routine must be called at splvm().
+ * Requires vm_mtx.
*
* SWM_FREE remove and free swap block from metadata
* SWM_POP remove from meta data but do not free.. pop it out
@@ -2032,18 +2072,24 @@ vm_pager_chain_iodone(struct buf *nbp)
* Obtain a physical buffer and chain it to its parent buffer. When
* I/O completes, the parent buffer will be B_SIGNAL'd. Errors are
* automatically propagated to the parent
+ *
+ * vm_mtx can't be held
*/
struct buf *
getchainbuf(struct bio *bp, struct vnode *vp, int flags)
{
- struct buf *nbp = getpbuf(NULL);
- u_int *count = (u_int *)&(bp->bio_caller1);
+ struct buf *nbp;
+ u_int *count;
+
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+ nbp = getpbuf(NULL);
+ count = (u_int *)&(bp->bio_caller1);
nbp->b_caller1 = bp;
++(*count);
- if (*count > 4)
+ if (*count > 4)
waitchainbuf(bp, 4, 0);
nbp->b_iocmd = bp->bio_cmd;
@@ -2063,6 +2109,9 @@ getchainbuf(struct bio *bp, struct vnode *vp, int flags)
void
flushchainbuf(struct buf *nbp)
{
+
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
if (nbp->b_bcount) {
nbp->b_bufsize = nbp->b_bcount;
if (nbp->b_iocmd == BIO_WRITE)
@@ -2072,14 +2121,19 @@ flushchainbuf(struct buf *nbp)
} else {
bufdone(nbp);
}
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
-void
+static void
waitchainbuf(struct bio *bp, int limit, int done)
{
int s;
- u_int *count = (u_int *)&(bp->bio_caller1);
+ u_int *count;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+ mtx_lock(&Giant);
+ count = (u_int *)&(bp->bio_caller1);
s = splbio();
while (*count > limit) {
bp->bio_flags |= BIO_FLAG1;
@@ -2092,6 +2146,7 @@ waitchainbuf(struct bio *bp, int limit, int done)
}
biodone(bp);
}
+ mtx_unlock(&Giant);
splx(s);
}
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
index 38f04ac..5915b29 100644
--- a/sys/vm/vm.h
+++ b/sys/vm/vm.h
@@ -95,6 +95,10 @@ typedef struct vm_map *vm_map_t;
struct vm_object;
typedef struct vm_object *vm_object_t;
+#ifdef _KERNEL
+extern struct mtx vm_mtx;
+#endif
+
#ifndef _KERNEL
/*
* This is defined in <sys/types.h> for the kernel so that non-vm kernel
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index a1bad69..f31f12b 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -81,6 +81,8 @@
#include <sys/vnode.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -134,6 +136,8 @@ unlock_map(struct faultstate *fs)
static void
_unlock_things(struct faultstate *fs, int dealloc)
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
vm_object_pip_wakeup(fs->object);
if (fs->object != fs->first_object) {
vm_page_free(fs->first_m);
@@ -145,8 +149,15 @@ _unlock_things(struct faultstate *fs, int dealloc)
}
unlock_map(fs);
if (fs->vp != NULL) {
- vput(fs->vp);
+ struct vnode *vp;
+
+ vp = fs->vp;
fs->vp = NULL;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
+ vput(vp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
}
@@ -179,10 +190,41 @@ _unlock_things(struct faultstate *fs, int dealloc)
*
*
* The map in question must be referenced, and remains so.
- * Caller may hold no locks.
+ * Caller may hold no locks except the vm_mtx which will be
+ * locked if needed.
*/
+static int vm_fault1 __P((vm_map_t, vm_offset_t, vm_prot_t, int));
+
+static int vm_faults_no_vm_mtx;
+SYSCTL_INT(_vm, OID_AUTO, vm_faults_no_vm_mtx, CTLFLAG_RW,
+ &vm_faults_no_vm_mtx, 0, "");
+
+static int vm_faults_no_giant;
+SYSCTL_INT(_vm, OID_AUTO, vm_faults_no_giant, CTLFLAG_RW,
+ &vm_faults_no_giant, 0, "");
+
int
-vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags)
+vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags)
+{
+ int hadvmlock, ret;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock) {
+ mtx_lock(&vm_mtx);
+ vm_faults_no_vm_mtx++;
+ if (!mtx_owned(&Giant))
+ vm_faults_no_giant++;
+ }
+ ret = vm_fault1(map, vaddr, fault_type, fault_flags);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (ret);
+}
+
+static int
+vm_fault1(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags)
{
vm_prot_t prot;
int result;
@@ -194,7 +236,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags)
int faultcount;
struct faultstate fs;
- cnt.v_vm_faults++; /* needs lock XXX */
+ mtx_assert(&vm_mtx, MA_OWNED);
+ cnt.v_vm_faults++;
hardfault = 0;
RetryFault:;
@@ -251,7 +294,11 @@ RetryFault:;
vm_object_reference(fs.first_object);
vm_object_pip_add(fs.first_object, 1);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
fs.vp = vnode_pager_lock(fs.first_object);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if ((fault_type & VM_PROT_WRITE) &&
(fs.first_object->type == OBJT_VNODE)) {
vm_freeze_copyopts(fs.first_object,
@@ -723,7 +770,11 @@ readrest:
*/
if (fs.vp != NULL) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
vput(fs.vp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
fs.vp = NULL;
}
@@ -940,6 +991,7 @@ vm_fault_user_wire(map, start, end)
register pmap_t pmap;
int rv;
+ mtx_assert(&vm_mtx, MA_OWNED);
pmap = vm_map_pmap(map);
/*
@@ -1112,6 +1164,9 @@ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
*
* Return value:
* number of pages in marray
+ *
+ * This routine can't block.
+ * vm_mtx must be held.
*/
static int
vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
@@ -1127,6 +1182,8 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
vm_page_t rtm;
int cbehind, cahead;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
object = m->object;
pindex = m->pindex;
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index a180ae3..37c580a 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -161,6 +161,7 @@ useracc(addr, len, rw)
|| (vm_offset_t) addr + len < (vm_offset_t) addr) {
return (FALSE);
}
+ mtx_lock(&vm_mtx);
map = &curproc->p_vmspace->vm_map;
vm_map_lock_read(map);
/*
@@ -172,6 +173,7 @@ useracc(addr, len, rw)
trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), prot);
map->hint = save_hint;
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
return (rv == TRUE);
}
@@ -181,8 +183,12 @@ vslock(addr, len)
caddr_t addr;
u_int len;
{
- vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
+
+ mtx_lock(&vm_mtx);
+ vm_map_pageable(&curproc->p_vmspace->vm_map,
+ trunc_page((vm_offset_t)addr),
round_page((vm_offset_t)addr + len), FALSE);
+ mtx_unlock(&vm_mtx);
}
void
@@ -190,8 +196,12 @@ vsunlock(addr, len)
caddr_t addr;
u_int len;
{
- vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
+
+ mtx_lock(&vm_mtx);
+ vm_map_pageable(&curproc->p_vmspace->vm_map,
+ trunc_page((vm_offset_t)addr),
round_page((vm_offset_t)addr + len), TRUE);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -201,6 +211,8 @@ vsunlock(addr, len)
* machine-dependent layer to fill those in and make the new process
* ready to run. The new process is set up so that it returns directly
* to user mode to avoid stack copying and relocation problems.
+ *
+ * Called without vm_mtx.
*/
void
vm_fork(p1, p2, flags)
@@ -209,6 +221,7 @@ vm_fork(p1, p2, flags)
{
register struct user *up;
+ mtx_lock(&vm_mtx);
if ((flags & RFPROC) == 0) {
/*
* Divorce the memory, if it is shared, essentially
@@ -221,6 +234,7 @@ vm_fork(p1, p2, flags)
}
}
cpu_fork(p1, p2, flags);
+ mtx_unlock(&vm_mtx);
return;
}
@@ -275,6 +289,7 @@ vm_fork(p1, p2, flags)
* and make the child ready to run.
*/
cpu_fork(p1, p2, flags);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -360,10 +375,13 @@ scheduler(dummy)
mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED);
loop:
+ mtx_lock(&vm_mtx);
if (vm_page_count_min()) {
VM_WAIT;
+ mtx_unlock(&vm_mtx);
goto loop;
}
+ mtx_unlock(&vm_mtx);
mtx_unlock(&Giant);
pp = NULL;
@@ -442,6 +460,9 @@ SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2,
* If any procs have been sleeping/stopped for at least maxslp seconds,
* they are swapped. Else, we swap the longest-sleeping or stopped process,
* if any, otherwise the longest-resident process.
+ *
+ * Can block
+ * must be called with vm_mtx
*/
void
swapout_procs(action)
@@ -452,6 +473,8 @@ int action;
int outpri, outpri2;
int didswap = 0;
+ mtx_assert(&vm_mtx, MA_OWNED);
+ mtx_unlock(&vm_mtx);
outp = outp2 = NULL;
outpri = outpri2 = INT_MIN;
sx_slock(&allproc_lock);
@@ -465,6 +488,11 @@ retry:
PROC_UNLOCK(p);
continue;
}
+ /*
+ * only aiod changes vmspace, however it will be
+ * skipped because of the if statement above checking
+ * for P_SYSTEM
+ */
vm = p->p_vmspace;
mtx_lock_spin(&sched_lock);
if ((p->p_sflag & (PS_INMEM|PS_SWAPPING)) != PS_INMEM) {
@@ -516,6 +544,7 @@ retry:
}
mtx_unlock_spin(&sched_lock);
+ mtx_lock(&vm_mtx);
#if 0
/*
* XXX: This is broken. We release the lock we
@@ -531,7 +560,7 @@ retry:
*/
if (lockmgr(&vm->vm_map.lock,
LK_EXCLUSIVE | LK_NOWAIT,
- (void *)0, curproc)) {
+ NULL, curproc)) {
vmspace_free(vm);
PROC_UNLOCK(p);
continue;
@@ -548,8 +577,10 @@ retry:
swapout(p);
vmspace_free(vm);
didswap++;
+ mtx_unlock(&vm_mtx);
goto retry;
}
+ mtx_unlock(&vm_mtx);
PROC_UNLOCK(p);
}
}
@@ -558,6 +589,7 @@ retry:
* If we swapped something out, and another process needed memory,
* then wakeup the sched process.
*/
+ mtx_lock(&vm_mtx);
if (didswap)
wakeup(&proc0);
}
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
index ae336e1..35e4676 100644
--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c
@@ -73,6 +73,7 @@
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/systm.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -96,16 +97,20 @@ SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_FIRST, vm_mem_init, NULL)
* The start and end address of physical memory is passed in.
*/
+struct mtx vm_mtx;
+
/* ARGSUSED*/
static void
vm_mem_init(dummy)
void *dummy;
{
+
/*
* Initializes resident memory structures. From here on, all physical
* memory is accounted for, and we use only virtual addresses.
*/
-
+ mtx_init(&vm_mtx, "vm", MTX_DEF);
+ mtx_lock(&vm_mtx);
vm_set_page_size();
virtual_avail = vm_page_startup(avail_start, avail_end, virtual_avail);
@@ -118,4 +123,5 @@ vm_mem_init(dummy)
kmem_init(virtual_avail, virtual_end);
pmap_init(avail_start, avail_end);
vm_pager_init();
+ mtx_unlock(&vm_mtx);
}
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 14e4867..08ee486 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -106,11 +106,17 @@ kmem_alloc_pageable(map, size)
{
vm_offset_t addr;
int result;
+ int hadvmlock;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
addr = vm_map_min(map);
result = vm_map_find(map, NULL, (vm_offset_t) 0,
&addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
if (result != KERN_SUCCESS) {
return (0);
}
@@ -131,10 +137,17 @@ kmem_alloc_nofault(map, size)
vm_offset_t addr;
int result;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
addr = vm_map_min(map);
result = vm_map_find(map, NULL, (vm_offset_t) 0,
&addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
if (result != KERN_SUCCESS) {
return (0);
}
@@ -153,8 +166,11 @@ kmem_alloc(map, size)
vm_offset_t addr;
vm_offset_t offset;
vm_offset_t i;
+ int hadvmlock;
- mtx_assert(&Giant, MA_OWNED);
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
/*
@@ -170,6 +186,8 @@ kmem_alloc(map, size)
vm_map_lock(map);
if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (0);
}
offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -214,6 +232,8 @@ kmem_alloc(map, size)
(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (addr);
}
@@ -232,9 +252,16 @@ kmem_free(map, addr, size)
vm_offset_t addr;
vm_size_t size;
{
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
- mtx_assert(&Giant, MA_OWNED);
(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
+
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
/*
@@ -257,6 +284,11 @@ kmem_suballoc(parent, min, max, size)
{
int ret;
vm_map_t result;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
@@ -274,6 +306,8 @@ kmem_suballoc(parent, min, max, size)
panic("kmem_suballoc: cannot create submap");
if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS)
panic("kmem_suballoc: unable to change range to submap");
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (result);
}
@@ -308,10 +342,15 @@ kmem_malloc(map, size, flags)
vm_map_entry_t entry;
vm_offset_t addr;
vm_page_t m;
+ int hadvmlock;
if (map != kmem_map && map != mb_map)
panic("kmem_malloc: map != {kmem,mb}_map");
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+
size = round_page(size);
addr = vm_map_min(map);
@@ -326,12 +365,12 @@ kmem_malloc(map, size, flags)
if (map == mb_map) {
mb_map_full = TRUE;
printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n");
- return (0);
+ goto bad;
}
if ((flags & M_NOWAIT) == 0)
panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated",
(long)size, (long)map->size);
- return (0);
+ goto bad;
}
offset = addr - VM_MIN_KERNEL_ADDRESS;
vm_object_reference(kmem_object);
@@ -370,7 +409,7 @@ retry:
if (flags & M_ASLEEP) {
VM_AWAIT;
}
- return (0);
+ goto bad;
}
vm_page_flag_clear(m, PG_ZERO);
m->valid = VM_PAGE_BITS_ALL;
@@ -407,7 +446,14 @@ retry:
}
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (addr);
+
+bad:
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (0);
}
/*
@@ -425,6 +471,11 @@ kmem_alloc_wait(map, size)
vm_size_t size;
{
vm_offset_t addr;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
@@ -439,13 +490,17 @@ kmem_alloc_wait(map, size)
/* no space now; see if we can ever get space */
if (vm_map_max(map) - vm_map_min(map) < size) {
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (0);
}
vm_map_unlock(map);
- tsleep(map, PVM, "kmaw", 0);
+ msleep(map, &vm_mtx, PVM, "kmaw", 0);
}
vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (addr);
}
@@ -461,10 +516,17 @@ kmem_free_wakeup(map, addr, size)
vm_offset_t addr;
vm_size_t size;
{
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
vm_map_lock(map);
(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
wakeup(map);
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
/*
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index b33e9e4..d07d35b 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -200,6 +200,7 @@ vmspace_free(vm)
struct vmspace *vm;
{
+ mtx_assert(&vm_mtx, MA_OWNED);
if (vm->vm_refcnt == 0)
panic("vmspace_free: attempt to free already freed vmspace");
@@ -350,6 +351,8 @@ vm_map_entry_unlink(vm_map_t map,
* in the "entry" parameter. The boolean
* result indicates whether the address is
* actually contained in the map.
+ *
+ * Doesn't block.
*/
boolean_t
vm_map_lookup_entry(map, address, entry)
@@ -439,6 +442,7 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
vm_map_entry_t temp_entry;
vm_eflags_t protoeflags;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Check that the start and end points are not bogus.
*/
@@ -1705,7 +1709,9 @@ vm_map_clean(map, start, end, syncio, invalidate)
int flags;
vm_object_reference(object);
+ mtx_unlock(&vm_mtx);
vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
+ mtx_lock(&vm_mtx);
flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
flags |= invalidate ? OBJPC_INVAL : 0;
vm_object_page_clean(object,
@@ -2296,6 +2302,8 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
* the stack. Also returns KERN_SUCCESS if addr is outside the
* stack range (this is strange, but preserves compatibility with
* the grow function in vm_machdep.c).
+ *
+ * Will grab vm_mtx if needed
*/
int
vm_map_growstack (struct proc *p, vm_offset_t addr)
@@ -2309,18 +2317,29 @@ vm_map_growstack (struct proc *p, vm_offset_t addr)
int grow_amount;
int rv;
int is_procstack;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+#define myreturn(rval) do { \
+ if (!hadvmlock) \
+ mtx_unlock(&vm_mtx); \
+ return (rval); \
+} while (0)
+
Retry:
vm_map_lock_read(map);
/* If addr is already in the entry range, no need to grow.*/
if (vm_map_lookup_entry(map, addr, &prev_entry)) {
vm_map_unlock_read(map);
- return (KERN_SUCCESS);
+ myreturn (KERN_SUCCESS);
}
if ((stack_entry = prev_entry->next) == &map->header) {
vm_map_unlock_read(map);
- return (KERN_SUCCESS);
+ myreturn (KERN_SUCCESS);
}
if (prev_entry == &map->header)
end = stack_entry->start - stack_entry->avail_ssize;
@@ -2338,14 +2357,14 @@ Retry:
addr >= stack_entry->start ||
addr < stack_entry->start - stack_entry->avail_ssize) {
vm_map_unlock_read(map);
- return (KERN_SUCCESS);
+ myreturn (KERN_SUCCESS);
}
/* Find the minimum grow amount */
grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
if (grow_amount > stack_entry->avail_ssize) {
vm_map_unlock_read(map);
- return (KERN_NO_SPACE);
+ myreturn (KERN_NO_SPACE);
}
/* If there is no longer enough space between the entries
@@ -2364,7 +2383,7 @@ Retry:
stack_entry->avail_ssize = stack_entry->start - end;
vm_map_unlock(map);
- return (KERN_NO_SPACE);
+ myreturn (KERN_NO_SPACE);
}
is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
@@ -2375,7 +2394,7 @@ Retry:
if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
vm_map_unlock_read(map);
- return (KERN_NO_SPACE);
+ myreturn (KERN_NO_SPACE);
}
/* Round up the grow amount modulo SGROWSIZ */
@@ -2427,8 +2446,8 @@ Retry:
}
vm_map_unlock(map);
- return (rv);
-
+ myreturn (rv);
+#undef myreturn
}
/*
@@ -2501,6 +2520,9 @@ vmspace_unshare(struct proc *p) {
* specified, the map may be changed to perform virtual
* copying operations, although the data referenced will
* remain the same.
+ *
+ * Can block locking maps and while calling vm_object_shadow().
+ * Will drop/reacquire the vm_mtx.
*/
int
vm_map_lookup(vm_map_t *var_map, /* IN/OUT */
@@ -2928,6 +2950,8 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages)
* Performs the copy_on_write operations necessary to allow the virtual copies
* into user space to work. This has to be called for write(2) system calls
* from other processes, file unlinking, and file size shrinkage.
+ *
+ * Requires that the vm_mtx is held
*/
void
vm_freeze_copyopts(object, froma, toa)
@@ -2938,6 +2962,7 @@ vm_freeze_copyopts(object, froma, toa)
vm_object_t robject;
vm_pindex_t idx;
+ mtx_assert(&vm_mtx, MA_OWNED);
if ((object == NULL) ||
((object->flags & OBJ_OPT) == 0))
return;
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index daf2b6e..241a80c 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -196,6 +196,7 @@ struct vmspace {
caddr_t vm_minsaddr; /* user VA at max stack growth */
};
+#ifdef _KERNEL
/*
* Macros: vm_map_lock, etc.
* Function:
@@ -211,6 +212,7 @@ struct vmspace {
do { \
lockmgr(&(map)->lock, LK_DRAIN|LK_INTERLOCK, \
&(map)->ref_lock, curproc); \
+ mtx_lock(&vm_mtx); \
(map)->timestamp++; \
} while(0)
@@ -225,27 +227,33 @@ struct vmspace {
#define vm_map_lock(map) \
do { \
vm_map_printf("locking map LK_EXCLUSIVE: %p\n", map); \
- if (lockmgr(&(map)->lock, LK_EXCLUSIVE, (void *)0, curproc) != 0) \
+ mtx_assert(&vm_mtx, MA_OWNED); \
+ if (lockmgr(&(map)->lock, LK_EXCLUSIVE | LK_INTERLOCK, \
+ &vm_mtx, curproc) != 0) \
panic("vm_map_lock: failed to get lock"); \
+ mtx_lock(&vm_mtx); \
(map)->timestamp++; \
} while(0)
#define vm_map_unlock(map) \
do { \
vm_map_printf("locking map LK_RELEASE: %p\n", map); \
- lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc); \
+ lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc); \
} while (0)
#define vm_map_lock_read(map) \
do { \
vm_map_printf("locking map LK_SHARED: %p\n", map); \
- lockmgr(&(map)->lock, LK_SHARED, (void *)0, curproc); \
+ mtx_assert(&vm_mtx, MA_OWNED); \
+ lockmgr(&(map)->lock, LK_SHARED | LK_INTERLOCK, \
+ &vm_mtx, curproc); \
+ mtx_lock(&vm_mtx); \
} while (0)
#define vm_map_unlock_read(map) \
do { \
vm_map_printf("locking map LK_RELEASE: %p\n", map); \
- lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc); \
+ lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc); \
} while (0)
static __inline__ int
@@ -253,7 +261,8 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) {
int error;
vm_map_printf("locking map LK_EXCLUPGRADE: %p\n", map);
- error = lockmgr(&map->lock, LK_EXCLUPGRADE, (void *)0, p);
+ error = lockmgr(&map->lock, LK_EXCLUPGRADE | LK_INTERLOCK, &vm_mtx, p);
+ mtx_lock(&vm_mtx);
if (error == 0)
map->timestamp++;
return error;
@@ -264,7 +273,7 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) {
#define vm_map_lock_downgrade(map) \
do { \
vm_map_printf("locking map LK_DOWNGRADE: %p\n", map); \
- lockmgr(&(map)->lock, LK_DOWNGRADE, (void *)0, curproc); \
+ lockmgr(&(map)->lock, LK_DOWNGRADE, NULL, curproc); \
} while (0)
#define vm_map_set_recursive(map) \
@@ -287,6 +296,7 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) {
#define vm_map_min(map) ((map)->min_offset)
#define vm_map_max(map) ((map)->max_offset)
#define vm_map_pmap(map) ((map)->pmap)
+#endif /* _KERNEL */
static __inline struct pmap *
vmspace_pmap(struct vmspace *vmspace)
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 8dcb906..0f4e107 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -145,8 +145,10 @@ vmtotal(SYSCTL_HANDLER_ARGS)
/*
* Mark all objects as inactive.
*/
+ mtx_lock(&vm_mtx);
TAILQ_FOREACH(object, &vm_object_list, object_list)
vm_object_clear_flag(object, OBJ_ACTIVE);
+ mtx_unlock(&vm_mtx);
/*
* Calculate process statistics.
*/
@@ -197,6 +199,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
* Note active objects.
*/
paging = 0;
+ mtx_lock(&vm_mtx);
for (map = &p->p_vmspace->vm_map, entry = map->header.next;
entry != &map->header; entry = entry->next) {
if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
@@ -205,6 +208,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
vm_object_set_flag(entry->object.vm_object, OBJ_ACTIVE);
paging |= entry->object.vm_object->paging_in_progress;
}
+ mtx_unlock(&vm_mtx);
if (paging)
totalp->t_pw++;
}
@@ -212,6 +216,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
/*
* Calculate object memory usage statistics.
*/
+ mtx_lock(&vm_mtx);
TAILQ_FOREACH(object, &vm_object_list, object_list) {
/*
* devices, like /dev/mem, will badly skew our totals
@@ -235,6 +240,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
}
}
totalp->t_free = cnt.v_free_count + cnt.v_cache_count;
+ mtx_unlock(&vm_mtx);
return (sysctl_handle_opaque(oidp, totalp, sizeof total, req));
}
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 14307b3..5de25d9 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -52,6 +52,7 @@
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
@@ -515,14 +516,17 @@ msync(p, uap)
* the range of the map entry containing addr. This can be incorrect
* if the region splits or is coalesced with a neighbor.
*/
+ mtx_lock(&vm_mtx);
if (size == 0) {
vm_map_entry_t entry;
vm_map_lock_read(map);
rv = vm_map_lookup_entry(map, addr, &entry);
vm_map_unlock_read(map);
- if (rv == FALSE)
+ if (rv == FALSE) {
+ mtx_unlock(&vm_mtx);
return (EINVAL);
+ }
addr = entry->start;
size = entry->end - entry->start;
}
@@ -533,6 +537,7 @@ msync(p, uap)
rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
(flags & MS_INVALIDATE) != 0);
+ mtx_unlock(&vm_mtx);
switch (rv) {
case KERN_SUCCESS:
break;
@@ -589,10 +594,14 @@ munmap(p, uap)
/*
* Make sure entire range is allocated.
*/
- if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
+ mtx_lock(&vm_mtx);
+ if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) {
+ mtx_unlock(&vm_mtx);
return (EINVAL);
+ }
/* returns nothing but KERN_SUCCESS anyway */
(void) vm_map_remove(map, addr, addr + size);
+ mtx_unlock(&vm_mtx);
return (0);
}
@@ -624,6 +633,7 @@ mprotect(p, uap)
vm_offset_t addr;
vm_size_t size, pageoff;
register vm_prot_t prot;
+ int ret;
addr = (vm_offset_t) uap->addr;
size = uap->len;
@@ -640,8 +650,11 @@ mprotect(p, uap)
if (addr + size < addr)
return(EINVAL);
- switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
- FALSE)) {
+ mtx_lock(&vm_mtx);
+ ret = vm_map_protect(&p->p_vmspace->vm_map, addr,
+ addr + size, prot, FALSE);
+ mtx_unlock(&vm_mtx);
+ switch (ret) {
case KERN_SUCCESS:
return (0);
case KERN_PROTECTION_FAILURE:
@@ -665,6 +678,7 @@ minherit(p, uap)
vm_offset_t addr;
vm_size_t size, pageoff;
register vm_inherit_t inherit;
+ int ret;
addr = (vm_offset_t)uap->addr;
size = uap->len;
@@ -677,8 +691,12 @@ minherit(p, uap)
if (addr + size < addr)
return(EINVAL);
- switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
- inherit)) {
+ mtx_lock(&vm_mtx);
+ ret = vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
+ inherit);
+ mtx_unlock(&vm_mtx);
+
+ switch (ret) {
case KERN_SUCCESS:
return (0);
case KERN_PROTECTION_FAILURE:
@@ -702,6 +720,7 @@ madvise(p, uap)
struct madvise_args *uap;
{
vm_offset_t start, end;
+ int ret;
/*
* Check for illegal behavior
@@ -729,9 +748,10 @@ madvise(p, uap)
start = trunc_page((vm_offset_t) uap->addr);
end = round_page((vm_offset_t) uap->addr + uap->len);
- if (vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav))
- return (EINVAL);
- return (0);
+ mtx_lock(&vm_mtx);
+ ret = vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav);
+ mtx_unlock(&vm_mtx);
+ return (ret ? EINVAL : 0);
}
#ifndef _SYS_SYSPROTO_H_
@@ -777,6 +797,7 @@ mincore(p, uap)
vec = uap->vec;
map = &p->p_vmspace->vm_map;
+ mtx_lock(&vm_mtx);
pmap = vmspace_pmap(p->p_vmspace);
vm_map_lock_read(map);
@@ -856,6 +877,7 @@ RestartScan:
* the map, we release the lock.
*/
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
/*
* calculate index into user supplied byte vector
@@ -886,6 +908,7 @@ RestartScan:
* If the map has changed, due to the subyte, the previous
* output may be invalid.
*/
+ mtx_lock(&vm_mtx);
vm_map_lock_read(map);
if (timestamp != map->timestamp)
goto RestartScan;
@@ -900,6 +923,7 @@ RestartScan:
* the map, we release the lock.
*/
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
/*
* Zero the last entries in the byte vector.
@@ -917,10 +941,12 @@ RestartScan:
* If the map has changed, due to the subyte, the previous
* output may be invalid.
*/
+ mtx_lock(&vm_mtx);
vm_map_lock_read(map);
if (timestamp != map->timestamp)
goto RestartScan;
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
return (0);
}
@@ -965,7 +991,10 @@ mlock(p, uap)
return (error);
#endif
- error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
+ mtx_lock(&vm_mtx);
+ error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr,
+ addr + size, FALSE);
+ mtx_unlock(&vm_mtx);
return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
@@ -1030,7 +1059,10 @@ munlock(p, uap)
return (error);
#endif
- error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
+ mtx_lock(&vm_mtx);
+ error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr,
+ addr + size, TRUE);
+ mtx_unlock(&vm_mtx);
return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
@@ -1077,7 +1109,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
if (*addr != trunc_page(*addr))
return (EINVAL);
fitit = FALSE;
+ mtx_lock(&vm_mtx);
(void) vm_map_remove(map, *addr, *addr + size);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -1099,7 +1133,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
struct vattr vat;
int error;
+ mtx_lock(&Giant);
error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
+ mtx_unlock(&Giant);
if (error)
return (error);
objsize = round_page(vat.va_size);
@@ -1148,6 +1184,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
maxprot |= VM_PROT_EXECUTE;
#endif
+ mtx_lock(&vm_mtx);
if (fitit) {
*addr = pmap_addr_hint(object, *addr, size);
}
@@ -1180,6 +1217,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
}
}
out:
+ mtx_unlock(&vm_mtx);
switch (rv) {
case KERN_SUCCESS:
return (0);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 33fe834..30ef190 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -146,6 +146,24 @@ _vm_object_allocate(type, size, object)
vm_object_t object;
{
int incr;
+ int hadvmlock;
+
+ /*
+ * XXX: Not all callers seem to have the lock, compensate.
+ * I'm pretty sure we need to bump the gen count before possibly
+ * nuking the data contained within while under the lock.
+ */
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+ object->generation++;
+ if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
+ vm_object_set_flag(object, OBJ_ONEMAPPING);
+ TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
+ vm_object_count++;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+
TAILQ_INIT(&object->memq);
TAILQ_INIT(&object->shadow_head);
@@ -153,8 +171,6 @@ _vm_object_allocate(type, size, object)
object->size = size;
object->ref_count = 1;
object->flags = 0;
- if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
- vm_object_set_flag(object, OBJ_ONEMAPPING);
object->paging_in_progress = 0;
object->resident_page_count = 0;
object->shadow_count = 0;
@@ -175,10 +191,6 @@ _vm_object_allocate(type, size, object)
*/
object->hash_rand = object_hash_rand - 129;
- object->generation++;
-
- TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
- vm_object_count++;
object_hash_rand = object->hash_rand;
}
@@ -226,7 +238,6 @@ vm_object_allocate(type, size)
vm_object_t result;
result = (vm_object_t) zalloc(obj_zone);
-
_vm_object_allocate(type, size, result);
return (result);
@@ -250,18 +261,29 @@ vm_object_reference(object)
object->ref_count++;
if (object->type == OBJT_VNODE) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
while (vget((struct vnode *) object->handle, LK_RETRY|LK_NOOBJ, curproc)) {
printf("vm_object_reference: delay in getting object\n");
}
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
}
+/*
+ * handle deallocating a object of type OBJT_VNODE
+ *
+ * requires vm_mtx
+ * may block
+ */
void
vm_object_vndeallocate(object)
vm_object_t object;
{
struct vnode *vp = (struct vnode *) object->handle;
+ mtx_assert(&vm_mtx, MA_OWNED);
KASSERT(object->type == OBJT_VNODE,
("vm_object_vndeallocate: not a vnode object"));
KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
@@ -277,7 +299,14 @@ vm_object_vndeallocate(object)
vp->v_flag &= ~VTEXT;
vm_object_clear_flag(object, OBJ_OPT);
}
+ /*
+ * vrele may need a vop lock
+ */
+ mtx_unlock(VM_OBJECT_MTX(object));
+ mtx_lock(&Giant);
vrele(vp);
+ mtx_unlock(&Giant);
+ mtx_lock(VM_OBJECT_MTX(object));
}
/*
@@ -290,6 +319,7 @@ vm_object_vndeallocate(object)
* may be relinquished.
*
* No object may be locked.
+ * vm_mtx must be held
*/
void
vm_object_deallocate(object)
@@ -297,6 +327,7 @@ vm_object_deallocate(object)
{
vm_object_t temp;
+ mtx_assert(&vm_mtx, MA_OWNED);
while (object != NULL) {
if (object->type == OBJT_VNODE) {
@@ -422,7 +453,11 @@ vm_object_terminate(object)
vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
vp = (struct vnode *) object->handle;
+ mtx_unlock(VM_OBJECT_MTX(object));
+ mtx_lock(&Giant);
vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
+ mtx_unlock(&Giant);
+ mtx_lock(VM_OBJECT_MTX(object));
}
KASSERT(object->ref_count == 0,
@@ -507,6 +542,7 @@ vm_object_page_clean(object, start, end, flags)
vm_page_t ma[vm_pageout_page_count];
int curgeneration;
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
if (object->type != OBJT_VNODE ||
(object->flags & OBJ_MIGHTBEDIRTY) == 0)
return;
@@ -962,6 +998,7 @@ vm_object_backing_scan(vm_object_t object, int op)
vm_pindex_t backing_offset_index;
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
backing_object = object->backing_object;
backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
@@ -1175,6 +1212,9 @@ void
vm_object_collapse(object)
vm_object_t object;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+
while (TRUE) {
vm_object_t backing_object;
@@ -1386,6 +1426,8 @@ vm_object_page_remove(object, start, end, clean_only)
unsigned int size;
int all;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (object == NULL ||
object->resident_page_count == 0)
return;
@@ -1502,6 +1544,8 @@ vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size)
{
vm_pindex_t next_pindex;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (prev_object == NULL) {
return (TRUE);
}
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index ba4c026..2b29baf 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -169,34 +169,49 @@ extern vm_object_t kmem_object;
#ifdef _KERNEL
+/*
+ * For now a global vm lock.
+ */
+#define VM_OBJECT_MTX(object) (&vm_mtx)
+
static __inline void
vm_object_set_flag(vm_object_t object, u_short bits)
{
- atomic_set_short(&object->flags, bits);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->flags |= bits;
}
static __inline void
vm_object_clear_flag(vm_object_t object, u_short bits)
{
- atomic_clear_short(&object->flags, bits);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->flags &= ~bits;
}
static __inline void
vm_object_pip_add(vm_object_t object, short i)
{
- atomic_add_short(&object->paging_in_progress, i);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->paging_in_progress += i;
}
static __inline void
vm_object_pip_subtract(vm_object_t object, short i)
{
- atomic_subtract_short(&object->paging_in_progress, i);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->paging_in_progress -= i;
}
static __inline void
vm_object_pip_wakeup(vm_object_t object)
{
- atomic_subtract_short(&object->paging_in_progress, 1);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->paging_in_progress--;
if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
vm_object_clear_flag(object, OBJ_PIPWNT);
wakeup(object);
@@ -206,8 +221,10 @@ vm_object_pip_wakeup(vm_object_t object)
static __inline void
vm_object_pip_wakeupn(vm_object_t object, short i)
{
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
if (i)
- atomic_subtract_short(&object->paging_in_progress, i);
+ object->paging_in_progress -= i;
if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
vm_object_clear_flag(object, OBJ_PIPWNT);
wakeup(object);
@@ -217,11 +234,13 @@ vm_object_pip_wakeupn(vm_object_t object, short i)
static __inline void
vm_object_pip_sleep(vm_object_t object, char *waitid)
{
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
if (object->paging_in_progress) {
int s = splvm();
if (object->paging_in_progress) {
vm_object_set_flag(object, OBJ_PIPWNT);
- tsleep(object, PVM, waitid, 0);
+ msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0);
}
splx(s);
}
@@ -230,6 +249,8 @@ vm_object_pip_sleep(vm_object_t object, char *waitid)
static __inline void
vm_object_pip_wait(vm_object_t object, char *waitid)
{
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
while (object->paging_in_progress)
vm_object_pip_sleep(object, waitid);
}
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 5865d70..2ae0fe7 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -71,6 +71,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
@@ -147,6 +148,7 @@ vm_set_page_size()
*
* Add a new page to the freelist for use by the system.
* Must be called at splhigh().
+ * Must be called with the vm_mtx held.
*/
vm_page_t
vm_add_new_page(pa)
@@ -154,6 +156,7 @@ vm_add_new_page(pa)
{
vm_page_t m;
+ mtx_assert(&vm_mtx, MA_OWNED);
++cnt.v_page_count;
++cnt.v_free_count;
m = PHYS_TO_VM_PAGE(pa);
@@ -360,6 +363,7 @@ vm_page_insert(m, object, pindex)
{
register struct vm_page **bucket;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (m->object != NULL)
panic("vm_page_insert: already inserted");
@@ -419,6 +423,7 @@ vm_page_remove(m)
{
vm_object_t object;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (m->object == NULL)
return;
@@ -482,6 +487,8 @@ vm_page_remove(m)
* an interrupt makes a change, but the generation algorithm will not
* operate properly in an SMP environment where both cpu's are able to run
* kernel code simultaneously.
+ * NOTE: under the giant vm lock we should be ok, there should be
+ * no reason to check vm_page_bucket_generation
*
* The object must be locked. No side effects.
* This routine may not block.
@@ -596,6 +603,8 @@ vm_page_unqueue(m)
{
int queue = m->queue;
struct vpgqueues *pq;
+
+ mtx_assert(&vm_mtx, MA_OWNED);
if (queue != PQ_NONE) {
m->queue = PQ_NONE;
pq = &vm_page_queues[queue];
@@ -636,6 +645,7 @@ _vm_page_list_find(basequeue, index)
vm_page_t m = NULL;
struct vpgqueues *pq;
+ mtx_assert(&vm_mtx, MA_OWNED);
pq = &vm_page_queues[basequeue];
/*
@@ -673,6 +683,7 @@ vm_page_select_cache(object, pindex)
{
vm_page_t m;
+ mtx_assert(&vm_mtx, MA_OWNED);
while (TRUE) {
m = vm_page_list_find(
PQ_CACHE,
@@ -724,7 +735,7 @@ vm_page_select_free(vm_object_t object, vm_pindex_t pindex, boolean_t prefer_zer
* VM_ALLOC_INTERRUPT interrupt time request
* VM_ALLOC_ZERO zero page
*
- * Object must be locked.
+ * vm_mtx must be locked.
* This routine may not block.
*
* Additional special handling is required when called from an
@@ -741,6 +752,7 @@ vm_page_alloc(object, pindex, page_req)
register vm_page_t m = NULL;
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
KASSERT(!vm_page_lookup(object, pindex),
("vm_page_alloc: page already allocated"));
@@ -873,13 +885,13 @@ vm_wait()
s = splvm();
if (curproc == pageproc) {
vm_pageout_pages_needed = 1;
- tsleep(&vm_pageout_pages_needed, PSWP, "VMWait", 0);
+ msleep(&vm_pageout_pages_needed, &vm_mtx, PSWP, "VMWait", 0);
} else {
if (!vm_pages_needed) {
vm_pages_needed = 1;
wakeup(&vm_pages_needed);
}
- tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
+ msleep(&cnt.v_free_count, &vm_mtx, PVM, "vmwait", 0);
}
splx(s);
}
@@ -910,61 +922,6 @@ vm_await()
splx(s);
}
-#if 0
-/*
- * vm_page_sleep:
- *
- * Block until page is no longer busy.
- */
-
-int
-vm_page_sleep(vm_page_t m, char *msg, char *busy) {
- int slept = 0;
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- int s;
- s = splvm();
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- vm_page_flag_set(m, PG_WANTED);
- tsleep(m, PVM, msg, 0);
- slept = 1;
- }
- splx(s);
- }
- return slept;
-}
-
-#endif
-
-#if 0
-
-/*
- * vm_page_asleep:
- *
- * Similar to vm_page_sleep(), but does not block. Returns 0 if
- * the page is not busy, or 1 if the page is busy.
- *
- * This routine has the side effect of calling asleep() if the page
- * was busy (1 returned).
- */
-
-int
-vm_page_asleep(vm_page_t m, char *msg, char *busy) {
- int slept = 0;
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- int s;
- s = splvm();
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- vm_page_flag_set(m, PG_WANTED);
- asleep(m, PVM, msg, 0);
- slept = 1;
- }
- splx(s);
- }
- return slept;
-}
-
-#endif
-
/*
* vm_page_activate:
*
@@ -982,6 +939,7 @@ vm_page_activate(m)
int s;
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
if (m->queue != PQ_ACTIVE) {
if ((m->queue - m->pc) == PQ_CACHE)
cnt.v_reactivated++;
@@ -1056,6 +1014,7 @@ vm_page_free_toq(vm_page_t m)
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
cnt.v_tfree++;
if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
@@ -1293,6 +1252,7 @@ _vm_page_deactivate(vm_page_t m, int athead)
{
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Ignore if already inactive.
*/
@@ -1330,6 +1290,8 @@ vm_page_deactivate(vm_page_t m)
int
vm_page_try_to_cache(vm_page_t m)
{
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
if (m->dirty || m->hold_count || m->busy || m->wire_count ||
(m->flags & (PG_BUSY|PG_UNMANAGED))) {
return(0);
@@ -1354,6 +1316,7 @@ vm_page_cache(m)
{
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy || m->wire_count) {
printf("vm_page_cache: attempting to cache busy page\n");
return;
@@ -1411,6 +1374,7 @@ vm_page_dontneed(m)
int dnw;
int head;
+ mtx_assert(&vm_mtx, MA_OWNED);
dnw = ++dnweight;
/*
@@ -1451,6 +1415,7 @@ vm_page_dontneed(m)
* to be in the object. If the page doesn't exist, allocate it.
*
* This routine may block.
+ * Requires vm_mtx.
*/
vm_page_t
vm_page_grab(object, pindex, allocflags)
@@ -1458,10 +1423,10 @@ vm_page_grab(object, pindex, allocflags)
vm_pindex_t pindex;
int allocflags;
{
-
vm_page_t m;
int s, generation;
+ mtx_assert(&vm_mtx, MA_OWNED);
retrylookup:
if ((m = vm_page_lookup(object, pindex)) != NULL) {
if (m->busy || (m->flags & PG_BUSY)) {
@@ -1471,7 +1436,7 @@ retrylookup:
while ((object->generation == generation) &&
(m->busy || (m->flags & PG_BUSY))) {
vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
- tsleep(m, PVM, "pgrbwt", 0);
+ msleep(m, &vm_mtx, PVM, "pgrbwt", 0);
if ((allocflags & VM_ALLOC_RETRY) == 0) {
splx(s);
return NULL;
@@ -1534,6 +1499,8 @@ vm_page_bits(int base, int size)
* This routine may not block.
*
* (base + size) must be less then or equal to PAGE_SIZE.
+ *
+ * vm_mtx needs to be held
*/
void
vm_page_set_validclean(m, base, size)
@@ -1545,6 +1512,7 @@ vm_page_set_validclean(m, base, size)
int frag;
int endoff;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (size == 0) /* handle degenerate case */
return;
@@ -1618,6 +1586,8 @@ vm_page_clear_dirty(m, base, size)
int base;
int size;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
m->dirty &= ~vm_page_bits(base, size);
}
@@ -1637,6 +1607,7 @@ vm_page_set_invalid(m, base, size)
{
int bits;
+ mtx_assert(&vm_mtx, MA_OWNED);
bits = vm_page_bits(base, size);
m->valid &= ~bits;
m->dirty &= ~bits;
@@ -1923,8 +1894,19 @@ contigmalloc(size, type, flags, low, high, alignment, boundary)
unsigned long alignment;
unsigned long boundary;
{
- return contigmalloc1(size, type, flags, low, high, alignment, boundary,
+ void * ret;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+ ret = contigmalloc1(size, type, flags, low, high, alignment, boundary,
kernel_map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+
+ return (ret);
+
}
void
@@ -1933,7 +1915,14 @@ contigfree(addr, size, type)
unsigned long size;
struct malloc_type *type;
{
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
kmem_free(kernel_map, (vm_offset_t)addr, size);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
vm_offset_t
@@ -1943,8 +1932,18 @@ vm_page_alloc_contig(size, low, high, alignment)
vm_offset_t high;
vm_offset_t alignment;
{
- return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
+ vm_offset_t ret;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+ ret = ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
alignment, 0ul, kernel_map));
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (ret);
+
}
#include "opt_ddb.h"
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index dc8290e..e1c1cc4 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -305,19 +305,28 @@ extern long first_page; /* first physical page number */
(&vm_page_array[atop(pa) - first_page ])
/*
+ * For now, a global vm lock
+ */
+#define VM_PAGE_MTX(m) (&vm_mtx)
+
+/*
* Functions implemented as macros
*/
static __inline void
vm_page_flag_set(vm_page_t m, unsigned short bits)
{
- atomic_set_short(&(m)->flags, bits);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->flags |= bits;
}
static __inline void
vm_page_flag_clear(vm_page_t m, unsigned short bits)
{
- atomic_clear_short(&(m)->flags, bits);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->flags &= ~bits;
}
#if 0
@@ -332,7 +341,9 @@ vm_page_assert_wait(vm_page_t m, int interruptible)
static __inline void
vm_page_busy(vm_page_t m)
{
- KASSERT((m->flags & PG_BUSY) == 0, ("vm_page_busy: page already busy!!!"));
+
+ KASSERT((m->flags & PG_BUSY) == 0,
+ ("vm_page_busy: page already busy!!!"));
vm_page_flag_set(m, PG_BUSY);
}
@@ -375,13 +386,17 @@ vm_page_wakeup(vm_page_t m)
static __inline void
vm_page_io_start(vm_page_t m)
{
- atomic_add_char(&(m)->busy, 1);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->busy++;
}
static __inline void
vm_page_io_finish(vm_page_t m)
{
- atomic_subtract_char(&m->busy, 1);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->busy--;
if (m->busy == 0)
vm_page_flash(m);
}
@@ -447,12 +462,16 @@ void vm_page_free_toq(vm_page_t m);
static __inline void
vm_page_hold(vm_page_t mem)
{
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
mem->hold_count++;
}
static __inline void
vm_page_unhold(vm_page_t mem)
{
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
--mem->hold_count;
KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!"));
}
@@ -565,7 +584,7 @@ vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg)
* Page is busy. Wait and retry.
*/
vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
- tsleep(m, PVM, msg, 0);
+ msleep(m, VM_PAGE_MTX(m), PVM, msg, 0);
}
splx(s);
return(TRUE);
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index ce333cf..60e3f21 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -447,6 +447,8 @@ vm_pageout_flush(mc, count, flags)
* backing_objects.
*
* The object and map must be locked.
+ *
+ * Requires the vm_mtx
*/
static void
vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
@@ -460,6 +462,7 @@ vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
int remove_mode;
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (object->type == OBJT_DEVICE || object->type == OBJT_PHYS)
return;
@@ -1322,7 +1325,7 @@ vm_pageout()
{
int pass;
- mtx_lock(&Giant);
+ mtx_lock(&vm_mtx);
/*
* Initialize some paging parameters.
@@ -1412,7 +1415,8 @@ vm_pageout()
*/
++pass;
if (pass > 1)
- tsleep(&vm_pages_needed, PVM, "psleep", hz/2);
+ msleep(&vm_pages_needed, &vm_mtx, PVM,
+ "psleep", hz/2);
} else {
/*
* Good enough, sleep & handle stats. Prime the pass
@@ -1422,7 +1426,7 @@ vm_pageout()
pass = 1;
else
pass = 0;
- error = tsleep(&vm_pages_needed,
+ error = msleep(&vm_pages_needed, &vm_mtx,
PVM, "psleep", vm_pageout_stats_interval * hz);
if (error && !vm_pages_needed) {
splx(s);
@@ -1466,12 +1470,13 @@ vm_daemon()
{
struct proc *p;
- mtx_lock(&Giant);
+ mtx_lock(&vm_mtx);
while (TRUE) {
- tsleep(&vm_daemon_needed, PPAUSE, "psleep", 0);
+ msleep(&vm_daemon_needed, &vm_mtx, PPAUSE, "psleep", 0);
if (vm_pageout_req_swapout) {
swapout_procs(vm_pageout_req_swapout);
+ mtx_assert(&vm_mtx, MA_OWNED);
vm_pageout_req_swapout = 0;
}
/*
@@ -1479,6 +1484,7 @@ vm_daemon()
* process is swapped out -- deactivate pages
*/
+ mtx_unlock(&vm_mtx);
sx_slock(&allproc_lock);
LIST_FOREACH(p, &allproc, p_list) {
vm_pindex_t limit, size;
@@ -1515,13 +1521,16 @@ vm_daemon()
limit = 0; /* XXX */
mtx_unlock_spin(&sched_lock);
+ mtx_lock(&vm_mtx);
size = vmspace_resident_count(p->p_vmspace);
if (limit >= 0 && size >= limit) {
vm_pageout_map_deactivate_pages(
&p->p_vmspace->vm_map, limit);
}
+ mtx_unlock(&vm_mtx);
}
sx_sunlock(&allproc_lock);
+ mtx_lock(&vm_mtx);
}
}
#endif
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index b13c9c0..e53a14c 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -240,21 +240,32 @@ vm_pager_bufferinit()
* need to perform page-level validation (e.g. the device pager).
*/
vm_object_t
-vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size, vm_prot_t prot,
- vm_ooffset_t off)
+vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size,
+ vm_prot_t prot, vm_ooffset_t off)
{
+ vm_object_t ret;
struct pagerops *ops;
+ int hadvmlock;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
ops = pagertab[type];
if (ops)
- return ((*ops->pgo_alloc) (handle, size, prot, off));
- return (NULL);
+ ret = (*ops->pgo_alloc) (handle, size, prot, off);
+ else
+ ret = NULL;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (ret);
}
void
vm_pager_deallocate(object)
vm_object_t object;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
(*pagertab[object->type]->pgo_dealloc) (object);
}
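
vm_pager_allocate() is one of several entry points that can be reached both from code that already holds vm_mtx and from code that does not; since vm_mtx does not recurse, the function records the caller's state with mtx_owned() and locks or unlocks only on its own behalf. The idiom in isolation, as a kernel-context sketch with the real work reduced to a placeholder:

    int hadvmlock;

    hadvmlock = mtx_owned(&vm_mtx);     /* caller may or may not hold it */
    if (!hadvmlock)
        mtx_lock(&vm_mtx);

    /* ... work that must run under vm_mtx ... */

    if (!hadvmlock)
        mtx_unlock(&vm_mtx);            /* leave the lock as we found it */

Returning with the lock in exactly the state it was received lets converted and not-yet-converted callers share one entry point during the transition; the cost is that ownership becomes harder to reason about statically, which is why the pgo_* wrappers in vm_pager.h assert it explicitly.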
@@ -374,6 +385,8 @@ initpbuf(struct buf *bp)
*
* NOTE: pfreecnt can be NULL, but this 'feature' will be removed
* relatively soon when the rest of the subsystems get smart about it. XXX
+ *
+ * vm_mtx may or may not be held by the caller
*/
struct buf *
getpbuf(pfreecnt)
@@ -381,8 +394,12 @@ getpbuf(pfreecnt)
{
int s;
struct buf *bp;
+ int hadvmlock;
s = splvm();
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (hadvmlock)
+ mtx_unlock(&vm_mtx);
mtx_lock(&pbuf_mtx);
for (;;) {
@@ -407,6 +424,8 @@ getpbuf(pfreecnt)
splx(s);
initpbuf(bp);
+ if (hadvmlock)
+ mtx_lock(&vm_mtx);
return bp;
}
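
getpbuf() uses the same mtx_owned() test the other way around: if the caller does hold vm_mtx, the lock is released before sleeping for a physical buffer under pbuf_mtx and retaken just before returning, so vm_mtx is never held across that sleep and no vm_mtx-before-pbuf_mtx ordering is introduced. Skeleton only, with the buffer hunt reduced to a comment:

    struct buf *bp;
    int hadvmlock;

    hadvmlock = mtx_owned(&vm_mtx);
    if (hadvmlock)
        mtx_unlock(&vm_mtx);            /* never sleep for a pbuf under vm_mtx */

    mtx_lock(&pbuf_mtx);
    /* ... bp = a free physical buffer (sleeping if none), then initpbuf(bp) ... */
    mtx_unlock(&pbuf_mtx);

    if (hadvmlock)
        mtx_lock(&vm_mtx);              /* hand vm_mtx back in the entry state */
    return (bp);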
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
index f54c739..b4511ca 100644
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@@ -124,10 +124,12 @@ vm_pager_get_pages(
) {
int r;
+ mtx_assert(&vm_mtx, MA_OWNED);
r = (*pagertab[object->type]->pgo_getpages)(object, m, count, reqpage);
if (r == VM_PAGER_OK && m[reqpage]->valid != VM_PAGE_BITS_ALL) {
vm_page_zero_invalid(m[reqpage], TRUE);
}
+ mtx_assert(&vm_mtx, MA_OWNED);
return(r);
}
@@ -139,8 +141,11 @@ vm_pager_put_pages(
int flags,
int *rtvals
) {
+
+ mtx_assert(&vm_mtx, MA_OWNED);
(*pagertab[object->type]->pgo_putpages)
(object, m, count, flags, rtvals);
+ mtx_assert(&vm_mtx, MA_OWNED);
}
/*
@@ -161,7 +166,13 @@ vm_pager_has_page(
int *before,
int *after
) {
- return ((*pagertab[object->type]->pgo_haspage) (object, offset, before, after));
+ boolean_t ret;
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+ ret = (*pagertab[object->type]->pgo_haspage)
+ (object, offset, before, after);
+ mtx_assert(&vm_mtx, MA_OWNED);
+ return (ret);
}
/*
@@ -175,8 +186,11 @@ vm_pager_has_page(
static __inline void
vm_pager_page_unswapped(vm_page_t m)
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
if (pagertab[m->object->type]->pgo_pageunswapped)
(*pagertab[m->object->type]->pgo_pageunswapped)(m);
+ mtx_assert(&vm_mtx, MA_OWNED);
}
#endif
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index f9b24f8..4861306 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -49,6 +49,9 @@
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -93,6 +96,7 @@ obreak(p, uap)
return EINVAL;
}
+ mtx_lock(&vm_mtx);
if (new > old) {
vm_size_t diff;
@@ -100,16 +104,19 @@ obreak(p, uap)
rv = vm_map_find(&vm->vm_map, NULL, 0, &old, diff, FALSE,
VM_PROT_ALL, VM_PROT_ALL, 0);
if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return (ENOMEM);
}
vm->vm_dsize += btoc(diff);
} else if (new < old) {
rv = vm_map_remove(&vm->vm_map, new, old);
if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return (ENOMEM);
}
vm->vm_dsize -= btoc(old - new);
}
+ mtx_unlock(&vm_mtx);
return (0);
}
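
In obreak() above, the vm_map_find()/vm_map_remove() calls are bracketed by vm_mtx, and every early error return has to drop the lock itself. Read flat, with the size arithmetic elided, the patched body looks roughly like the sketch below; a single exit label that performs the unlock is the usual way to keep this pattern from going stale as more error paths are added:

    mtx_lock(&vm_mtx);
    if (new > old) {
        /* ... diff = new - old, rounded to a page ... */
        rv = vm_map_find(&vm->vm_map, NULL, 0, &old, diff, FALSE,
            VM_PROT_ALL, VM_PROT_ALL, 0);
        if (rv != KERN_SUCCESS) {
            mtx_unlock(&vm_mtx);        /* every error exit must unlock */
            return (ENOMEM);
        }
        vm->vm_dsize += btoc(diff);
    } else if (new < old) {
        rv = vm_map_remove(&vm->vm_map, new, old);
        if (rv != KERN_SUCCESS) {
            mtx_unlock(&vm_mtx);
            return (ENOMEM);
        }
        vm->vm_dsize -= btoc(old - new);
    }
    mtx_unlock(&vm_mtx);
    return (0);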
diff --git a/sys/vm/vm_zone.c b/sys/vm/vm_zone.c
index 4cddadc..30fadbe 100644
--- a/sys/vm/vm_zone.c
+++ b/sys/vm/vm_zone.c
@@ -137,6 +137,7 @@ zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
* in pages as needed.
*/
if (z->zflags & ZONE_INTERRUPT) {
+ int hadvmlock;
totsize = round_page(z->zsize * nentries);
atomic_add_int(&zone_kmem_kvaspace, totsize);
@@ -145,12 +146,17 @@ zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
return 0;
z->zpagemax = totsize / PAGE_SIZE;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
if (obj == NULL) {
z->zobj = vm_object_allocate(OBJT_DEFAULT, z->zpagemax);
} else {
z->zobj = obj;
_vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj);
}
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
z->zallocflag = VM_ALLOC_INTERRUPT;
z->zmax += nentries;
} else {
@@ -262,7 +268,6 @@ _zget(vm_zone_t z)
void *item;
KASSERT(z != NULL, ("invalid zone"));
- mtx_assert(&z->zmtx, MA_OWNED);
if (z->zflags & ZONE_INTERRUPT) {
item = (char *) z->zkva + z->zpagecount * PAGE_SIZE;
@@ -299,16 +304,13 @@ _zget(vm_zone_t z)
* We can wait, so just do normal map allocation in the appropriate
* map.
*/
+ mtx_unlock(&z->zmtx);
if (lockstatus(&kernel_map->lock, NULL)) {
- mtx_unlock(&z->zmtx);
item = (void *) kmem_malloc(kmem_map, nbytes, M_WAITOK);
- mtx_lock(&z->zmtx);
if (item != NULL)
atomic_add_int(&zone_kmem_pages, z->zalloc);
} else {
- mtx_unlock(&z->zmtx);
item = (void *) kmem_alloc(kernel_map, nbytes);
- mtx_lock(&z->zmtx);
if (item != NULL)
atomic_add_int(&zone_kern_pages, z->zalloc);
}
@@ -318,6 +320,7 @@ _zget(vm_zone_t z)
nbytes = 0;
}
nitems = nbytes / z->zsize;
+ mtx_lock(&z->zmtx);
}
z->ztotal += nitems;
@@ -361,14 +364,17 @@ void *
zalloc(vm_zone_t z)
{
void *item;
+ int hadvmlock;
KASSERT(z != NULL, ("invalid zone"));
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
mtx_lock(&z->zmtx);
if (z->zfreecnt <= z->zfreemin) {
item = _zget(z);
- mtx_unlock(&z->zmtx);
- return item;
+ goto out;
}
item = z->zitems;
@@ -381,8 +387,11 @@ zalloc(vm_zone_t z)
z->zfreecnt--;
z->znalloc++;
-
+
+out:
mtx_unlock(&z->zmtx);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return item;
}
@@ -392,8 +401,13 @@ zalloc(vm_zone_t z)
void
zfree(vm_zone_t z, void *item)
{
+ int hadvmlock;
+
KASSERT(z != NULL, ("invalid zone"));
KASSERT(item != NULL, ("invalid item"));
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
mtx_lock(&z->zmtx);
((void **) item)[0] = z->zitems;
@@ -405,6 +419,8 @@ zfree(vm_zone_t z, void *item)
z->zitems = item;
z->zfreecnt++;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
mtx_unlock(&z->zmtx);
}
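
zalloc() and zfree() above take vm_mtx (unless the caller already holds it) before the per-zone z->zmtx, fixing the acquisition order as vm_mtx first, zone mutex second; that is what lets _zget() drop only z->zmtx around kmem_alloc()/kmem_malloc(). The ordering skeleton, with the freelist manipulation elided:

    int hadvmlock;

    hadvmlock = mtx_owned(&vm_mtx);
    if (!hadvmlock)
        mtx_lock(&vm_mtx);              /* vm_mtx first ... */
    mtx_lock(&z->zmtx);                 /* ... then the zone's own mutex */

    /* ... pop an item from z->zitems, or refill via _zget(z) ... */

    mtx_unlock(&z->zmtx);
    if (!hadvmlock)
        mtx_unlock(&vm_mtx);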
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index e9400b8..12763c8 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -103,6 +103,7 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
vm_object_t object;
struct vnode *vp;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Pageout to vnode, no can do yet.
*/
@@ -122,11 +123,15 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
* Prevent race condition when allocating the object. This
* can happen with NFS vnodes since the nfsnode isn't locked.
*/
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
while (vp->v_flag & VOLOCK) {
vp->v_flag |= VOWANT;
tsleep(vp, PVM, "vnpobj", 0);
}
vp->v_flag |= VOLOCK;
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
/*
* If the object is being terminated, wait for it to
@@ -134,7 +139,7 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
*/
while (((object = vp->v_object) != NULL) &&
(object->flags & OBJ_DEAD)) {
- tsleep(object, PVM, "vadead", 0);
+ msleep(object, &vm_mtx, PVM, "vadead", 0);
}
if (vp->v_usecount == 0)
@@ -157,11 +162,15 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
vp->v_usecount++;
}
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
vp->v_flag &= ~VOLOCK;
if (vp->v_flag & VOWANT) {
vp->v_flag &= ~VOWANT;
wakeup(vp);
}
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
return (object);
}
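
The vnode fields touched here (v_flag, VOLOCK/VOWANT) are still covered by Giant rather than vm_mtx, so vnode_pager_alloc() drops vm_mtx before taking Giant and retakes it afterwards; acquiring Giant while holding vm_mtx would risk inverting the usual Giant-before-vm_mtx order. The pattern around the vnode "object lock" bits, condensed from the hunk above:

    mtx_unlock(&vm_mtx);                /* don't acquire Giant after vm_mtx */
    mtx_lock(&Giant);
    while (vp->v_flag & VOLOCK) {
        vp->v_flag |= VOWANT;
        tsleep(vp, PVM, "vnpobj", 0);
    }
    vp->v_flag |= VOLOCK;               /* v_flag is protected by Giant here */
    mtx_unlock(&Giant);
    mtx_lock(&vm_mtx);                  /* back under the VM lock */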
@@ -221,8 +230,12 @@ vnode_pager_haspage(object, pindex, before, after)
blocksperpage = (PAGE_SIZE / bsize);
reqblock = pindex * blocksperpage;
}
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
after, before);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if (err)
return TRUE;
if ( bn == -1)
@@ -285,6 +298,11 @@ vnode_pager_setsize(vp, nsize)
* File has shrunk. Toss any cached pages beyond the new EOF.
*/
if (nsize < object->un_pager.vnp.vnp_size) {
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
vm_freeze_copyopts(object, OFF_TO_IDX(nsize), object->size);
if (nobjsize < object->size) {
vm_object_page_remove(object, nobjsize, object->size,
@@ -325,6 +343,8 @@ vnode_pager_setsize(vp, nsize)
m->dirty = VM_PAGE_BITS_ALL;
}
}
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
object->un_pager.vnp.vnp_size = nsize;
object->size = nobjsize;
@@ -542,8 +562,8 @@ vnode_pager_input_old(object, m)
*/
/*
- * EOPNOTSUPP is no longer legal. For local media VFS's that do not
- * implement their own VOP_GETPAGES, their VOP_GETPAGES should call to
+ * Local media VFS's that do not implement their own VOP_GETPAGES
+ * should have their VOP_GETPAGES call
* vnode_pager_generic_getpages() to implement the previous behaviour.
*
* All other FS's should use the bypass to get to the local media
@@ -560,16 +580,11 @@ vnode_pager_getpages(object, m, count, reqpage)
struct vnode *vp;
int bytes = count * PAGE_SIZE;
+ mtx_assert(&vm_mtx, MA_OWNED);
vp = object->handle;
- /*
- * XXX temporary diagnostic message to help track stale FS code,
- * Returning EOPNOTSUPP from here may make things unhappy.
- */
rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
- if (rtval == EOPNOTSUPP) {
- printf("vnode_pager: *** WARNING *** stale FS getpages\n");
- rtval = vnode_pager_generic_getpages( vp, m, bytes, reqpage);
- }
+ KASSERT(rtval != EOPNOTSUPP,
+ ("vnode_pager: FS getpages not implemented\n"));
return rtval;
}
@@ -891,13 +906,19 @@ vnode_pager_putpages(object, m, count, sync, rtvals)
vp = object->handle;
if (vp->v_type != VREG)
mp = NULL;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
(void)vn_start_write(vp, &mp, V_WAIT);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
- if (rtval == EOPNOTSUPP) {
- printf("vnode_pager: *** WARNING *** stale FS putpages\n");
- rtval = vnode_pager_generic_putpages( vp, m, bytes, sync, rtvals);
- }
+ KASSERT(rtval != EOPNOTSUPP,
+ ("vnode_pager: stale FS putpages\n"));
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
vn_finished_write(mp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
@@ -1000,6 +1021,8 @@ vnode_pager_lock(object)
{
struct proc *p = curproc; /* XXX */
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+ mtx_assert(&Giant, MA_OWNED);
for (; object != NULL; object = object->backing_object) {
if (object->type != OBJT_VNODE)
continue;