Diffstat (limited to 'sys/kern')

 sys/kern/imgact_aout.c   |    8
 sys/kern/imgact_elf.c    |   21
 sys/kern/init_main.c     |    4
 sys/kern/kern_exec.c     |    7
 sys/kern/kern_exit.c     |    2
 sys/kern/kern_fork.c     |    2
 sys/kern/kern_resource.c |    2
 sys/kern/kern_synch.c    |    7
 sys/kern/link_elf.c      |    8
 sys/kern/link_elf_obj.c  |    8
 sys/kern/subr_blist.c    |    1
 sys/kern/subr_trap.c     |   13
 sys/kern/sys_pipe.c      |   11
 sys/kern/syscalls.master |   34
 sys/kern/sysv_shm.c      |    6
 sys/kern/vfs_bio.c       |  105
 sys/kern/vfs_cluster.c   |   10
 sys/kern/vfs_default.c   |   16
 sys/kern/vfs_extattr.c   |    7
 sys/kern/vfs_subr.c      |   15
 sys/kern/vfs_syscalls.c  |    7

 21 files changed, 259 insertions, 35 deletions
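Every hunk below applies the same basic discipline: code paths that touch vm_map/vm_object state take the new global vm_mtx first and release it on every exit path, including early error returns (as the imgact hunks below do). A minimal user-space sketch of that bracketing discipline, using POSIX threads in place of the kernel's mtx(9) API; vm_op() and the error value are made up for illustration:

    #include <errno.h>
    #include <pthread.h>

    static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;
    static int vm_state;

    static int
    vm_op(int arg)
    {
            /* Stands in for vm_map_find() and friends. */
            vm_state += arg;
            return (arg < 0 ? EINVAL : 0);
    }

    static int
    do_vm_work(int arg)
    {
            int error;

            pthread_mutex_lock(&vm_mtx);            /* cf. mtx_lock(&vm_mtx) */
            error = vm_op(arg);
            if (error) {
                    /* Error paths must unlock too. */
                    pthread_mutex_unlock(&vm_mtx);
                    return (error);
            }
            pthread_mutex_unlock(&vm_mtx);          /* cf. mtx_unlock(&vm_mtx) */
            return (0);
    }

    int
    main(void)
    {
            return (do_vm_work(1) || do_vm_work(-1) != EINVAL);
    }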
diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c
index 9478eb3..8becda3 100644
--- a/sys/kern/imgact_aout.c
+++ b/sys/kern/imgact_aout.c
@@ -171,6 +171,7 @@ exec_aout_imgact(imgp)
 	if (error)
 		return (error);
 
+	mtx_lock(&vm_mtx);
 	/*
 	 * Destroy old process VM and create a new one (with a new stack)
 	 */
@@ -184,7 +185,9 @@ exec_aout_imgact(imgp)
 	vp = imgp->vp;
 	map = &vmspace->vm_map;
 	vm_map_lock(map);
+	mtx_unlock(&vm_mtx);
 	VOP_GETVOBJECT(vp, &object);
+	mtx_lock(&vm_mtx);
 	vm_object_reference(object);
 
 	text_end = virtual_offset + a_out->a_text;
@@ -195,6 +198,7 @@ exec_aout_imgact(imgp)
 	    MAP_COPY_ON_WRITE | MAP_PREFAULT);
 	if (error) {
 		vm_map_unlock(map);
+		mtx_unlock(&vm_mtx);
 		return (error);
 	}
 	data_end = text_end + a_out->a_data;
@@ -207,6 +211,7 @@ exec_aout_imgact(imgp)
 		    MAP_COPY_ON_WRITE | MAP_PREFAULT);
 		if (error) {
 			vm_map_unlock(map);
+			mtx_unlock(&vm_mtx);
 			return (error);
 		}
 	}
@@ -217,6 +222,7 @@ exec_aout_imgact(imgp)
 		    VM_PROT_ALL, VM_PROT_ALL, 0);
 		if (error) {
 			vm_map_unlock(map);
+			mtx_unlock(&vm_mtx);
 			return (error);
 		}
 	}
@@ -229,6 +235,8 @@ exec_aout_imgact(imgp)
 	vmspace->vm_daddr = (caddr_t) (uintptr_t)
 	    (virtual_offset + a_out->a_text);
+	mtx_unlock(&vm_mtx);
+
 
 	/* Fill in image_params */
 	imgp->interpreted = 0;
 	imgp->entry_addr = a_out->a_entry;
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index da7b9cb..2a15e9c 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -230,6 +230,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
 	else
 		map_len = round_page(offset+filsz) - file_addr;
 
+	mtx_lock(&vm_mtx);
 	if (map_len != 0) {
 		vm_object_reference(object);
 		vm_map_lock(&vmspace->vm_map);
@@ -244,12 +245,15 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
 		vm_map_unlock(&vmspace->vm_map);
 		if (rv != KERN_SUCCESS) {
 			vm_object_deallocate(object);
+			mtx_unlock(&vm_mtx);
 			return EINVAL;
 		}
 
 		/* we can stop now if we've covered it all */
-		if (memsz == filsz)
+		if (memsz == filsz) {
+			mtx_unlock(&vm_mtx);
 			return 0;
+		}
 	}
 
@@ -270,8 +274,10 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
 		    map_addr, map_addr + map_len,
 		    VM_PROT_ALL, VM_PROT_ALL, 0);
 		vm_map_unlock(&vmspace->vm_map);
-		if (rv != KERN_SUCCESS)
+		if (rv != KERN_SUCCESS) {
+			mtx_unlock(&vm_mtx);
 			return EINVAL;
+		}
 	}
 
 	if (copy_len != 0) {
@@ -287,14 +293,19 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
 		    MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
 		if (rv != KERN_SUCCESS) {
 			vm_object_deallocate(object);
+			mtx_unlock(&vm_mtx);
 			return EINVAL;
 		}
 
 		/* send the page fragment to user space */
+		mtx_unlock(&vm_mtx);
 		error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len);
+		mtx_lock(&vm_mtx);
 		vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
-		if (error)
+		if (error) {
+			mtx_unlock(&vm_mtx);
 			return (error);
+		}
 	}
 
 	/*
@@ -303,6 +314,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
 	vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len,
 	    prot, FALSE);
+	mtx_unlock(&vm_mtx);
 
 	return error;
 }
@@ -498,9 +510,11 @@ exec_elf_imgact(struct image_params *imgp)
 	if ((error = exec_extract_strings(imgp)) != 0)
 		goto fail;
 
+	mtx_lock(&vm_mtx);
 	exec_new_vmspace(imgp);
 
 	vmspace = imgp->proc->p_vmspace;
+	mtx_unlock(&vm_mtx);
 
 	for (i = 0; i < hdr->e_phnum; i++) {
 		switch(phdr[i].p_type) {
@@ -557,6 +571,7 @@ exec_elf_imgact(struct image_params *imgp)
 		}
 	}
 
+	/* XXX: lock the vm_mtx when twiddling vmspace? */
 	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
 	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
 	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
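A recurring wrinkle in the imgact hunks above: operations that can sleep or take other locks, such as VOP_GETVOBJECT() and copyout(), are not allowed under vm_mtx, so the lock is dropped around them and retaken afterwards. A rough pthreads sketch of that drop/reacquire pattern; blocking_copy() is a made-up stand-in for copyout(), and the kernel code differs in detail:

    #include <pthread.h>
    #include <string.h>
    #include <unistd.h>

    static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;
    static char shared_buf[64];     /* stands in for the mapped fragment */

    /* Stand-in for copyout(): may block, so it must not hold vm_mtx. */
    static int
    blocking_copy(char *dst, size_t len)
    {
            memcpy(dst, shared_buf, len < 9 ? len : 9);
            usleep(1000);           /* pretend we took a page fault */
            return (0);
    }

    static int
    send_fragment(char *dst, size_t len)
    {
            int error;

            pthread_mutex_lock(&vm_mtx);
            strcpy(shared_buf, "fragment");   /* set up state under the lock */
            pthread_mutex_unlock(&vm_mtx);    /* drop across the blocking call */
            error = blocking_copy(dst, len);
            pthread_mutex_lock(&vm_mtx);
            /*
             * Re-locked: anything derived from shared state before the drop
             * may be stale and must be revalidated or torn down, the way
             * elf_load_section() still does its vm_map_remove() here.
             */
            pthread_mutex_unlock(&vm_mtx);
            return (error);
    }

    int
    main(void)
    {
            char dst[16];

            return (send_fragment(dst, sizeof(dst)));
    }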
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index f1a6a0b..6f5c653 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -485,11 +485,15 @@ start_init(void *dummy)
 	 * Need just enough stack to hold the faked-up "execve()" arguments.
 	 */
 	addr = trunc_page(USRSTACK - PAGE_SIZE);
+	mtx_unlock(&Giant);
+	mtx_lock(&vm_mtx);
 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
 			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
 		panic("init: couldn't allocate argument space");
 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
 	p->p_vmspace->vm_ssize = 1;
+	mtx_unlock(&vm_mtx);
+	mtx_lock(&Giant);
 
 	if ((var = getenv("init_path")) != NULL) {
 		strncpy(init_path, var, sizeof init_path);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 0b1b29e..8f49538 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -412,6 +412,7 @@ exec_map_first_page(imgp)
 	VOP_GETVOBJECT(imgp->vp, &object);
 	s = splvm();
 
+	mtx_lock(&vm_mtx);
 	ma[0] = vm_page_grab(object, 0,
 	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
@@ -443,6 +444,7 @@ exec_map_first_page(imgp)
 			vm_page_free(ma[0]);
 		}
 		splx(s);
+		mtx_unlock(&vm_mtx);
 		return EIO;
 	}
 }
@@ -454,6 +456,7 @@ exec_map_first_page(imgp)
 	pmap_kenter((vm_offset_t) imgp->image_header,
 	    VM_PAGE_TO_PHYS(ma[0]));
 	imgp->firstpage = ma[0];
+	mtx_unlock(&vm_mtx);
 
 	return 0;
 }
@@ -461,9 +464,12 @@ void
 exec_unmap_first_page(imgp)
 	struct image_params *imgp;
 {
+
 	if (imgp->firstpage) {
+		mtx_lock(&vm_mtx);
 		pmap_kremove((vm_offset_t) imgp->image_header);
 		vm_page_unwire(imgp->firstpage, 1);
+		mtx_unlock(&vm_mtx);
 		imgp->firstpage = NULL;
 	}
 }
@@ -482,6 +488,7 @@ exec_new_vmspace(imgp)
 	caddr_t stack_addr = (caddr_t) (USRSTACK - MAXSSIZ);
 	vm_map_t map = &vmspace->vm_map;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	imgp->vmspace_destroyed = 1;
 
 	/*
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index d5dccab..1af27d2 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -222,6 +222,7 @@ exit1(p, rv)
 	 * Can't free the entire vmspace as the kernel stack
 	 * may be mapped within that space also.
 	 */
+	mtx_lock(&vm_mtx);
 	if (vm->vm_refcnt == 1) {
 		if (vm->vm_shm)
 			shmexit(p);
@@ -230,6 +231,7 @@ exit1(p, rv)
 		(void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
 		    VM_MAXUSER_ADDRESS);
 	}
+	mtx_unlock(&vm_mtx);
 
 	PROC_LOCK(p);
 	if (SESS_LEADER(p)) {
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index d3b991d..62dcc06 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -220,6 +220,7 @@ fork1(p1, flags, procp)
 	if ((flags & RFPROC) == 0) {
 		vm_fork(p1, 0, flags);
+		mtx_assert(&vm_mtx, MA_NOTOWNED);
 
 		/*
 		 * Close all file descriptors.
@@ -567,6 +568,7 @@ again:
 	 * execution path later.  (ie: directly into user mode)
 	 */
 	vm_fork(p1, p2, flags);
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 
 	if (flags == (RFFDG | RFPROC)) {
 		cnt.v_forks++;
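Note how start_init() above swaps Giant for vm_mtx and then swaps back, rather than nesting the two: the big locks are never held at once, which sidesteps any ordering question between them. A user-space sketch of that hand-off, assuming the caller holds "giant" on entry and exit; the names are illustrative, not the kernel API:

    #include <pthread.h>

    static pthread_mutex_t giant = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;

    static void
    vm_section(void)
    {
            /* ... vm_map_find() and friends would run here ... */
    }

    /* Called with giant held; returns with giant held. */
    static void
    with_vm_lock(void)
    {
            pthread_mutex_unlock(&giant);   /* never hold both at once */
            pthread_mutex_lock(&vm_mtx);
            vm_section();
            pthread_mutex_unlock(&vm_mtx);
            pthread_mutex_lock(&giant);
    }

    int
    main(void)
    {
            pthread_mutex_lock(&giant);
            with_vm_lock();
            pthread_mutex_unlock(&giant);
            return (0);
    }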
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 27431ab..f46313c 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -498,8 +498,10 @@ dosetrlimit(p, which, limp)
 		}
 		addr = trunc_page(addr);
 		size = round_page(size);
+		mtx_lock(&vm_mtx);
 		(void) vm_map_protect(&p->p_vmspace->vm_map, addr,
 		    addr+size, prot, FALSE);
+		mtx_unlock(&vm_mtx);
 		}
 		break;
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 7d793de..e09a377 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -378,6 +378,13 @@ msleep(ident, mtx, priority, wmesg, timo)
 	int rval = 0;
 	WITNESS_SAVE_DECL(mtx);
 
+	KASSERT(ident == &proc0 ||	/* XXX: swapper */
+	    timo != 0 ||		/* XXX: we might still miss a wakeup */
+	    mtx_owned(&Giant) || mtx != NULL,
+	    ("indefinite sleep without mutex, wmesg: \"%s\" ident: %p",
+	    wmesg, ident));
+	if (mtx_owned(&vm_mtx) && mtx != &vm_mtx)
+		panic("sleeping with vm_mtx held.");
 #ifdef KTRACE
 	if (p && KTRPOINT(p, KTR_CSW))
 		ktrcsw(p->p_tracep, 1, 0);
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 344f163..613d1e4 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
     ef = (elf_file_t) lf;
 
 #ifdef SPARSE_MAPPING
+    mtx_lock(&vm_mtx);
     ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
     if (ef->object == NULL) {
+	mtx_unlock(&vm_mtx);
 	free(ef, M_LINKER);
 	error = ENOMEM;
 	goto out;
     }
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
 			VM_PROT_ALL, VM_PROT_ALL, 0);
     if (error) {
 	vm_object_deallocate(ef->object);
+	mtx_unlock(&vm_mtx);
 	ef->object = 0;
 	goto out;
     }
+    mtx_unlock(&vm_mtx);
 #else
     ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
     if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
 	    /*
 	     * Wire down the pages
 	     */
+	    mtx_lock(&vm_mtx);
 	    vm_map_pageable(kernel_map,
 			    (vm_offset_t) segbase,
 			    (vm_offset_t) segbase + segs[i]->p_memsz,
 			    FALSE);
+	    mtx_unlock(&vm_mtx);
 #endif
 	}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
     }
 #ifdef SPARSE_MAPPING
     if (ef->object) {
+	mtx_lock(&vm_mtx);
 	vm_map_remove(kernel_map, (vm_offset_t) ef->address,
 		      (vm_offset_t) ef->address
 		      + (ef->object->size << PAGE_SHIFT));
 	vm_object_deallocate(ef->object);
+	mtx_unlock(&vm_mtx);
     }
 #else
     if (ef->address)
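The msleep() hunk above turns "don't sleep while holding vm_mtx" into a runtime check (unless vm_mtx itself is passed as the sleep interlock). pthreads has no mtx_owned(), so a user-space sketch of the same check needs explicit owner tracking; this is an analogue of the idea, not the kernel implementation:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* pthreads has no mtx_owned(), so track the owner by hand. */
    struct owned_mtx {
            pthread_mutex_t mtx;
            pthread_t       owner;
            int             held;
    };

    static struct owned_mtx vm_mtx = { PTHREAD_MUTEX_INITIALIZER };

    static void
    omtx_lock(struct owned_mtx *m)
    {
            pthread_mutex_lock(&m->mtx);
            m->owner = pthread_self();
            m->held = 1;
    }

    static void
    omtx_unlock(struct owned_mtx *m)
    {
            m->held = 0;
            pthread_mutex_unlock(&m->mtx);
    }

    static int
    omtx_owned(struct owned_mtx *m)
    {
            return (m->held && pthread_equal(m->owner, pthread_self()));
    }

    /* Analogue of the msleep() check: refuse to sleep with vm_mtx held. */
    static void
    sleep_on(void *ident, struct owned_mtx *interlock)
    {
            if (omtx_owned(&vm_mtx) && interlock != &vm_mtx) {
                    fprintf(stderr, "sleeping with vm_mtx held.\n");
                    abort();
            }
            /* ... block here ... */
    }

    int
    main(void)
    {
            sleep_on(&vm_mtx, NULL);        /* fine: lock not held */
            omtx_lock(&vm_mtx);
            sleep_on(&vm_mtx, &vm_mtx);     /* fine: vm_mtx is the interlock */
            omtx_unlock(&vm_mtx);
            return (0);
    }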
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index 344f163..613d1e4 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
     ef = (elf_file_t) lf;
 
 #ifdef SPARSE_MAPPING
+    mtx_lock(&vm_mtx);
     ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
     if (ef->object == NULL) {
+	mtx_unlock(&vm_mtx);
 	free(ef, M_LINKER);
 	error = ENOMEM;
 	goto out;
     }
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
 			VM_PROT_ALL, VM_PROT_ALL, 0);
     if (error) {
 	vm_object_deallocate(ef->object);
+	mtx_unlock(&vm_mtx);
 	ef->object = 0;
 	goto out;
     }
+    mtx_unlock(&vm_mtx);
 #else
     ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
     if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
 	    /*
 	     * Wire down the pages
 	     */
+	    mtx_lock(&vm_mtx);
 	    vm_map_pageable(kernel_map,
 			    (vm_offset_t) segbase,
 			    (vm_offset_t) segbase + segs[i]->p_memsz,
 			    FALSE);
+	    mtx_unlock(&vm_mtx);
 #endif
 	}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
     }
 #ifdef SPARSE_MAPPING
     if (ef->object) {
+	mtx_lock(&vm_mtx);
 	vm_map_remove(kernel_map, (vm_offset_t) ef->address,
 		      (vm_offset_t) ef->address
 		      + (ef->object->size << PAGE_SHIFT));
 	vm_object_deallocate(ef->object);
+	mtx_unlock(&vm_mtx);
     }
 #else
     if (ef->address)
diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c
index 9ac4338..061d151 100644
--- a/sys/kern/subr_blist.c
+++ b/sys/kern/subr_blist.c
@@ -71,6 +71,7 @@
 #include <sys/kernel.h>
 #include <sys/blist.h>
 #include <sys/malloc.h>
+#include <sys/mutex.h>
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_kern.h>
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index af7bfc1..8924fa2 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -330,9 +330,7 @@ restart:
 			 */
 			eva = rcr2();
 			enable_intr();
-			mtx_lock(&Giant);
 			i = trap_pfault(&frame, TRUE, eva);
-			mtx_unlock(&Giant);
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 			if (i == -2) {
 				/*
@@ -443,9 +441,7 @@ restart:
 			 */
 			eva = rcr2();
 			enable_intr();
-			mtx_lock(&Giant);
 			(void) trap_pfault(&frame, FALSE, eva);
-			mtx_unlock(&Giant);
 			goto out;
 
 		case T_DNA:
@@ -887,7 +883,9 @@ nogo:
 		frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
 		return (0);
 	}
+	mtx_lock(&Giant);
 	trap_fatal(frame, eva);
+	mtx_unlock(&Giant);
 	return (-1);
 }
@@ -1147,14 +1145,17 @@ syscall(frame)
 
 	/*
 	 * Try to run the syscall without the MP lock if the syscall
-	 * is MP safe.  We have to obtain the MP lock no matter what if
-	 * we are ktracing
+	 * is MP safe.
 	 */
 	if ((callp->sy_narg & SYF_MPSAFE) == 0) {
 		mtx_lock(&Giant);
 	}
 
 #ifdef KTRACE
+	/*
+	 * We have to obtain the MP lock no matter what if
+	 * we are ktracing
+	 */
 	if (KTRPOINT(p, KTR_SYSCALL)) {
 		if (!mtx_owned(&Giant))
 			mtx_lock(&Giant);
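The syscall() change above, together with the MPSAFE annotations added to syscalls.master below, makes Giant acquisition data-driven: the lock is taken only for table entries that lack the flag. A simplified, runnable sketch of that dispatch shape; the real sysent packs the flag into sy_narg and the handlers take proc and argument pointers, so this reduced struct is an invention for illustration:

    #include <pthread.h>
    #include <stdio.h>

    #define SYF_MPSAFE      0x1     /* entry is safe without the global lock */

    static pthread_mutex_t giant = PTHREAD_MUTEX_INITIALIZER;

    struct sysent {
            int     sy_flags;               /* kernel packs this into sy_narg */
            int     (*sy_call)(void);
    };

    static int vm_call(void) { return (0); }       /* e.g. munmap(): MPSAFE */
    static int legacy_call(void) { return (0); }   /* still needs Giant */

    static struct sysent table[] = {
            { SYF_MPSAFE,   vm_call },
            { 0,            legacy_call },
    };

    static int
    dispatch(int code)
    {
            struct sysent *callp = &table[code];
            int error, locked = 0;

            if ((callp->sy_flags & SYF_MPSAFE) == 0) {
                    pthread_mutex_lock(&giant);
                    locked = 1;
            }
            error = callp->sy_call();
            if (locked)
                    pthread_mutex_unlock(&giant);
            return (error);
    }

    int
    main(void)
    {
            printf("%d %d\n", dispatch(0), dispatch(1));
            return (0);
    }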
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 0c32402..a788448 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -56,6 +56,7 @@
 #include <sys/filedesc.h>
 #include <sys/filio.h>
 #include <sys/lock.h>
+#include <sys/mutex.h>
 #include <sys/ttycom.h>
 #include <sys/stat.h>
 #include <sys/poll.h>
@@ -253,6 +254,7 @@ pipespace(cpipe, size)
 	 * kernel_object.
 	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
 	 */
+	mtx_lock(&vm_mtx);
 	object = vm_object_allocate(OBJT_DEFAULT, npages);
 	buffer = (caddr_t) vm_map_min(kernel_map);
 
@@ -264,6 +266,7 @@ pipespace(cpipe, size)
 	error = vm_map_find(kernel_map, object, 0,
 	    (vm_offset_t *) &buffer, size, 1,
 	    VM_PROT_ALL, VM_PROT_ALL, 0);
+	mtx_unlock(&vm_mtx);
 
 	if (error != KERN_SUCCESS) {
 		vm_object_deallocate(object);
@@ -551,6 +554,7 @@ pipe_build_write_buffer(wpipe, uio)
 		size = wpipe->pipe_buffer.size;
 	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
 
+	mtx_lock(&vm_mtx);
 	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
 	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
 		vm_page_t m;
@@ -561,6 +565,7 @@ pipe_build_write_buffer(wpipe, uio)
 			for (j = 0; j < i; j++)
 				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
+			mtx_unlock(&vm_mtx);
 			return (EFAULT);
 		}
 
@@ -592,6 +597,7 @@ pipe_build_write_buffer(wpipe, uio)
 	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
 		wpipe->pipe_map.npages);
+	mtx_unlock(&vm_mtx);
 
 	/*
 	 * and update the uio data
 	 */
@@ -625,8 +631,10 @@ pipe_destroy_write_buffer(wpipe)
 			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
 		}
 	}
+	mtx_lock(&vm_mtx);
 	for (i = 0; i < wpipe->pipe_map.npages; i++)
 		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
+	mtx_unlock(&vm_mtx);
 }
@@ -1199,12 +1207,13 @@ pipeclose(cpipe)
 			wakeup(ppipe);
 			ppipe->pipe_peer = NULL;
 		}
-
 		/*
 		 * free resources
 		 */
+		mtx_lock(&vm_mtx);
 		pipe_free_kmem(cpipe);
 		zfree(pipe_zone, cpipe);
+		mtx_unlock(&vm_mtx);
 	}
 }
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 32255bc..269814c 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -61,7 +61,7 @@
 14	STD	POSIX	{ int mknod(char *path, int mode, int dev); }
 15	STD	POSIX	{ int chmod(char *path, int mode); }
 16	STD	POSIX	{ int chown(char *path, int uid, int gid); }
-17	STD	BSD	{ int obreak(char *nsize); } break obreak_args int
+17	MPSAFE	STD	BSD	{ int obreak(char *nsize); } break obreak_args int
 18	STD	BSD	{ int getfsstat(struct statfs *buf, long bufsize, \
 			    int flags); }
 19	COMPAT	POSIX	{ long lseek(int fd, long offset, int whence); }
@@ -121,23 +121,23 @@
 62	COMPAT	POSIX	{ int fstat(int fd, struct ostat *sb); }
 63	COMPAT	BSD	{ int getkerninfo(int op, char *where, size_t *size, \
 			    int arg); } getkerninfo getkerninfo_args int
-64	COMPAT	BSD	{ int getpagesize(void); } \
+64	MPSAFE	COMPAT	BSD	{ int getpagesize(void); } \
 			    getpagesize getpagesize_args int
 65	STD	BSD	{ int msync(void *addr, size_t len, int flags); }
 66	STD	BSD	{ int vfork(void); }
 67	OBSOL	NOHIDE	vread
 68	OBSOL	NOHIDE	vwrite
-69	STD	BSD	{ int sbrk(int incr); }
-70	STD	BSD	{ int sstk(int incr); }
-71	COMPAT	BSD	{ int mmap(void *addr, int len, int prot, \
+69	MPSAFE	STD	BSD	{ int sbrk(int incr); }
+70	MPSAFE	STD	BSD	{ int sstk(int incr); }
+71	MPSAFE	COMPAT	BSD	{ int mmap(void *addr, int len, int prot, \
 			    int flags, int fd, long pos); }
-72	STD	BSD	{ int ovadvise(int anom); } vadvise ovadvise_args int
-73	STD	BSD	{ int munmap(void *addr, size_t len); }
-74	STD	BSD	{ int mprotect(const void *addr, size_t len, int prot); }
-75	STD	BSD	{ int madvise(void *addr, size_t len, int behav); }
+72	MPSAFE	STD	BSD	{ int ovadvise(int anom); } vadvise ovadvise_args int
+73	MPSAFE	STD	BSD	{ int munmap(void *addr, size_t len); }
+74	MPSAFE	STD	BSD	{ int mprotect(const void *addr, size_t len, int prot); }
+75	MPSAFE	STD	BSD	{ int madvise(void *addr, size_t len, int behav); }
 76	OBSOL	NOHIDE	vhangup
 77	OBSOL	NOHIDE	vlimit
-78	STD	BSD	{ int mincore(const void *addr, size_t len, \
+78	MPSAFE	STD	BSD	{ int mincore(const void *addr, size_t len, \
 			    char *vec); }
 79	STD	POSIX	{ int getgroups(u_int gidsetsize, gid_t *gidset); }
 80	STD	POSIX	{ int setgroups(u_int gidsetsize, gid_t *gidset); }
@@ -306,7 +306,7 @@ setrlimit __setrlimit_args int
 196	STD	BSD	{ int getdirentries(int fd, char *buf, u_int count, \
 			    long *basep); }
-197	STD	BSD	{ caddr_t mmap(caddr_t addr, size_t len, int prot, \
+197	MPSAFE	STD	BSD	{ caddr_t mmap(caddr_t addr, size_t len, int prot, \
 			    int flags, int fd, int pad, off_t pos); }
 198	STD	NOHIDE	{ int nosys(void); } __syscall __syscall_args int
 199	STD	POSIX	{ off_t lseek(int fd, int pad, off_t offset, \
 			    int whence); }
@@ -318,8 +318,8 @@ __sysctl sysctl_args int
 ; properly, __sysctl should be a NOHIDE, but making an exception
 ; here allows to avoid one in libc/sys/Makefile.inc.
-203	STD	BSD	{ int mlock(const void *addr, size_t len); }
-204	STD	BSD	{ int munlock(const void *addr, size_t len); }
+203	MPSAFE	STD	BSD	{ int mlock(const void *addr, size_t len); }
+204	MPSAFE	STD	BSD	{ int munlock(const void *addr, size_t len); }
 205	STD	BSD	{ int undelete(char *path); }
 206	STD	BSD	{ int futimes(int fd, struct timeval *tptr); }
 207	STD	BSD	{ int getpgid(pid_t pid); }
@@ -386,7 +386,7 @@
 248	UNIMPL	NOHIDE	nosys
 249	UNIMPL	NOHIDE	nosys
 ; syscall numbers initially used in OpenBSD
-250	STD	BSD	{ int minherit(void *addr, size_t len, int inherit); }
+250	MPSAFE	STD	BSD	{ int minherit(void *addr, size_t len, int inherit); }
 251	STD	BSD	{ int rfork(int flags); }
 252	STD	BSD	{ int openbsd_poll(struct pollfd *fds, u_int nfds, \
 			    int timeout); }
@@ -414,7 +414,7 @@
 274	STD	BSD	{ int lchmod(char *path, mode_t mode); }
 275	NOPROTO	BSD	{ int lchown(char *path, uid_t uid, gid_t gid); } netbsd_lchown lchown_args int
 276	STD	BSD	{ int lutimes(char *path, struct timeval *tptr); }
-277	NOPROTO	BSD	{ int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
+277	MPSAFE	NOPROTO	BSD	{ int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
 278	STD	BSD	{ int nstat(char *path, struct nstat *ub); }
 279	STD	BSD	{ int nfstat(int fd, struct nstat *sb); }
 280	STD	BSD	{ int nlstat(char *path, struct nstat *ub); }
@@ -463,8 +463,8 @@
 321	STD	BSD	{ int yield(void); }
 322	OBSOL	NOHIDE	thr_sleep
 323	OBSOL	NOHIDE	thr_wakeup
-324	STD	BSD	{ int mlockall(int how); }
-325	STD	BSD	{ int munlockall(void); }
+324	MPSAFE	STD	BSD	{ int mlockall(int how); }
+325	MPSAFE	STD	BSD	{ int munlockall(void); }
 326	STD	BSD	{ int __getcwd(u_char *buf, u_int buflen); }
 327	STD	POSIX	{ int sched_setparam (pid_t pid, \
 			    const struct sched_param *param); }
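pipe_build_write_buffer() above wires the user's pages under vm_mtx and, if a page cannot be wired, unwires everything wired so far before unlocking and returning EFAULT: either the whole run is pinned or none of it is. A small sketch of that all-or-nothing rollback shape; wire_page() and unwire_page() are invented stand-ins for the vm_page calls:

    #include <pthread.h>

    static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;

    /* Stand-ins for vm_page wiring; fail on the third "page". */
    static int
    wire_page(int i)
    {
            return (i < 2 ? 0 : -1);
    }

    static void
    unwire_page(int i)
    {
            (void)i;
    }

    /* Pin a run of pages, or pin none at all. */
    static int
    wire_range(int npages)
    {
            int i, j;

            pthread_mutex_lock(&vm_mtx);
            for (i = 0; i < npages; i++) {
                    if (wire_page(i) != 0) {
                            for (j = 0; j < i; j++)  /* roll back partial work */
                                    unwire_page(j);
                            pthread_mutex_unlock(&vm_mtx);
                            return (-1);             /* EFAULT in the kernel */
                    }
            }
            pthread_mutex_unlock(&vm_mtx);
            return (0);
    }

    int
    main(void)
    {
            return (wire_range(2) == 0 && wire_range(4) != 0 ? 0 : 1);
    }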
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index fab53a8..0a9abda 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -43,6 +43,7 @@
 #include <sys/shm.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
+#include <sys/mutex.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
@@ -314,14 +315,17 @@ shmat(p, uap)
 	}
 
 	shm_handle = shmseg->shm_internal;
+	mtx_lock(&vm_mtx);
 	vm_object_reference(shm_handle->shm_object);
 	rv = vm_map_find(&p->p_vmspace->vm_map, shm_handle->shm_object,
 	    0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0);
 	if (rv != KERN_SUCCESS) {
+		mtx_unlock(&vm_mtx);
 		return ENOMEM;
 	}
 	vm_map_inherit(&p->p_vmspace->vm_map,
 	    attach_va, attach_va + size, VM_INHERIT_SHARE);
+	mtx_unlock(&vm_mtx);
 
 	shmmap_s->va = attach_va;
 	shmmap_s->shmid = uap->shmid;
@@ -549,6 +553,7 @@ shmget_allocate_segment(p, uap, mode)
 	 * We make sure that we have allocated a pager before we need
 	 * to.
 	 */
+	mtx_lock(&vm_mtx);
 	if (shm_use_phys) {
 		shm_handle->shm_object =
 		    vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
@@ -558,6 +563,7 @@ shmget_allocate_segment(p, uap, mode)
 	}
 	vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
 	vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);
+	mtx_unlock(&vm_mtx);
 	shmseg->shm_internal = shm_handle;
 	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c1b53d8..a980330 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -281,6 +281,8 @@ waitrunningbufspace(void)
  * Called when a buffer is extended.  This function clears the B_CACHE
  * bit if the newly extended portion of the buffer does not contain
  * valid data.
+ *
+ * must be called with vm_mtx held
  */
 static __inline__
 void
@@ -426,11 +428,13 @@ bufinit(void)
 	 * from buf_daemon.
 	 */
 
+	mtx_lock(&vm_mtx);
 	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 	bogus_page = vm_page_alloc(kernel_object,
 			((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
 			VM_ALLOC_NORMAL);
 	cnt.v_wire_count++;
+	mtx_unlock(&vm_mtx);
 }
@@ -441,17 +445,27 @@ bufinit(void)
  * buffer_map.
  *
  * Since this call frees up buffer space, we call bufspacewakeup().
+ *
+ * Can be called with or without the vm_mtx.
  */
 static void
 bfreekva(struct buf * bp)
 {
+
 	if (bp->b_kvasize) {
+		int hadvmlock;
+
 		++buffreekvacnt;
 		bufspace -= bp->b_kvasize;
+		hadvmlock = mtx_owned(&vm_mtx);
+		if (!hadvmlock)
+			mtx_lock(&vm_mtx);
 		vm_map_delete(buffer_map,
 		    (vm_offset_t) bp->b_kvabase,
 		    (vm_offset_t) bp->b_kvabase + bp->b_kvasize
 		);
+		if (!hadvmlock)
+			mtx_unlock(&vm_mtx);
 		bp->b_kvasize = 0;
 		bufspacewakeup();
 	}
@@ -807,6 +821,7 @@ bdwrite(struct buf * bp)
 		VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
 	}
 
+	mtx_lock(&vm_mtx);
 	/*
 	 * Set the *dirty* buffer range based upon the VM system dirty pages.
 	 */
@@ -820,6 +835,7 @@ bdwrite(struct buf * bp)
 	 * out on the next sync, or perhaps the cluster will be completed.
 	 */
 	vfs_clean_pages(bp);
+	mtx_unlock(&vm_mtx);
 	bqrelse(bp);
 
 	/*
@@ -973,12 +989,15 @@ buf_dirty_count_severe(void)
  * Release a busy buffer and, if requested, free its resources.  The
  * buffer will be stashed in the appropriate bufqueue[] allowing it
  * to be accessed later as a cache entity or reused for other purposes.
+ *
+ * vm_mtx must be not be held.
  */
 void
 brelse(struct buf * bp)
 {
 	int s;
 
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 	KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
 	    ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
 	s = splbio();
@@ -1088,6 +1107,7 @@ brelse(struct buf * bp)
 		resid = bp->b_bufsize;
 		foff = bp->b_offset;
 
+		mtx_lock(&vm_mtx);
 		for (i = 0; i < bp->b_npages; i++) {
 			int had_bogus = 0;
 
@@ -1099,10 +1119,12 @@ brelse(struct buf * bp)
 			 * now.
 			 */
 			if (m == bogus_page) {
+				mtx_unlock(&vm_mtx);
 				VOP_GETVOBJECT(vp, &obj);
 				poff = OFF_TO_IDX(bp->b_offset);
 				had_bogus = 1;
+				mtx_lock(&vm_mtx);
 
 				for (j = i; j < bp->b_npages; j++) {
 					vm_page_t mtmp;
 					mtmp = bp->b_pages[j];
@@ -1136,11 +1158,15 @@ brelse(struct buf * bp)
 		if (bp->b_flags & (B_INVAL | B_RELBUF))
 			vfs_vmio_release(bp);
+		mtx_unlock(&vm_mtx);
 
 	} else if (bp->b_flags & B_VMIO) {
 
-		if (bp->b_flags & (B_INVAL | B_RELBUF))
+		if (bp->b_flags & (B_INVAL | B_RELBUF)) {
+			mtx_lock(&vm_mtx);
 			vfs_vmio_release(bp);
+			mtx_unlock(&vm_mtx);
+		}
 
 	}
@@ -1302,6 +1328,9 @@ bqrelse(struct buf * bp)
 	splx(s);
 }
 
+/*
+ * Must be called with vm_mtx held.
+ */
 static void
 vfs_vmio_release(bp)
 	struct buf *bp;
@@ -1310,6 +1339,7 @@ vfs_vmio_release(bp)
 	vm_page_t m;
 
 	s = splvm();
+	mtx_assert(&vm_mtx, MA_OWNED);
 	for (i = 0; i < bp->b_npages; i++) {
 		m = bp->b_pages[i];
 		bp->b_pages[i] = NULL;
@@ -1343,6 +1373,9 @@ vfs_vmio_release(bp)
 	}
 	splx(s);
 	pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+
+	/* could drop vm_mtx here */
+
 	if (bp->b_bufsize) {
 		bufspacewakeup();
 		bp->b_bufsize = 0;
@@ -1614,7 +1647,9 @@ restart:
 		if (qindex == QUEUE_CLEAN) {
 			if (bp->b_flags & B_VMIO) {
 				bp->b_flags &= ~B_ASYNC;
+				mtx_lock(&vm_mtx);
 				vfs_vmio_release(bp);
+				mtx_unlock(&vm_mtx);
 			}
 			if (bp->b_vp)
 				brelvp(bp);
@@ -1735,6 +1770,8 @@ restart:
 		if (maxsize != bp->b_kvasize) {
 			vm_offset_t addr = 0;
 
+			/* we'll hold the lock over some vm ops */
+			mtx_lock(&vm_mtx);
 			bfreekva(bp);
 
 			if (vm_map_findspace(buffer_map,
@@ -1743,6 +1780,7 @@ restart:
 				 * Uh oh.  Buffer map is to fragmented.  We
 				 * must defragment the map.
 				 */
+				mtx_unlock(&vm_mtx);
 				++bufdefragcnt;
 				defrag = 1;
 				bp->b_flags |= B_INVAL;
@@ -1759,6 +1797,7 @@ restart:
 				bufspace += bp->b_kvasize;
 				++bufreusecnt;
 			}
+			mtx_unlock(&vm_mtx);
 		}
 		bp->b_data = bp->b_kvabase;
 	}
@@ -1936,18 +1975,24 @@ inmem(struct vnode * vp, daddr_t blkno)
 	size = vp->v_mount->mnt_stat.f_iosize;
 	off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
 
+	mtx_lock(&vm_mtx);
 	for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
 		m = vm_page_lookup(obj, OFF_TO_IDX(off + toff));
 		if (!m)
-			return 0;
+			goto notinmem;
 		tinc = size;
 		if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
 			tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
 		if (vm_page_is_valid(m,
 		    (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
-			return 0;
+			goto notinmem;
 	}
+	mtx_unlock(&vm_mtx);
 	return 1;
+
+notinmem:
+	mtx_unlock(&vm_mtx);
+	return (0);
 }
@@ -1960,11 +2005,14 @@ inmem(struct vnode * vp, daddr_t blkno)
  *
  * This routine is primarily used by NFS, but is generalized for the
  * B_VMIO case.
+ *
+ * Can be called with or without vm_mtx
  */
 static void
 vfs_setdirty(struct buf *bp)
 {
 	int i;
+	int hadvmlock;
 	vm_object_t object;
 
 	/*
@@ -1983,6 +2031,10 @@ vfs_setdirty(struct buf *bp)
 	if ((bp->b_flags & B_VMIO) == 0)
 		return;
 
+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
+
 	object = bp->b_pages[0]->object;
 
 	if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY))
@@ -2040,6 +2092,8 @@ vfs_setdirty(struct buf *bp)
 			bp->b_dirtyend = eoffset;
 		}
 	}
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
 }
@@ -2441,6 +2495,7 @@ allocbuf(struct buf *bp, int size)
 			 * DEV_BSIZE aligned existing buffer size.  Figure out
 			 * if we have to remove any pages.
 			 */
+			mtx_lock(&vm_mtx);
 			if (desiredpages < bp->b_npages) {
 				for (i = desiredpages; i < bp->b_npages; i++) {
 					/*
@@ -2461,6 +2516,7 @@ allocbuf(struct buf *bp, int size)
 				    (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages));
 				bp->b_npages = desiredpages;
 			}
+			mtx_unlock(&vm_mtx);
 		} else if (size > bp->b_bcount) {
 			/*
 			 * We are growing the buffer, possibly in a
@@ -2481,6 +2537,7 @@ allocbuf(struct buf *bp, int size)
 			vp = bp->b_vp;
 			VOP_GETVOBJECT(vp, &obj);
 
+			mtx_lock(&vm_mtx);
 			while (bp->b_npages < desiredpages) {
 				vm_page_t m;
 				vm_pindex_t pi;
@@ -2589,6 +2646,9 @@ allocbuf(struct buf *bp, int size)
 				bp->b_pages,
 				bp->b_npages
 			);
+
+			mtx_unlock(&vm_mtx);
+
 			bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
 			    (vm_offset_t)(bp->b_offset & PAGE_MASK));
 		}
@@ -2726,6 +2786,7 @@ bufdone(struct buf *bp)
 		if (error) {
 			panic("biodone: no object");
 		}
+		mtx_lock(&vm_mtx);
 #if defined(VFS_BIO_DEBUG)
 		if (obj->paging_in_progress < bp->b_npages) {
 			printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
@@ -2814,6 +2875,7 @@ bufdone(struct buf *bp)
 		}
 		if (obj)
 			vm_object_pip_wakeupn(obj, 0);
+		mtx_unlock(&vm_mtx);
 	}
@@ -2837,12 +2899,15 @@ bufdone(struct buf *bp)
  * This routine is called in lieu of iodone in the case of
  * incomplete I/O.  This keeps the busy status for pages
  * consistant.
+ *
+ * vm_mtx should not be held
  */
 void
 vfs_unbusy_pages(struct buf * bp)
 {
 	int i;
 
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 	runningbufwakeup(bp);
 	if (bp->b_flags & B_VMIO) {
 		struct vnode *vp = bp->b_vp;
@@ -2850,6 +2915,7 @@ vfs_unbusy_pages(struct buf * bp)
 
 		VOP_GETVOBJECT(vp, &obj);
 
+		mtx_lock(&vm_mtx);
 		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_t m = bp->b_pages[i];
 
@@ -2866,6 +2932,7 @@ vfs_unbusy_pages(struct buf * bp)
 			vm_page_io_finish(m);
 		}
 		vm_object_pip_wakeupn(obj, 0);
+		mtx_unlock(&vm_mtx);
 	}
 }
@@ -2876,12 +2943,15 @@ vfs_unbusy_pages(struct buf * bp)
  * range is restricted to the buffer's size.
 *
 * This routine is typically called after a read completes.
+ *
+ * vm_mtx should be held
 */
 static void
 vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
 {
 	vm_ooffset_t soff, eoff;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	/*
 	 * Start and end offsets in buffer.  eoff - soff may not cross a
 	 * page boundry or cross the end of the buffer.  The end of the
@@ -2917,12 +2987,15 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
  * Since I/O has not been initiated yet, certain buffer flags
  * such as BIO_ERROR or B_INVAL may be in an inconsistant state
  * and should be ignored.
+ *
+ * vm_mtx should not be held
 */
 void
 vfs_busy_pages(struct buf * bp, int clear_modify)
 {
 	int i, bogus;
 
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 	if (bp->b_flags & B_VMIO) {
 		struct vnode *vp = bp->b_vp;
 		vm_object_t obj;
@@ -2932,6 +3005,7 @@ vfs_busy_pages(struct buf * bp, int clear_modify)
 		foff = bp->b_offset;
 		KASSERT(bp->b_offset != NOOFFSET,
 		    ("vfs_busy_pages: no buffer offset"));
+		mtx_lock(&vm_mtx);
 		vfs_setdirty(bp);
 
 retry:
@@ -2979,6 +3053,7 @@ retry:
 		}
 		if (bogus)
 			pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
+		mtx_unlock(&vm_mtx);
 	}
 }
@@ -2989,12 +3064,15 @@ retry:
  *
  * Note that while we only really need to clean through to b_bcount, we
  * just go ahead and clean through to b_bufsize.
+ *
+ * should be called with vm_mtx held
 */
 static void
 vfs_clean_pages(struct buf * bp)
 {
 	int i;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	if (bp->b_flags & B_VMIO) {
 		vm_ooffset_t foff;
@@ -3021,6 +3099,7 @@
  * Set the range within the buffer to valid and clean.  The range is
 * relative to the beginning of the buffer, b_offset.  Note that b_offset
 * itself may be offset from the beginning of the first page.
+ *
 */
 
 void
@@ -3061,13 +3140,18 @@ vfs_bio_set_validclean(struct buf *bp, int base, int size)
 *
 * Note that while we only theoretically need to clear through b_bcount,
 * we go ahead and clear through b_bufsize.
+ *
+ * We'll get vm_mtx here for safety if processing a VMIO buffer.
+ * I don't think vm_mtx is needed, but we're twiddling vm_page flags.
 */
 void
 vfs_bio_clrbuf(struct buf *bp) {
 	int i, mask = 0;
 	caddr_t sa, ea;
+
 	if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
+		mtx_lock(&vm_mtx);
 		bp->b_flags &= ~B_INVAL;
 		bp->b_ioflags &= ~BIO_ERROR;
 		if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
@@ -3079,6 +3163,7 @@ vfs_bio_clrbuf(struct buf *bp) {
 		}
 		bp->b_pages[0]->valid |= mask;
 		bp->b_resid = 0;
+		mtx_unlock(&vm_mtx);
 		return;
 	}
 	ea = sa = bp->b_data;
@@ -3106,6 +3191,7 @@ vfs_bio_clrbuf(struct buf *bp) {
 			vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
 		}
 		bp->b_resid = 0;
+		mtx_unlock(&vm_mtx);
 	} else {
 		clrbuf(bp);
 	}
@@ -3115,18 +3201,22 @@
  * vm_hold_load_pages and vm_hold_unload pages get pages into
 * a buffers address space.  The pages are anonymous and are
 * not associated with a file object.
+ *
+ * vm_mtx should not be held
 */
-void
+static void
 vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
 {
 	vm_offset_t pg;
 	vm_page_t p;
 	int index;
 
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 	to = round_page(to);
 	from = round_page(from);
 	index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
 
+	mtx_lock(&vm_mtx);
 	for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
 
 tryagain:
@@ -3152,6 +3242,7 @@ tryagain:
 		vm_page_wakeup(p);
 	}
 	bp->b_npages = index;
+	mtx_unlock(&vm_mtx);
 }
 
 void
@@ -3160,11 +3251,15 @@
 vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
 {
 	vm_offset_t pg;
 	vm_page_t p;
 	int index, newnpages;
+	int hadvmlock;
 
 	from = round_page(from);
 	to = round_page(to);
 	newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
 	for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
 		p = bp->b_pages[index];
 		if (p && (index < bp->b_npages)) {
@@ -3180,6 +3275,8 @@
 		}
 	}
 	bp->b_npages = newnpages;
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
 }
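Several vfs_bio.c routines above (bfreekva(), vfs_setdirty(), vm_hold_free_pages()) use a conditional-lock idiom so they can be called with or without vm_mtx: query ownership, lock only if not already held, and undo only what this function did. A pthreads analogue with hand-rolled owner tracking, since the kernel's mtx_owned() does this natively; the names are illustrative:

    #include <pthread.h>

    struct owned_mtx {
            pthread_mutex_t mtx;
            pthread_t       owner;
            int             held;
    };

    static struct owned_mtx vm_mtx = { PTHREAD_MUTEX_INITIALIZER };

    static void
    omtx_lock(struct owned_mtx *m)
    {
            pthread_mutex_lock(&m->mtx);
            m->owner = pthread_self();
            m->held = 1;
    }

    static void
    omtx_unlock(struct owned_mtx *m)
    {
            m->held = 0;
            pthread_mutex_unlock(&m->mtx);
    }

    static int
    omtx_owned(struct owned_mtx *m)
    {
            return (m->held && pthread_equal(m->owner, pthread_self()));
    }

    /* Callable with or without vm_mtx held, like bfreekva(). */
    static void
    free_kva(void)
    {
            int hadvmlock;

            hadvmlock = omtx_owned(&vm_mtx);
            if (!hadvmlock)
                    omtx_lock(&vm_mtx);
            /* ... vm_map_delete() would run here ... */
            if (!hadvmlock)
                    omtx_unlock(&vm_mtx);
    }

    int
    main(void)
    {
            free_kva();             /* acquires and drops the lock itself */
            omtx_lock(&vm_mtx);
            free_kva();             /* caller already holds it */
            omtx_unlock(&vm_mtx);
            return (0);
    }

The benefit of the idiom is that a leaf routine composes with callers that already hold the lock, at the cost of a slightly murkier locking contract; the asserts added elsewhere in this commit are the stricter alternative.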
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 8a6e045..0eb47bd 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -433,6 +433,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
 			BUF_KERNPROC(tbp);
 			TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
 				tbp, b_cluster.cluster_entry);
+			mtx_lock(&vm_mtx);
 			for (j = 0; j < tbp->b_npages; j += 1) {
 				vm_page_t m;
 				m = tbp->b_pages[j];
@@ -446,10 +447,12 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
 				if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
 					tbp->b_pages[j] = bogus_page;
 			}
+			mtx_unlock(&vm_mtx);
 			bp->b_bcount += tbp->b_bcount;
 			bp->b_bufsize += tbp->b_bufsize;
 		}
+	mtx_lock(&vm_mtx);
 	for(j=0;j<bp->b_npages;j++) {
 		if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) ==
 		    VM_PAGE_BITS_ALL)
@@ -462,6 +465,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
 	pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
 		(vm_page_t *)bp->b_pages, bp->b_npages);
+	mtx_unlock(&vm_mtx);
 
 	return (bp);
 }
@@ -484,7 +488,9 @@ cluster_callback(bp)
 	if (bp->b_ioflags & BIO_ERROR)
 		error = bp->b_error;
 
+	mtx_lock(&vm_mtx);
 	pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+	mtx_unlock(&vm_mtx);
 	/*
 	 * Move memory from the large cluster buffer into the component
 	 * buffers and mark IO as done on these.
@@ -851,6 +857,7 @@ cluster_wbuild(vp, size, start_lbn, len)
 				}
 			}
 
+			mtx_lock(&vm_mtx);
 			for (j = 0; j < tbp->b_npages; j += 1) {
 				m = tbp->b_pages[j];
 				vm_page_io_start(m);
@@ -861,6 +868,7 @@ cluster_wbuild(vp, size, start_lbn, len)
 					bp->b_npages++;
 				}
 			}
+			mtx_unlock(&vm_mtx);
 		}
 		bp->b_bcount += size;
 		bp->b_bufsize += size;
@@ -879,8 +887,10 @@ cluster_wbuild(vp, size, start_lbn, len)
 				tbp, b_cluster.cluster_entry);
 		}
 	finishcluster:
+		mtx_lock(&vm_mtx);
 		pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
 			(vm_page_t *) bp->b_pages, bp->b_npages);
+		mtx_unlock(&vm_mtx);
 		if (bp->b_bufsize > bp->b_kvasize)
 			panic(
 			    "cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 328a9b1..d17e934 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -535,14 +535,18 @@ retry:
 	if (vp->v_type == VREG || vp->v_type == VDIR) {
 		if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
 			goto retn;
+		mtx_lock(&vm_mtx);
 		object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
+		mtx_unlock(&vm_mtx);
 	} else if (devsw(vp->v_rdev) != NULL) {
 		/*
 		 * This simply allocates the biggest object possible
 		 * for a disk vnode.  This should be fixed, but doesn't
 		 * cause any problems (yet).
 		 */
+		mtx_lock(&vm_mtx);
 		object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
+		mtx_unlock(&vm_mtx);
 	} else {
 		goto retn;
 	}
@@ -550,15 +554,23 @@ retry:
 		 * Dereference the reference we just created.  This assumes
 		 * that the object is associated with the vp.
 		 */
+		mtx_lock(&vm_mtx);
 		object->ref_count--;
+		mtx_unlock(&vm_mtx);
 		vp->v_usecount--;
 	} else {
+		/*
+		 * XXX: safe to hold vm mutex through VOP_UNLOCK?
+		 */
+		mtx_lock(&vm_mtx);
 		if (object->flags & OBJ_DEAD) {
 			VOP_UNLOCK(vp, 0, p);
-			tsleep(object, PVM, "vodead", 0);
+			msleep(object, VM_OBJECT_MTX(object), PVM, "vodead", 0);
+			mtx_unlock(&vm_mtx);
 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 			goto retry;
 		}
+		mtx_unlock(&vm_mtx);
 	}
 
 	KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
@@ -580,6 +592,7 @@ vop_stddestroyvobject(ap)
 	if (vp->v_object == NULL)
 		return (0);
 
+	mtx_lock(&vm_mtx);
 	if (obj->ref_count == 0) {
 		/*
 		 * vclean() may be called twice.  The first time
@@ -594,6 +607,7 @@ vop_stddestroyvobject(ap)
 		 */
 		vm_pager_deallocate(obj);
 	}
+	mtx_unlock(&vm_mtx);
 
 	return (0);
 }
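The vfs_default.c hunk above replaces tsleep() with msleep(object, VM_OBJECT_MTX(object), ...): the sleep now atomically releases the interlock as the thread blocks, closing the window in which a wakeup could slip by between dropping the lock and going to sleep. The user-space counterpart of that guarantee is pthread_cond_wait(), which releases and reacquires the mutex atomically; a minimal sketch with invented names:

    #include <pthread.h>

    static pthread_mutex_t obj_mtx = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t obj_cv = PTHREAD_COND_INITIALIZER;
    static int obj_dead = 1;

    /* Waiter: the mutex is released and reacquired atomically by the wait. */
    static void *
    waiter(void *arg)
    {
            pthread_mutex_lock(&obj_mtx);
            while (obj_dead)
                    pthread_cond_wait(&obj_cv, &obj_mtx);
            pthread_mutex_unlock(&obj_mtx);
            return (arg);
    }

    int
    main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, waiter, NULL);
            pthread_mutex_lock(&obj_mtx);
            obj_dead = 0;                   /* state change under the lock */
            pthread_cond_broadcast(&obj_cv);        /* cf. wakeup(object) */
            pthread_mutex_unlock(&obj_mtx);
            pthread_join(t, NULL);
            return (0);
    }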
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 6b73258..3f97551 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-	if (VOP_GETVOBJECT(vp, &obj) == 0)
+	if (VOP_GETVOBJECT(vp, &obj) == 0) {
+		mtx_unlock(&Giant);
+		mtx_lock(&vm_mtx);
 		vm_object_page_clean(obj, 0, 0, 0);
+		mtx_unlock(&vm_mtx);
+		mtx_lock(&Giant);
+	}
 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
 #ifdef SOFTUPDATES
 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 2f4dc8d..6c050ba 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -711,6 +711,8 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
 	int s, error;
 	vm_object_t object;
 
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
+
 	if (flags & V_SAVE) {
 		s = splbio();
 		while (vp->v_numoutput) {
@@ -797,8 +799,10 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
 	 */
 	mtx_lock(&vp->v_interlock);
 	if (VOP_GETVOBJECT(vp, &object) == 0) {
+		mtx_lock(&vm_mtx);
 		vm_object_page_remove(object, 0, 0,
 		    (flags & V_SAVE) ? TRUE : FALSE);
+		mtx_unlock(&vm_mtx);
 	}
 	mtx_unlock(&vp->v_interlock);
 
@@ -1132,6 +1136,8 @@ speedup_syncer()
  * Also sets B_PAGING flag to indicate that vnode is not fully associated
 * with the buffer.  i.e. the bp has not been linked into the vnode or
 * ref-counted.
+ *
+ * Doesn't block, only vnode seems to need a lock.
 */
 void
 pbgetvp(vp, bp)
@@ -1554,6 +1560,7 @@ vput(vp)
 {
 	struct proc *p = curproc;	/* XXX */
 
+	mtx_assert(&Giant, MA_OWNED);
 	KASSERT(vp != NULL, ("vput: null vp"));
 	mtx_lock(&vp->v_interlock);
 	/* Skip this v_writecount check if we're going to panic below. */
@@ -2382,7 +2389,11 @@ loop:
 		if (!vget(vp,
 		    LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
 			if (VOP_GETVOBJECT(vp, &obj) == 0) {
-				vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
+				mtx_lock(&vm_mtx);
+				vm_object_page_clean(obj, 0, 0,
+				    flags == MNT_WAIT ?
+				    OBJPC_SYNC : OBJPC_NOSYNC);
+				mtx_unlock(&vm_mtx);
 				anyio = 1;
 			}
 			vput(vp);
@@ -2409,6 +2420,8 @@ vfs_object_create(vp, p, cred)
 	struct proc *p;
 	struct ucred *cred;
 {
+
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 	return (VOP_CREATEVOBJECT(vp, cred, p));
 }
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 6b73258..3f97551 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-	if (VOP_GETVOBJECT(vp, &obj) == 0)
+	if (VOP_GETVOBJECT(vp, &obj) == 0) {
+		mtx_unlock(&Giant);
+		mtx_lock(&vm_mtx);
 		vm_object_page_clean(obj, 0, 0, 0);
+		mtx_unlock(&vm_mtx);
+		mtx_lock(&Giant);
+	}
 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
 #ifdef SOFTUPDATES
 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
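A final pattern worth noting: many hunks in this commit add mtx_assert(&vm_mtx, MA_OWNED) or MA_NOTOWNED at function entry (exec_new_vmspace(), vinvalbuf(), vfs_object_create(), the vfs_bio helpers), turning each routine's locking contract into a checked invariant rather than a comment. A user-space sketch of the same idea using assert() and tracked ownership; the helper names are assumptions for illustration, not a real API:

    #include <assert.h>
    #include <pthread.h>

    struct owned_mtx {
            pthread_mutex_t mtx;
            pthread_t       owner;
            int             held;
    };

    static struct owned_mtx vm_mtx = { PTHREAD_MUTEX_INITIALIZER };

    static void
    omtx_lock(struct owned_mtx *m)
    {
            pthread_mutex_lock(&m->mtx);
            m->owner = pthread_self();
            m->held = 1;
    }

    static void
    omtx_unlock(struct owned_mtx *m)
    {
            m->held = 0;
            pthread_mutex_unlock(&m->mtx);
    }

    static int
    omtx_owned(struct owned_mtx *m)
    {
            return (m->held && pthread_equal(m->owner, pthread_self()));
    }

    /* Entry contract: vm_mtx must not be held (cf. vfs_object_create()). */
    static void
    create_object(void)
    {
            assert(!omtx_owned(&vm_mtx));
    }

    /* Entry contract: vm_mtx must be held (cf. vfs_clean_pages()). */
    static void
    clean_pages(void)
    {
            assert(omtx_owned(&vm_mtx));
    }

    int
    main(void)
    {
            create_object();
            omtx_lock(&vm_mtx);
            clean_pages();
            omtx_unlock(&vm_mtx);
            return (0);
    }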