author     alfred <alfred@FreeBSD.org>     2001-05-19 01:28:09 +0000
committer  alfred <alfred@FreeBSD.org>     2001-05-19 01:28:09 +0000
commit     a3f0842419d98da211706f921fc626e160cd960b (patch)
tree       e86922a5639c32e1242d4f3088fc487f3be5b236 /sys
parent     9eda9187f024233436e6a743f13bd938b1a0f19c (diff)
Introduce a global lock for the vm subsystem (vm_mtx).
vm_mtx does not recurse and is required for most low-level vm operations.

Faults cannot be taken without holding Giant.

Memory subsystems can now call the base page allocators safely.

Almost all atomic ops were removed, as they are now covered by the vm mutex.

Alpha and ia64 now need to catch up to i386's trap handlers.

FFS and NFS have been tested; other filesystems will need minor changes
(grabbing the vm lock when twiddling page properties).

Reviewed (partially) by:    jake, jhb
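As a sketch of what those "minor changes" look like in a filesystem, the pattern is simply to bracket any vm_page property updates with the new lock. This is a minimal illustration, not code from the commit: fs_clear_page_zero() and its buffer argument are hypothetical, while the mutex calls and page macros are the ones used throughout the diff below.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>		/* mtx_lock()/mtx_unlock() API */
#include <sys/bio.h>
#include <sys/buf.h>

#include <vm/vm.h>
#include <vm/vm_page.h>

/*
 * Hypothetical filesystem helper: clear PG_ZERO on a buffer's pages.
 * Page-property twiddling now has to happen under vm_mtx, and the
 * mutex does not recurse, so take it once around the whole loop and
 * drop it before doing anything that can sleep.
 */
static void
fs_clear_page_zero(struct buf *bp)
{
	int i;

	mtx_lock(&vm_mtx);
	for (i = 0; i < bp->b_npages; i++)
		vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
	mtx_unlock(&vm_mtx);
}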
Diffstat (limited to 'sys')
-rw-r--r--    sys/amd64/amd64/busdma_machdep.c    2
-rw-r--r--    sys/amd64/amd64/machdep.c    4
-rw-r--r--    sys/amd64/amd64/mem.c    18
-rw-r--r--    sys/amd64/amd64/pmap.c    1
-rw-r--r--    sys/amd64/amd64/trap.c    13
-rw-r--r--    sys/amd64/amd64/vm_machdep.c    22
-rw-r--r--    sys/coda/coda_namecache.c    1
-rw-r--r--    sys/compat/linprocfs/linprocfs.c    1
-rw-r--r--    sys/compat/linprocfs/linprocfs_misc.c    1
-rw-r--r--    sys/compat/pecoff/imgact_pecoff.c    1
-rw-r--r--    sys/dev/agp/agp.c    1
-rw-r--r--    sys/dev/agp/agp_ali.c    1
-rw-r--r--    sys/dev/agp/agp_amd.c    1
-rw-r--r--    sys/dev/agp/agp_i810.c    1
-rw-r--r--    sys/dev/agp/agp_intel.c    1
-rw-r--r--    sys/dev/agp/agp_sis.c    1
-rw-r--r--    sys/dev/agp/agp_via.c    1
-rw-r--r--    sys/dev/md/md.c    1
-rw-r--r--    sys/fs/coda/coda_namecache.c    1
-rw-r--r--    sys/fs/procfs/procfs_map.c    1
-rw-r--r--    sys/fs/procfs/procfs_mem.c    12
-rw-r--r--    sys/fs/specfs/spec_vnops.c    4
-rw-r--r--    sys/fs/unionfs/union_subr.c    1
-rw-r--r--    sys/i386/i386/busdma_machdep.c    2
-rw-r--r--    sys/i386/i386/machdep.c    4
-rw-r--r--    sys/i386/i386/mem.c    18
-rw-r--r--    sys/i386/i386/pmap.c    1
-rw-r--r--    sys/i386/i386/trap.c    13
-rw-r--r--    sys/i386/i386/vm_machdep.c    22
-rw-r--r--    sys/i386/linux/linux_sysvec.c    1
-rw-r--r--    sys/kern/imgact_aout.c    8
-rw-r--r--    sys/kern/imgact_elf.c    21
-rw-r--r--    sys/kern/init_main.c    4
-rw-r--r--    sys/kern/kern_exec.c    7
-rw-r--r--    sys/kern/kern_exit.c    2
-rw-r--r--    sys/kern/kern_fork.c    2
-rw-r--r--    sys/kern/kern_resource.c    2
-rw-r--r--    sys/kern/kern_synch.c    7
-rw-r--r--    sys/kern/link_elf.c    8
-rw-r--r--    sys/kern/link_elf_obj.c    8
-rw-r--r--    sys/kern/subr_blist.c    1
-rw-r--r--    sys/kern/subr_trap.c    13
-rw-r--r--    sys/kern/sys_pipe.c    11
-rw-r--r--    sys/kern/syscalls.master    34
-rw-r--r--    sys/kern/sysv_shm.c    6
-rw-r--r--    sys/kern/vfs_bio.c    105
-rw-r--r--    sys/kern/vfs_cluster.c    10
-rw-r--r--    sys/kern/vfs_default.c    16
-rw-r--r--    sys/kern/vfs_extattr.c    7
-rw-r--r--    sys/kern/vfs_subr.c    15
-rw-r--r--    sys/kern/vfs_syscalls.c    7
-rw-r--r--    sys/miscfs/procfs/procfs_map.c    1
-rw-r--r--    sys/miscfs/procfs/procfs_mem.c    12
-rw-r--r--    sys/miscfs/specfs/spec_vnops.c    4
-rw-r--r--    sys/miscfs/union/union_subr.c    1
-rw-r--r--    sys/nfs/nfs_bio.c    31
-rw-r--r--    sys/nfs/nfs_common.c    2
-rw-r--r--    sys/nfs/nfs_subs.c    2
-rw-r--r--    sys/nfsclient/nfs_bio.c    31
-rw-r--r--    sys/nfsclient/nfs_subs.c    2
-rw-r--r--    sys/nfsserver/nfs_srvsubs.c    2
-rw-r--r--    sys/pci/agp.c    1
-rw-r--r--    sys/pci/agp_ali.c    1
-rw-r--r--    sys/pci/agp_amd.c    1
-rw-r--r--    sys/pci/agp_i810.c    1
-rw-r--r--    sys/pci/agp_intel.c    1
-rw-r--r--    sys/pci/agp_sis.c    1
-rw-r--r--    sys/pci/agp_via.c    1
-rw-r--r--    sys/ufs/ufs/ufs_readwrite.c    47
-rw-r--r--    sys/vm/default_pager.c    2
-rw-r--r--    sys/vm/phys_pager.c    16
-rw-r--r--    sys/vm/swap_pager.c    77
-rw-r--r--    sys/vm/vm.h    4
-rw-r--r--    sys/vm/vm_fault.c    65
-rw-r--r--    sys/vm/vm_glue.c    38
-rw-r--r--    sys/vm/vm_init.c    8
-rw-r--r--    sys/vm/vm_kern.c    74
-rw-r--r--    sys/vm/vm_map.c    41
-rw-r--r--    sys/vm/vm_map.h    22
-rw-r--r--    sys/vm/vm_meter.c    6
-rw-r--r--    sys/vm/vm_mmap.c    60
-rw-r--r--    sys/vm/vm_object.c    58
-rw-r--r--    sys/vm/vm_object.h    35
-rw-r--r--    sys/vm/vm_page.c    123
-rw-r--r--    sys/vm/vm_page.h    31
-rw-r--r--    sys/vm/vm_pageout.c    19
-rw-r--r--    sys/vm/vm_pager.c    27
-rw-r--r--    sys/vm/vm_pager.h    16
-rw-r--r--    sys/vm/vm_unix.c    7
-rw-r--r--    sys/vm/vm_zone.c    32
-rw-r--r--    sys/vm/vnode_pager.c    53
91 files changed, 1149 insertions, 253 deletions
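A recurring idiom in the hunks below (bfreekva(), vfs_setdirty(), vm_hold_free_pages()) handles routines that may be entered either with or without vm_mtx held: since the mutex does not recurse, the routine checks ownership first and only acquires and releases the lock itself when the caller did not. A minimal sketch of that idiom follows; example_unwire_pages() and its buffer argument are purely illustrative, while mtx_owned() and vm_page_unwire() are the calls used in the diff.

#include <sys/param.h>
#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/bio.h>
#include <sys/buf.h>

#include <vm/vm.h>
#include <vm/vm_page.h>

/* Illustrative only: unwire a buffer's pages, callable with or without vm_mtx. */
static void
example_unwire_pages(struct buf *bp)
{
	int i, hadvmlock;

	/* Take vm_mtx only if the caller does not already own it. */
	hadvmlock = mtx_owned(&vm_mtx);
	if (!hadvmlock)
		mtx_lock(&vm_mtx);

	for (i = 0; i < bp->b_npages; i++)
		vm_page_unwire(bp->b_pages[i], 1);

	/* Leave the lock state exactly as we found it. */
	if (!hadvmlock)
		mtx_unlock(&vm_mtx);
}

The same conditional acquire shows up wherever a helper sits on both locked (VM-internal) and unlocked (filesystem) call paths.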
diff --git a/sys/amd64/amd64/busdma_machdep.c b/sys/amd64/amd64/busdma_machdep.c
index 63906dd..3dc9e76 100644
--- a/sys/amd64/amd64/busdma_machdep.c
+++ b/sys/amd64/amd64/busdma_machdep.c
@@ -31,6 +31,8 @@
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index bb552a3..e02569c1 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -264,6 +264,7 @@ cpu_startup(dummy)
/*
* Good {morning,afternoon,evening,night}.
*/
+ mtx_lock(&vm_mtx);
earlysetcpuclass();
startrtclock();
printcpuinfo();
@@ -397,6 +398,7 @@ again:
exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
(16*(ARG_MAX+(PAGE_SIZE*3))));
+ mtx_unlock(&vm_mtx);
/*
* XXX: Mbuf system machine-specific initializations should
* go here, if anywhere.
@@ -2075,9 +2077,11 @@ f00f_hack(void *unused) {
r_idt.rd_base = (int)new_idt;
lidt(&r_idt);
idt = new_idt;
+ mtx_lock(&vm_mtx);
if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
VM_PROT_READ, FALSE) != KERN_SUCCESS)
panic("vm_map_protect failed");
+ mtx_unlock(&vm_mtx);
return;
}
#endif /* defined(I586_CPU) && !NO_F00F_HACK */
diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c
index a5a9135..8671530 100644
--- a/sys/amd64/amd64/mem.c
+++ b/sys/amd64/amd64/mem.c
@@ -50,6 +50,8 @@
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/proc.h>
@@ -153,13 +155,17 @@ mmrw(dev_t dev, struct uio *uio, int flags)
case 0:
v = uio->uio_offset;
v &= ~PAGE_MASK;
+ mtx_lock(&vm_mtx);
pmap_kenter((vm_offset_t)ptvmmap, v);
+ mtx_unlock(&vm_mtx);
o = (int)uio->uio_offset & PAGE_MASK;
c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK));
c = min(c, (u_int)(PAGE_SIZE - o));
c = min(c, (u_int)iov->iov_len);
error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
+ mtx_lock(&vm_mtx);
pmap_kremove((vm_offset_t)ptvmmap);
+ mtx_unlock(&vm_mtx);
continue;
/* minor device 1 is kernel memory */
@@ -177,14 +183,20 @@ mmrw(dev_t dev, struct uio *uio, int flags)
return EFAULT;
if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0))
return EFAULT;
+ mtx_lock(&vm_mtx);
for (; addr < eaddr; addr += PAGE_SIZE)
- if (pmap_extract(kernel_pmap, addr) == 0)
+ if (pmap_extract(kernel_pmap, addr) == 0) {
+ mtx_unlock(&vm_mtx);
return EFAULT;
-
+ }
+
if (!kernacc((caddr_t)(int)uio->uio_offset, c,
uio->uio_rw == UIO_READ ?
- VM_PROT_READ : VM_PROT_WRITE))
+ VM_PROT_READ : VM_PROT_WRITE)) {
+ mtx_unlock(&vm_mtx);
return (EFAULT);
+ }
+ mtx_unlock(&vm_mtx);
error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio);
continue;
}
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 93807ee..488a8a5 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -75,6 +75,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/proc.h>
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index af7bfc1..8924fa2 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -330,9 +330,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
i = trap_pfault(&frame, TRUE, eva);
- mtx_unlock(&Giant);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if (i == -2) {
/*
@@ -443,9 +441,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
(void) trap_pfault(&frame, FALSE, eva);
- mtx_unlock(&Giant);
goto out;
case T_DNA:
@@ -887,7 +883,9 @@ nogo:
frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
return (0);
}
+ mtx_lock(&Giant);
trap_fatal(frame, eva);
+ mtx_unlock(&Giant);
return (-1);
}
@@ -1147,14 +1145,17 @@ syscall(frame)
/*
* Try to run the syscall without the MP lock if the syscall
- * is MP safe. We have to obtain the MP lock no matter what if
- * we are ktracing
+ * is MP safe.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
mtx_lock(&Giant);
}
#ifdef KTRACE
+ /*
+ * We have to obtain the MP lock no matter what if
+ * we are ktracing
+ */
if (KTRPOINT(p, KTR_SYSCALL)) {
if (!mtx_owned(&Giant))
mtx_lock(&Giant);
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index fd626a3..eda2386 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -290,11 +290,14 @@ void
cpu_wait(p)
struct proc *p;
{
+
+ mtx_lock(&vm_mtx);
/* drop per-process resources */
pmap_dispose_proc(p);
/* and clean-out the vmspace */
vmspace_free(p->p_vmspace);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -376,6 +379,7 @@ vmapbuf(bp)
if ((bp->b_flags & B_PHYS) == 0)
panic("vmapbuf");
+ mtx_lock(&vm_mtx);
for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
addr < bp->b_data + bp->b_bufsize;
addr += PAGE_SIZE, v += PAGE_SIZE) {
@@ -391,6 +395,7 @@ vmapbuf(bp)
vm_page_hold(PHYS_TO_VM_PAGE(pa));
pmap_kenter((vm_offset_t) v, pa);
}
+ mtx_unlock(&vm_mtx);
kva = bp->b_saveaddr;
bp->b_saveaddr = bp->b_data;
@@ -411,6 +416,7 @@ vunmapbuf(bp)
if ((bp->b_flags & B_PHYS) == 0)
panic("vunmapbuf");
+ mtx_lock(&vm_mtx);
for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
addr < bp->b_data + bp->b_bufsize;
addr += PAGE_SIZE) {
@@ -418,6 +424,7 @@ vunmapbuf(bp)
pmap_kremove((vm_offset_t) addr);
vm_page_unhold(PHYS_TO_VM_PAGE(pa));
}
+ mtx_unlock(&vm_mtx);
bp->b_data = bp->b_saveaddr;
}
@@ -574,12 +581,17 @@ vm_page_zero_idle()
* pages because doing so may flush our L1 and L2 caches too much.
*/
- if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count))
+ if (mtx_trylock(&vm_mtx) == 0)
+ return (0);
+ if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) {
+ mtx_unlock(&vm_mtx);
return(0);
- if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
+ }
+ if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) {
+ mtx_unlock(&vm_mtx);
return(0);
+ }
- if (mtx_trylock(&Giant)) {
s = splvm();
zero_state = 0;
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
@@ -602,10 +614,8 @@ vm_page_zero_idle()
}
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
- mtx_unlock(&Giant);
+ mtx_unlock(&vm_mtx);
return (1);
- }
- return (0);
}
/*
diff --git a/sys/coda/coda_namecache.c b/sys/coda/coda_namecache.c
index 3b73a67..9dfaf19 100644
--- a/sys/coda/coda_namecache.c
+++ b/sys/coda/coda_namecache.c
@@ -81,6 +81,7 @@
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/ucred.h>
#include <vm/vm.h>
diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c
index 004ec23..1a8e078 100644
--- a/sys/compat/linprocfs/linprocfs.c
+++ b/sys/compat/linprocfs/linprocfs.c
@@ -50,6 +50,7 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
diff --git a/sys/compat/linprocfs/linprocfs_misc.c b/sys/compat/linprocfs/linprocfs_misc.c
index 004ec23..1a8e078 100644
--- a/sys/compat/linprocfs/linprocfs_misc.c
+++ b/sys/compat/linprocfs/linprocfs_misc.c
@@ -50,6 +50,7 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
diff --git a/sys/compat/pecoff/imgact_pecoff.c b/sys/compat/pecoff/imgact_pecoff.c
index 36b4288..f5cbfa8 100644
--- a/sys/compat/pecoff/imgact_pecoff.c
+++ b/sys/compat/pecoff/imgact_pecoff.c
@@ -49,6 +49,7 @@
#include <sys/imgact.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/namei.h>
#include <sys/proc.h>
diff --git a/sys/dev/agp/agp.c b/sys/dev/agp/agp.c
index 6419635..333c4c8 100644
--- a/sys/dev/agp/agp.c
+++ b/sys/dev/agp/agp.c
@@ -38,6 +38,7 @@
#include <sys/ioccom.h>
#include <sys/agpio.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <pci/pcivar.h>
diff --git a/sys/dev/agp/agp_ali.c b/sys/dev/agp/agp_ali.c
index 86e070e..aa805e1 100644
--- a/sys/dev/agp/agp_ali.c
+++ b/sys/dev/agp/agp_ali.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/agp/agp_amd.c b/sys/dev/agp/agp_amd.c
index 0a498f7..4aaf4e9 100644
--- a/sys/dev/agp/agp_amd.c
+++ b/sys/dev/agp/agp_amd.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/agp/agp_i810.c b/sys/dev/agp/agp_i810.c
index 79fd566..5c40493 100644
--- a/sys/dev/agp/agp_i810.c
+++ b/sys/dev/agp/agp_i810.c
@@ -36,6 +36,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/agp/agp_intel.c b/sys/dev/agp/agp_intel.c
index a4b9a43..dc1ef4d 100644
--- a/sys/dev/agp/agp_intel.c
+++ b/sys/dev/agp/agp_intel.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/agp/agp_sis.c b/sys/dev/agp/agp_sis.c
index 1f1a50b..a6a20a4 100644
--- a/sys/dev/agp/agp_sis.c
+++ b/sys/dev/agp/agp_sis.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/agp/agp_via.c b/sys/dev/agp/agp_via.c
index 983348e..086b027 100644
--- a/sys/dev/agp/agp_via.c
+++ b/sys/dev/agp/agp_via.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index edf2890..ae783dd 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -71,6 +71,7 @@
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/mdioctl.h>
#include <sys/namei.h>
#include <sys/proc.h>
diff --git a/sys/fs/coda/coda_namecache.c b/sys/fs/coda/coda_namecache.c
index 3b73a67..9dfaf19 100644
--- a/sys/fs/coda/coda_namecache.c
+++ b/sys/fs/coda/coda_namecache.c
@@ -81,6 +81,7 @@
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/ucred.h>
#include <vm/vm.h>
diff --git a/sys/fs/procfs/procfs_map.c b/sys/fs/procfs/procfs_map.c
index 5e4a30c..5c21993 100644
--- a/sys/fs/procfs/procfs_map.c
+++ b/sys/fs/procfs/procfs_map.c
@@ -42,6 +42,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/vnode.h>
diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c
index 3a2f8d2..1e28870 100644
--- a/sys/fs/procfs/procfs_mem.c
+++ b/sys/fs/procfs/procfs_mem.c
@@ -48,6 +48,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/user.h>
@@ -88,8 +89,14 @@ procfs_rwmem(curp, p, uio)
* usage in that process can be messed up.
*/
vm = p->p_vmspace;
- if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1))
+ if ((p->p_flag & P_WEXIT))
return EFAULT;
+
+ mtx_lock(&vm_mtx);
+ if (vm->vm_refcnt < 1) {
+ mtx_unlock(&vm_mtx);
+ return EFAULT;
+ }
++vm->vm_refcnt;
/*
* The map we want...
@@ -207,7 +214,9 @@ procfs_rwmem(curp, p, uio)
/*
* Now do the i/o move.
*/
+ mtx_unlock(&vm_mtx);
error = uiomove((caddr_t)(kva + page_offset), len, uio);
+ mtx_lock(&vm_mtx);
pmap_kremove(kva);
@@ -226,6 +235,7 @@ procfs_rwmem(curp, p, uio)
kmem_free(kernel_map, kva, PAGE_SIZE);
vmspace_free(vm);
+ mtx_unlock(&vm_mtx);
return (error);
}
diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c
index 2940f40..ba81229 100644
--- a/sys/fs/specfs/spec_vnops.c
+++ b/sys/fs/specfs/spec_vnops.c
@@ -731,6 +731,8 @@ spec_getpages(ap)
cnt.v_vnodein++;
cnt.v_vnodepgsin += pcount;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
/* Do the input. */
BUF_STRATEGY(bp);
@@ -741,6 +743,8 @@ spec_getpages(ap)
tsleep(bp, PVM, "spread", 0);
splx(s);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if ((bp->b_ioflags & BIO_ERROR) != 0) {
if (bp->b_error)
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
index 869818f..3ac98bf 100644
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -45,6 +45,7 @@
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mount.h>
diff --git a/sys/i386/i386/busdma_machdep.c b/sys/i386/i386/busdma_machdep.c
index 63906dd..3dc9e76 100644
--- a/sys/i386/i386/busdma_machdep.c
+++ b/sys/i386/i386/busdma_machdep.c
@@ -31,6 +31,8 @@
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index bb552a3..e02569c1 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -264,6 +264,7 @@ cpu_startup(dummy)
/*
* Good {morning,afternoon,evening,night}.
*/
+ mtx_lock(&vm_mtx);
earlysetcpuclass();
startrtclock();
printcpuinfo();
@@ -397,6 +398,7 @@ again:
exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
(16*(ARG_MAX+(PAGE_SIZE*3))));
+ mtx_unlock(&vm_mtx);
/*
* XXX: Mbuf system machine-specific initializations should
* go here, if anywhere.
@@ -2075,9 +2077,11 @@ f00f_hack(void *unused) {
r_idt.rd_base = (int)new_idt;
lidt(&r_idt);
idt = new_idt;
+ mtx_lock(&vm_mtx);
if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
VM_PROT_READ, FALSE) != KERN_SUCCESS)
panic("vm_map_protect failed");
+ mtx_unlock(&vm_mtx);
return;
}
#endif /* defined(I586_CPU) && !NO_F00F_HACK */
diff --git a/sys/i386/i386/mem.c b/sys/i386/i386/mem.c
index a5a9135..8671530 100644
--- a/sys/i386/i386/mem.c
+++ b/sys/i386/i386/mem.c
@@ -50,6 +50,8 @@
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/proc.h>
@@ -153,13 +155,17 @@ mmrw(dev_t dev, struct uio *uio, int flags)
case 0:
v = uio->uio_offset;
v &= ~PAGE_MASK;
+ mtx_lock(&vm_mtx);
pmap_kenter((vm_offset_t)ptvmmap, v);
+ mtx_unlock(&vm_mtx);
o = (int)uio->uio_offset & PAGE_MASK;
c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK));
c = min(c, (u_int)(PAGE_SIZE - o));
c = min(c, (u_int)iov->iov_len);
error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
+ mtx_lock(&vm_mtx);
pmap_kremove((vm_offset_t)ptvmmap);
+ mtx_unlock(&vm_mtx);
continue;
/* minor device 1 is kernel memory */
@@ -177,14 +183,20 @@ mmrw(dev_t dev, struct uio *uio, int flags)
return EFAULT;
if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0))
return EFAULT;
+ mtx_lock(&vm_mtx);
for (; addr < eaddr; addr += PAGE_SIZE)
- if (pmap_extract(kernel_pmap, addr) == 0)
+ if (pmap_extract(kernel_pmap, addr) == 0) {
+ mtx_unlock(&vm_mtx);
return EFAULT;
-
+ }
+
if (!kernacc((caddr_t)(int)uio->uio_offset, c,
uio->uio_rw == UIO_READ ?
- VM_PROT_READ : VM_PROT_WRITE))
+ VM_PROT_READ : VM_PROT_WRITE)) {
+ mtx_unlock(&vm_mtx);
return (EFAULT);
+ }
+ mtx_unlock(&vm_mtx);
error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio);
continue;
}
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 93807ee..488a8a5 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -75,6 +75,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/proc.h>
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index af7bfc1..8924fa2 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -330,9 +330,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
i = trap_pfault(&frame, TRUE, eva);
- mtx_unlock(&Giant);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if (i == -2) {
/*
@@ -443,9 +441,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
(void) trap_pfault(&frame, FALSE, eva);
- mtx_unlock(&Giant);
goto out;
case T_DNA:
@@ -887,7 +883,9 @@ nogo:
frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
return (0);
}
+ mtx_lock(&Giant);
trap_fatal(frame, eva);
+ mtx_unlock(&Giant);
return (-1);
}
@@ -1147,14 +1145,17 @@ syscall(frame)
/*
* Try to run the syscall without the MP lock if the syscall
- * is MP safe. We have to obtain the MP lock no matter what if
- * we are ktracing
+ * is MP safe.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
mtx_lock(&Giant);
}
#ifdef KTRACE
+ /*
+ * We have to obtain the MP lock no matter what if
+ * we are ktracing
+ */
if (KTRPOINT(p, KTR_SYSCALL)) {
if (!mtx_owned(&Giant))
mtx_lock(&Giant);
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index fd626a3..eda2386 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -290,11 +290,14 @@ void
cpu_wait(p)
struct proc *p;
{
+
+ mtx_lock(&vm_mtx);
/* drop per-process resources */
pmap_dispose_proc(p);
/* and clean-out the vmspace */
vmspace_free(p->p_vmspace);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -376,6 +379,7 @@ vmapbuf(bp)
if ((bp->b_flags & B_PHYS) == 0)
panic("vmapbuf");
+ mtx_lock(&vm_mtx);
for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
addr < bp->b_data + bp->b_bufsize;
addr += PAGE_SIZE, v += PAGE_SIZE) {
@@ -391,6 +395,7 @@ vmapbuf(bp)
vm_page_hold(PHYS_TO_VM_PAGE(pa));
pmap_kenter((vm_offset_t) v, pa);
}
+ mtx_unlock(&vm_mtx);
kva = bp->b_saveaddr;
bp->b_saveaddr = bp->b_data;
@@ -411,6 +416,7 @@ vunmapbuf(bp)
if ((bp->b_flags & B_PHYS) == 0)
panic("vunmapbuf");
+ mtx_lock(&vm_mtx);
for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
addr < bp->b_data + bp->b_bufsize;
addr += PAGE_SIZE) {
@@ -418,6 +424,7 @@ vunmapbuf(bp)
pmap_kremove((vm_offset_t) addr);
vm_page_unhold(PHYS_TO_VM_PAGE(pa));
}
+ mtx_unlock(&vm_mtx);
bp->b_data = bp->b_saveaddr;
}
@@ -574,12 +581,17 @@ vm_page_zero_idle()
* pages because doing so may flush our L1 and L2 caches too much.
*/
- if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count))
+ if (mtx_trylock(&vm_mtx) == 0)
+ return (0);
+ if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) {
+ mtx_unlock(&vm_mtx);
return(0);
- if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
+ }
+ if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) {
+ mtx_unlock(&vm_mtx);
return(0);
+ }
- if (mtx_trylock(&Giant)) {
s = splvm();
zero_state = 0;
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
@@ -602,10 +614,8 @@ vm_page_zero_idle()
}
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
- mtx_unlock(&Giant);
+ mtx_unlock(&vm_mtx);
return (1);
- }
- return (0);
}
/*
diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c
index 13c29f8..0734ba4 100644
--- a/sys/i386/linux/linux_sysvec.c
+++ b/sys/i386/linux/linux_sysvec.c
@@ -41,6 +41,7 @@
#include <sys/imgact_aout.h>
#include <sys/imgact_elf.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/sysent.h>
diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c
index 9478eb3..8becda3 100644
--- a/sys/kern/imgact_aout.c
+++ b/sys/kern/imgact_aout.c
@@ -171,6 +171,7 @@ exec_aout_imgact(imgp)
if (error)
return (error);
+ mtx_lock(&vm_mtx);
/*
* Destroy old process VM and create a new one (with a new stack)
*/
@@ -184,7 +185,9 @@ exec_aout_imgact(imgp)
vp = imgp->vp;
map = &vmspace->vm_map;
vm_map_lock(map);
+ mtx_unlock(&vm_mtx);
VOP_GETVOBJECT(vp, &object);
+ mtx_lock(&vm_mtx);
vm_object_reference(object);
text_end = virtual_offset + a_out->a_text;
@@ -195,6 +198,7 @@ exec_aout_imgact(imgp)
MAP_COPY_ON_WRITE | MAP_PREFAULT);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
data_end = text_end + a_out->a_data;
@@ -207,6 +211,7 @@ exec_aout_imgact(imgp)
MAP_COPY_ON_WRITE | MAP_PREFAULT);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
}
@@ -217,6 +222,7 @@ exec_aout_imgact(imgp)
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
}
@@ -229,6 +235,8 @@ exec_aout_imgact(imgp)
vmspace->vm_daddr = (caddr_t) (uintptr_t)
(virtual_offset + a_out->a_text);
+ mtx_unlock(&vm_mtx);
+
/* Fill in image_params */
imgp->interpreted = 0;
imgp->entry_addr = a_out->a_entry;
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index da7b9cb..2a15e9c 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -230,6 +230,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
else
map_len = round_page(offset+filsz) - file_addr;
+ mtx_lock(&vm_mtx);
if (map_len != 0) {
vm_object_reference(object);
vm_map_lock(&vmspace->vm_map);
@@ -244,12 +245,15 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
vm_map_unlock(&vmspace->vm_map);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
+ mtx_unlock(&vm_mtx);
return EINVAL;
}
/* we can stop now if we've covered it all */
- if (memsz == filsz)
+ if (memsz == filsz) {
+ mtx_unlock(&vm_mtx);
return 0;
+ }
}
@@ -270,8 +274,10 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
map_addr, map_addr + map_len,
VM_PROT_ALL, VM_PROT_ALL, 0);
vm_map_unlock(&vmspace->vm_map);
- if (rv != KERN_SUCCESS)
+ if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return EINVAL;
+ }
}
if (copy_len != 0) {
@@ -287,14 +293,19 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
+ mtx_unlock(&vm_mtx);
return EINVAL;
}
/* send the page fragment to user space */
+ mtx_unlock(&vm_mtx);
error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len);
+ mtx_lock(&vm_mtx);
vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
- if (error)
+ if (error) {
+ mtx_unlock(&vm_mtx);
return (error);
+ }
}
/*
@@ -303,6 +314,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len, prot,
FALSE);
+ mtx_unlock(&vm_mtx);
return error;
}
@@ -498,9 +510,11 @@ exec_elf_imgact(struct image_params *imgp)
if ((error = exec_extract_strings(imgp)) != 0)
goto fail;
+ mtx_lock(&vm_mtx);
exec_new_vmspace(imgp);
vmspace = imgp->proc->p_vmspace;
+ mtx_unlock(&vm_mtx);
for (i = 0; i < hdr->e_phnum; i++) {
switch(phdr[i].p_type) {
@@ -557,6 +571,7 @@ exec_elf_imgact(struct image_params *imgp)
}
}
+ /* XXX: lock the vm_mtx when twiddling vmspace? */
vmspace->vm_tsize = text_size >> PAGE_SHIFT;
vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
vmspace->vm_dsize = data_size >> PAGE_SHIFT;
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index f1a6a0b..6f5c653 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -485,11 +485,15 @@ start_init(void *dummy)
* Need just enough stack to hold the faked-up "execve()" arguments.
*/
addr = trunc_page(USRSTACK - PAGE_SIZE);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
panic("init: couldn't allocate argument space");
p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
p->p_vmspace->vm_ssize = 1;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
if ((var = getenv("init_path")) != NULL) {
strncpy(init_path, var, sizeof init_path);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 0b1b29e..8f49538 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -412,6 +412,7 @@ exec_map_first_page(imgp)
VOP_GETVOBJECT(imgp->vp, &object);
s = splvm();
+ mtx_lock(&vm_mtx);
ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
@@ -443,6 +444,7 @@ exec_map_first_page(imgp)
vm_page_free(ma[0]);
}
splx(s);
+ mtx_unlock(&vm_mtx);
return EIO;
}
}
@@ -454,6 +456,7 @@ exec_map_first_page(imgp)
pmap_kenter((vm_offset_t) imgp->image_header, VM_PAGE_TO_PHYS(ma[0]));
imgp->firstpage = ma[0];
+ mtx_unlock(&vm_mtx);
return 0;
}
@@ -461,9 +464,12 @@ void
exec_unmap_first_page(imgp)
struct image_params *imgp;
{
+
if (imgp->firstpage) {
+ mtx_lock(&vm_mtx);
pmap_kremove((vm_offset_t) imgp->image_header);
vm_page_unwire(imgp->firstpage, 1);
+ mtx_unlock(&vm_mtx);
imgp->firstpage = NULL;
}
}
@@ -482,6 +488,7 @@ exec_new_vmspace(imgp)
caddr_t stack_addr = (caddr_t) (USRSTACK - MAXSSIZ);
vm_map_t map = &vmspace->vm_map;
+ mtx_assert(&vm_mtx, MA_OWNED);
imgp->vmspace_destroyed = 1;
/*
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index d5dccab..1af27d2 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -222,6 +222,7 @@ exit1(p, rv)
* Can't free the entire vmspace as the kernel stack
* may be mapped within that space also.
*/
+ mtx_lock(&vm_mtx);
if (vm->vm_refcnt == 1) {
if (vm->vm_shm)
shmexit(p);
@@ -230,6 +231,7 @@ exit1(p, rv)
(void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
VM_MAXUSER_ADDRESS);
}
+ mtx_unlock(&vm_mtx);
PROC_LOCK(p);
if (SESS_LEADER(p)) {
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index d3b991d..62dcc06 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -220,6 +220,7 @@ fork1(p1, flags, procp)
if ((flags & RFPROC) == 0) {
vm_fork(p1, 0, flags);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
/*
* Close all file descriptors.
@@ -567,6 +568,7 @@ again:
* execution path later. (ie: directly into user mode)
*/
vm_fork(p1, p2, flags);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
if (flags == (RFFDG | RFPROC)) {
cnt.v_forks++;
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 27431ab..f46313c 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -498,8 +498,10 @@ dosetrlimit(p, which, limp)
}
addr = trunc_page(addr);
size = round_page(size);
+ mtx_lock(&vm_mtx);
(void) vm_map_protect(&p->p_vmspace->vm_map,
addr, addr+size, prot, FALSE);
+ mtx_unlock(&vm_mtx);
}
break;
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 7d793de..e09a377 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -378,6 +378,13 @@ msleep(ident, mtx, priority, wmesg, timo)
int rval = 0;
WITNESS_SAVE_DECL(mtx);
+ KASSERT(ident == &proc0 || /* XXX: swapper */
+ timo != 0 || /* XXX: we might still miss a wakeup */
+ mtx_owned(&Giant) || mtx != NULL,
+ ("indefinite sleep without mutex, wmesg: \"%s\" ident: %p",
+ wmesg, ident));
+ if (mtx_owned(&vm_mtx) && mtx != &vm_mtx)
+ panic("sleeping with vm_mtx held.");
#ifdef KTRACE
if (p && KTRPOINT(p, KTR_CSW))
ktrcsw(p->p_tracep, 1, 0);
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 344f163..613d1e4 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
ef = (elf_file_t) lf;
#ifdef SPARSE_MAPPING
+ mtx_lock(&vm_mtx);
ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
if (ef->object == NULL) {
+ mtx_unlock(&vm_mtx);
free(ef, M_LINKER);
error = ENOMEM;
goto out;
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
ef->object = 0;
goto out;
}
+ mtx_unlock(&vm_mtx);
#else
ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
/*
* Wire down the pages
*/
+ mtx_lock(&vm_mtx);
vm_map_pageable(kernel_map,
(vm_offset_t) segbase,
(vm_offset_t) segbase + segs[i]->p_memsz,
FALSE);
+ mtx_unlock(&vm_mtx);
#endif
}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
}
#ifdef SPARSE_MAPPING
if (ef->object) {
+ mtx_lock(&vm_mtx);
vm_map_remove(kernel_map, (vm_offset_t) ef->address,
(vm_offset_t) ef->address
+ (ef->object->size << PAGE_SHIFT));
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
}
#else
if (ef->address)
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index 344f163..613d1e4 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
ef = (elf_file_t) lf;
#ifdef SPARSE_MAPPING
+ mtx_lock(&vm_mtx);
ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
if (ef->object == NULL) {
+ mtx_unlock(&vm_mtx);
free(ef, M_LINKER);
error = ENOMEM;
goto out;
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
ef->object = 0;
goto out;
}
+ mtx_unlock(&vm_mtx);
#else
ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
/*
* Wire down the pages
*/
+ mtx_lock(&vm_mtx);
vm_map_pageable(kernel_map,
(vm_offset_t) segbase,
(vm_offset_t) segbase + segs[i]->p_memsz,
FALSE);
+ mtx_unlock(&vm_mtx);
#endif
}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
}
#ifdef SPARSE_MAPPING
if (ef->object) {
+ mtx_lock(&vm_mtx);
vm_map_remove(kernel_map, (vm_offset_t) ef->address,
(vm_offset_t) ef->address
+ (ef->object->size << PAGE_SHIFT));
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
}
#else
if (ef->address)
diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c
index 9ac4338..061d151 100644
--- a/sys/kern/subr_blist.c
+++ b/sys/kern/subr_blist.c
@@ -71,6 +71,7 @@
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index af7bfc1..8924fa2 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -330,9 +330,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
i = trap_pfault(&frame, TRUE, eva);
- mtx_unlock(&Giant);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if (i == -2) {
/*
@@ -443,9 +441,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
(void) trap_pfault(&frame, FALSE, eva);
- mtx_unlock(&Giant);
goto out;
case T_DNA:
@@ -887,7 +883,9 @@ nogo:
frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
return (0);
}
+ mtx_lock(&Giant);
trap_fatal(frame, eva);
+ mtx_unlock(&Giant);
return (-1);
}
@@ -1147,14 +1145,17 @@ syscall(frame)
/*
* Try to run the syscall without the MP lock if the syscall
- * is MP safe. We have to obtain the MP lock no matter what if
- * we are ktracing
+ * is MP safe.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
mtx_lock(&Giant);
}
#ifdef KTRACE
+ /*
+ * We have to obtain the MP lock no matter what if
+ * we are ktracing
+ */
if (KTRPOINT(p, KTR_SYSCALL)) {
if (!mtx_owned(&Giant))
mtx_lock(&Giant);
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 0c32402..a788448 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -56,6 +56,7 @@
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
@@ -253,6 +254,7 @@ pipespace(cpipe, size)
* kernel_object.
* XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
*/
+ mtx_lock(&vm_mtx);
object = vm_object_allocate(OBJT_DEFAULT, npages);
buffer = (caddr_t) vm_map_min(kernel_map);
@@ -264,6 +266,7 @@ pipespace(cpipe, size)
error = vm_map_find(kernel_map, object, 0,
(vm_offset_t *) &buffer, size, 1,
VM_PROT_ALL, VM_PROT_ALL, 0);
+ mtx_unlock(&vm_mtx);
if (error != KERN_SUCCESS) {
vm_object_deallocate(object);
@@ -551,6 +554,7 @@ pipe_build_write_buffer(wpipe, uio)
size = wpipe->pipe_buffer.size;
endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
+ mtx_lock(&vm_mtx);
addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
vm_page_t m;
@@ -561,6 +565,7 @@ pipe_build_write_buffer(wpipe, uio)
for (j = 0; j < i; j++)
vm_page_unwire(wpipe->pipe_map.ms[j], 1);
+ mtx_unlock(&vm_mtx);
return (EFAULT);
}
@@ -592,6 +597,7 @@ pipe_build_write_buffer(wpipe, uio)
pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
wpipe->pipe_map.npages);
+ mtx_unlock(&vm_mtx);
/*
* and update the uio data
*/
@@ -625,8 +631,10 @@ pipe_destroy_write_buffer(wpipe)
amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
}
}
+ mtx_lock(&vm_mtx);
for (i = 0; i < wpipe->pipe_map.npages; i++)
vm_page_unwire(wpipe->pipe_map.ms[i], 1);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -1199,12 +1207,13 @@ pipeclose(cpipe)
wakeup(ppipe);
ppipe->pipe_peer = NULL;
}
-
/*
* free resources
*/
+ mtx_lock(&vm_mtx);
pipe_free_kmem(cpipe);
zfree(pipe_zone, cpipe);
+ mtx_unlock(&vm_mtx);
}
}
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 32255bc..269814c 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -61,7 +61,7 @@
14 STD POSIX { int mknod(char *path, int mode, int dev); }
15 STD POSIX { int chmod(char *path, int mode); }
16 STD POSIX { int chown(char *path, int uid, int gid); }
-17 STD BSD { int obreak(char *nsize); } break obreak_args int
+17 MPSAFE STD BSD { int obreak(char *nsize); } break obreak_args int
18 STD BSD { int getfsstat(struct statfs *buf, long bufsize, \
int flags); }
19 COMPAT POSIX { long lseek(int fd, long offset, int whence); }
@@ -121,23 +121,23 @@
62 COMPAT POSIX { int fstat(int fd, struct ostat *sb); }
63 COMPAT BSD { int getkerninfo(int op, char *where, size_t *size, \
int arg); } getkerninfo getkerninfo_args int
-64 COMPAT BSD { int getpagesize(void); } \
+64 MPSAFE COMPAT BSD { int getpagesize(void); } \
getpagesize getpagesize_args int
65 STD BSD { int msync(void *addr, size_t len, int flags); }
66 STD BSD { int vfork(void); }
67 OBSOL NOHIDE vread
68 OBSOL NOHIDE vwrite
-69 STD BSD { int sbrk(int incr); }
-70 STD BSD { int sstk(int incr); }
-71 COMPAT BSD { int mmap(void *addr, int len, int prot, \
+69 MPSAFE STD BSD { int sbrk(int incr); }
+70 MPSAFE STD BSD { int sstk(int incr); }
+71 MPSAFE COMPAT BSD { int mmap(void *addr, int len, int prot, \
int flags, int fd, long pos); }
-72 STD BSD { int ovadvise(int anom); } vadvise ovadvise_args int
-73 STD BSD { int munmap(void *addr, size_t len); }
-74 STD BSD { int mprotect(const void *addr, size_t len, int prot); }
-75 STD BSD { int madvise(void *addr, size_t len, int behav); }
+72 MPSAFE STD BSD { int ovadvise(int anom); } vadvise ovadvise_args int
+73 MPSAFE STD BSD { int munmap(void *addr, size_t len); }
+74 MPSAFE STD BSD { int mprotect(const void *addr, size_t len, int prot); }
+75 MPSAFE STD BSD { int madvise(void *addr, size_t len, int behav); }
76 OBSOL NOHIDE vhangup
77 OBSOL NOHIDE vlimit
-78 STD BSD { int mincore(const void *addr, size_t len, \
+78 MPSAFE STD BSD { int mincore(const void *addr, size_t len, \
char *vec); }
79 STD POSIX { int getgroups(u_int gidsetsize, gid_t *gidset); }
80 STD POSIX { int setgroups(u_int gidsetsize, gid_t *gidset); }
@@ -306,7 +306,7 @@
setrlimit __setrlimit_args int
196 STD BSD { int getdirentries(int fd, char *buf, u_int count, \
long *basep); }
-197 STD BSD { caddr_t mmap(caddr_t addr, size_t len, int prot, \
+197 MPSAFE STD BSD { caddr_t mmap(caddr_t addr, size_t len, int prot, \
int flags, int fd, int pad, off_t pos); }
198 STD NOHIDE { int nosys(void); } __syscall __syscall_args int
199 STD POSIX { off_t lseek(int fd, int pad, off_t offset, \
@@ -318,8 +318,8 @@
__sysctl sysctl_args int
; properly, __sysctl should be a NOHIDE, but making an exception
; here allows to avoid one in libc/sys/Makefile.inc.
-203 STD BSD { int mlock(const void *addr, size_t len); }
-204 STD BSD { int munlock(const void *addr, size_t len); }
+203 MPSAFE STD BSD { int mlock(const void *addr, size_t len); }
+204 MPSAFE STD BSD { int munlock(const void *addr, size_t len); }
205 STD BSD { int undelete(char *path); }
206 STD BSD { int futimes(int fd, struct timeval *tptr); }
207 STD BSD { int getpgid(pid_t pid); }
@@ -386,7 +386,7 @@
248 UNIMPL NOHIDE nosys
249 UNIMPL NOHIDE nosys
; syscall numbers initially used in OpenBSD
-250 STD BSD { int minherit(void *addr, size_t len, int inherit); }
+250 MPSAFE STD BSD { int minherit(void *addr, size_t len, int inherit); }
251 STD BSD { int rfork(int flags); }
252 STD BSD { int openbsd_poll(struct pollfd *fds, u_int nfds, \
int timeout); }
@@ -414,7 +414,7 @@
274 STD BSD { int lchmod(char *path, mode_t mode); }
275 NOPROTO BSD { int lchown(char *path, uid_t uid, gid_t gid); } netbsd_lchown lchown_args int
276 STD BSD { int lutimes(char *path, struct timeval *tptr); }
-277 NOPROTO BSD { int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
+277 MPSAFE NOPROTO BSD { int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
278 STD BSD { int nstat(char *path, struct nstat *ub); }
279 STD BSD { int nfstat(int fd, struct nstat *sb); }
280 STD BSD { int nlstat(char *path, struct nstat *ub); }
@@ -463,8 +463,8 @@
321 STD BSD { int yield(void); }
322 OBSOL NOHIDE thr_sleep
323 OBSOL NOHIDE thr_wakeup
-324 STD BSD { int mlockall(int how); }
-325 STD BSD { int munlockall(void); }
+324 MPSAFE STD BSD { int mlockall(int how); }
+325 MPSAFE STD BSD { int munlockall(void); }
326 STD BSD { int __getcwd(u_char *buf, u_int buflen); }
327 STD POSIX { int sched_setparam (pid_t pid, const struct sched_param *param); }
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index fab53a8..0a9abda 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -43,6 +43,7 @@
#include <sys/shm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
@@ -314,14 +315,17 @@ shmat(p, uap)
}
shm_handle = shmseg->shm_internal;
+ mtx_lock(&vm_mtx);
vm_object_reference(shm_handle->shm_object);
rv = vm_map_find(&p->p_vmspace->vm_map, shm_handle->shm_object,
0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0);
if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return ENOMEM;
}
vm_map_inherit(&p->p_vmspace->vm_map,
attach_va, attach_va + size, VM_INHERIT_SHARE);
+ mtx_unlock(&vm_mtx);
shmmap_s->va = attach_va;
shmmap_s->shmid = uap->shmid;
@@ -549,6 +553,7 @@ shmget_allocate_segment(p, uap, mode)
* We make sure that we have allocated a pager before we need
* to.
*/
+ mtx_lock(&vm_mtx);
if (shm_use_phys) {
shm_handle->shm_object =
vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
@@ -558,6 +563,7 @@ shmget_allocate_segment(p, uap, mode)
}
vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);
+ mtx_unlock(&vm_mtx);
shmseg->shm_internal = shm_handle;
shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c1b53d8..a980330 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -281,6 +281,8 @@ waitrunningbufspace(void)
* Called when a buffer is extended. This function clears the B_CACHE
* bit if the newly extended portion of the buffer does not contain
* valid data.
+ *
+ * must be called with vm_mtx held
*/
static __inline__
void
@@ -426,11 +428,13 @@ bufinit(void)
* from buf_daemon.
*/
+ mtx_lock(&vm_mtx);
bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
bogus_page = vm_page_alloc(kernel_object,
((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
VM_ALLOC_NORMAL);
cnt.v_wire_count++;
+ mtx_unlock(&vm_mtx);
}
@@ -441,17 +445,27 @@ bufinit(void)
* buffer_map.
*
* Since this call frees up buffer space, we call bufspacewakeup().
+ *
+ * Can be called with or without the vm_mtx.
*/
static void
bfreekva(struct buf * bp)
{
+
if (bp->b_kvasize) {
+ int hadvmlock;
+
++buffreekvacnt;
bufspace -= bp->b_kvasize;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
vm_map_delete(buffer_map,
(vm_offset_t) bp->b_kvabase,
(vm_offset_t) bp->b_kvabase + bp->b_kvasize
);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
bp->b_kvasize = 0;
bufspacewakeup();
}
@@ -807,6 +821,7 @@ bdwrite(struct buf * bp)
VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
}
+ mtx_lock(&vm_mtx);
/*
* Set the *dirty* buffer range based upon the VM system dirty pages.
*/
@@ -820,6 +835,7 @@ bdwrite(struct buf * bp)
* out on the next sync, or perhaps the cluster will be completed.
*/
vfs_clean_pages(bp);
+ mtx_unlock(&vm_mtx);
bqrelse(bp);
/*
@@ -973,12 +989,15 @@ buf_dirty_count_severe(void)
* Release a busy buffer and, if requested, free its resources. The
* buffer will be stashed in the appropriate bufqueue[] allowing it
* to be accessed later as a cache entity or reused for other purposes.
+ *
+ * vm_mtx must be not be held.
*/
void
brelse(struct buf * bp)
{
int s;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
s = splbio();
@@ -1088,6 +1107,7 @@ brelse(struct buf * bp)
resid = bp->b_bufsize;
foff = bp->b_offset;
+ mtx_lock(&vm_mtx);
for (i = 0; i < bp->b_npages; i++) {
int had_bogus = 0;
@@ -1099,10 +1119,12 @@ brelse(struct buf * bp)
* now.
*/
if (m == bogus_page) {
+ mtx_unlock(&vm_mtx);
VOP_GETVOBJECT(vp, &obj);
poff = OFF_TO_IDX(bp->b_offset);
had_bogus = 1;
+ mtx_lock(&vm_mtx);
for (j = i; j < bp->b_npages; j++) {
vm_page_t mtmp;
mtmp = bp->b_pages[j];
@@ -1136,11 +1158,15 @@ brelse(struct buf * bp)
if (bp->b_flags & (B_INVAL | B_RELBUF))
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
} else if (bp->b_flags & B_VMIO) {
- if (bp->b_flags & (B_INVAL | B_RELBUF))
+ if (bp->b_flags & (B_INVAL | B_RELBUF)) {
+ mtx_lock(&vm_mtx);
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
+ }
}
@@ -1302,6 +1328,9 @@ bqrelse(struct buf * bp)
splx(s);
}
+/*
+ * Must be called with vm_mtx held.
+ */
static void
vfs_vmio_release(bp)
struct buf *bp;
@@ -1310,6 +1339,7 @@ vfs_vmio_release(bp)
vm_page_t m;
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
for (i = 0; i < bp->b_npages; i++) {
m = bp->b_pages[i];
bp->b_pages[i] = NULL;
@@ -1343,6 +1373,9 @@ vfs_vmio_release(bp)
}
splx(s);
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+
+ /* could drop vm_mtx here */
+
if (bp->b_bufsize) {
bufspacewakeup();
bp->b_bufsize = 0;
@@ -1614,7 +1647,9 @@ restart:
if (qindex == QUEUE_CLEAN) {
if (bp->b_flags & B_VMIO) {
bp->b_flags &= ~B_ASYNC;
+ mtx_lock(&vm_mtx);
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
}
if (bp->b_vp)
brelvp(bp);
@@ -1735,6 +1770,8 @@ restart:
if (maxsize != bp->b_kvasize) {
vm_offset_t addr = 0;
+ /* we'll hold the lock over some vm ops */
+ mtx_lock(&vm_mtx);
bfreekva(bp);
if (vm_map_findspace(buffer_map,
@@ -1743,6 +1780,7 @@ restart:
* Uh oh. Buffer map is to fragmented. We
* must defragment the map.
*/
+ mtx_unlock(&vm_mtx);
++bufdefragcnt;
defrag = 1;
bp->b_flags |= B_INVAL;
@@ -1759,6 +1797,7 @@ restart:
bufspace += bp->b_kvasize;
++bufreusecnt;
}
+ mtx_unlock(&vm_mtx);
}
bp->b_data = bp->b_kvabase;
}
@@ -1936,18 +1975,24 @@ inmem(struct vnode * vp, daddr_t blkno)
size = vp->v_mount->mnt_stat.f_iosize;
off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
+ mtx_lock(&vm_mtx);
for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
m = vm_page_lookup(obj, OFF_TO_IDX(off + toff));
if (!m)
- return 0;
+ goto notinmem;
tinc = size;
if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
if (vm_page_is_valid(m,
(vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
- return 0;
+ goto notinmem;
}
+ mtx_unlock(&vm_mtx);
return 1;
+
+notinmem:
+ mtx_unlock(&vm_mtx);
+ return (0);
}
/*
@@ -1960,11 +2005,14 @@ inmem(struct vnode * vp, daddr_t blkno)
*
* This routine is primarily used by NFS, but is generalized for the
* B_VMIO case.
+ *
+ * Can be called with or without vm_mtx
*/
static void
vfs_setdirty(struct buf *bp)
{
int i;
+ int hadvmlock;
vm_object_t object;
/*
@@ -1983,6 +2031,10 @@ vfs_setdirty(struct buf *bp)
if ((bp->b_flags & B_VMIO) == 0)
return;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+
object = bp->b_pages[0]->object;
if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY))
@@ -2040,6 +2092,8 @@ vfs_setdirty(struct buf *bp)
bp->b_dirtyend = eoffset;
}
}
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
/*
@@ -2441,6 +2495,7 @@ allocbuf(struct buf *bp, int size)
* DEV_BSIZE aligned existing buffer size. Figure out
* if we have to remove any pages.
*/
+ mtx_lock(&vm_mtx);
if (desiredpages < bp->b_npages) {
for (i = desiredpages; i < bp->b_npages; i++) {
/*
@@ -2461,6 +2516,7 @@ allocbuf(struct buf *bp, int size)
(desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages));
bp->b_npages = desiredpages;
}
+ mtx_unlock(&vm_mtx);
} else if (size > bp->b_bcount) {
/*
* We are growing the buffer, possibly in a
@@ -2481,6 +2537,7 @@ allocbuf(struct buf *bp, int size)
vp = bp->b_vp;
VOP_GETVOBJECT(vp, &obj);
+ mtx_lock(&vm_mtx);
while (bp->b_npages < desiredpages) {
vm_page_t m;
vm_pindex_t pi;
@@ -2589,6 +2646,9 @@ allocbuf(struct buf *bp, int size)
bp->b_pages,
bp->b_npages
);
+
+ mtx_unlock(&vm_mtx);
+
bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
(vm_offset_t)(bp->b_offset & PAGE_MASK));
}
@@ -2726,6 +2786,7 @@ bufdone(struct buf *bp)
if (error) {
panic("biodone: no object");
}
+ mtx_lock(&vm_mtx);
#if defined(VFS_BIO_DEBUG)
if (obj->paging_in_progress < bp->b_npages) {
printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
@@ -2814,6 +2875,7 @@ bufdone(struct buf *bp)
}
if (obj)
vm_object_pip_wakeupn(obj, 0);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -2837,12 +2899,15 @@ bufdone(struct buf *bp)
* This routine is called in lieu of iodone in the case of
* incomplete I/O. This keeps the busy status for pages
* consistant.
+ *
+ * vm_mtx should not be held
*/
void
vfs_unbusy_pages(struct buf * bp)
{
int i;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
runningbufwakeup(bp);
if (bp->b_flags & B_VMIO) {
struct vnode *vp = bp->b_vp;
@@ -2850,6 +2915,7 @@ vfs_unbusy_pages(struct buf * bp)
VOP_GETVOBJECT(vp, &obj);
+ mtx_lock(&vm_mtx);
for (i = 0; i < bp->b_npages; i++) {
vm_page_t m = bp->b_pages[i];
@@ -2866,6 +2932,7 @@ vfs_unbusy_pages(struct buf * bp)
vm_page_io_finish(m);
}
vm_object_pip_wakeupn(obj, 0);
+ mtx_unlock(&vm_mtx);
}
}
@@ -2876,12 +2943,15 @@ vfs_unbusy_pages(struct buf * bp)
* range is restricted to the buffer's size.
*
* This routine is typically called after a read completes.
+ *
+ * vm_mtx should be held
*/
static void
vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
{
vm_ooffset_t soff, eoff;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Start and end offsets in buffer. eoff - soff may not cross a
* page boundry or cross the end of the buffer. The end of the
@@ -2917,12 +2987,15 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
* Since I/O has not been initiated yet, certain buffer flags
* such as BIO_ERROR or B_INVAL may be in an inconsistant state
* and should be ignored.
+ *
+ * vm_mtx should not be held
*/
void
vfs_busy_pages(struct buf * bp, int clear_modify)
{
int i, bogus;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
if (bp->b_flags & B_VMIO) {
struct vnode *vp = bp->b_vp;
vm_object_t obj;
@@ -2932,6 +3005,7 @@ vfs_busy_pages(struct buf * bp, int clear_modify)
foff = bp->b_offset;
KASSERT(bp->b_offset != NOOFFSET,
("vfs_busy_pages: no buffer offset"));
+ mtx_lock(&vm_mtx);
vfs_setdirty(bp);
retry:
@@ -2979,6 +3053,7 @@ retry:
}
if (bogus)
pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
}
}
@@ -2989,12 +3064,15 @@ retry:
*
* Note that while we only really need to clean through to b_bcount, we
* just go ahead and clean through to b_bufsize.
+ *
+ * should be called with vm_mtx held
*/
static void
vfs_clean_pages(struct buf * bp)
{
int i;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (bp->b_flags & B_VMIO) {
vm_ooffset_t foff;
@@ -3021,6 +3099,7 @@ vfs_clean_pages(struct buf * bp)
* Set the range within the buffer to valid and clean. The range is
* relative to the beginning of the buffer, b_offset. Note that b_offset
* itself may be offset from the beginning of the first page.
+ *
*/
void
@@ -3061,13 +3140,18 @@ vfs_bio_set_validclean(struct buf *bp, int base, int size)
*
* Note that while we only theoretically need to clear through b_bcount,
* we go ahead and clear through b_bufsize.
+ *
+ * We'll get vm_mtx here for safety if processing a VMIO buffer.
+ * I don't think vm_mtx is needed, but we're twiddling vm_page flags.
*/
void
vfs_bio_clrbuf(struct buf *bp) {
int i, mask = 0;
caddr_t sa, ea;
+
if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
+ mtx_lock(&vm_mtx);
bp->b_flags &= ~B_INVAL;
bp->b_ioflags &= ~BIO_ERROR;
if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
@@ -3079,6 +3163,7 @@ vfs_bio_clrbuf(struct buf *bp) {
}
bp->b_pages[0]->valid |= mask;
bp->b_resid = 0;
+ mtx_unlock(&vm_mtx);
return;
}
ea = sa = bp->b_data;
@@ -3106,6 +3191,7 @@ vfs_bio_clrbuf(struct buf *bp) {
vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
}
bp->b_resid = 0;
+ mtx_unlock(&vm_mtx);
} else {
clrbuf(bp);
}
@@ -3115,18 +3201,22 @@ vfs_bio_clrbuf(struct buf *bp) {
* vm_hold_load_pages and vm_hold_unload pages get pages into
* a buffers address space. The pages are anonymous and are
* not associated with a file object.
+ *
+ * vm_mtx should not be held
*/
-void
+static void
vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
{
vm_offset_t pg;
vm_page_t p;
int index;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
to = round_page(to);
from = round_page(from);
index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ mtx_lock(&vm_mtx);
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
tryagain:
@@ -3152,6 +3242,7 @@ tryagain:
vm_page_wakeup(p);
}
bp->b_npages = index;
+ mtx_unlock(&vm_mtx);
}
void
@@ -3160,11 +3251,15 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
vm_offset_t pg;
vm_page_t p;
int index, newnpages;
+ int hadvmlock;
from = round_page(from);
to = round_page(to);
newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
p = bp->b_pages[index];
if (p && (index < bp->b_npages)) {
@@ -3180,6 +3275,8 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
}
}
bp->b_npages = newnpages;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 8a6e045..0eb47bd 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -433,6 +433,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
BUF_KERNPROC(tbp);
TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
tbp, b_cluster.cluster_entry);
+ mtx_lock(&vm_mtx);
for (j = 0; j < tbp->b_npages; j += 1) {
vm_page_t m;
m = tbp->b_pages[j];
@@ -446,10 +447,12 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
tbp->b_pages[j] = bogus_page;
}
+ mtx_unlock(&vm_mtx);
bp->b_bcount += tbp->b_bcount;
bp->b_bufsize += tbp->b_bufsize;
}
+ mtx_lock(&vm_mtx);
for(j=0;j<bp->b_npages;j++) {
if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) ==
VM_PAGE_BITS_ALL)
@@ -462,6 +465,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *)bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
return (bp);
}
@@ -484,7 +488,9 @@ cluster_callback(bp)
if (bp->b_ioflags & BIO_ERROR)
error = bp->b_error;
+ mtx_lock(&vm_mtx);
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+ mtx_unlock(&vm_mtx);
/*
* Move memory from the large cluster buffer into the component
* buffers and mark IO as done on these.
@@ -851,6 +857,7 @@ cluster_wbuild(vp, size, start_lbn, len)
}
}
+ mtx_lock(&vm_mtx);
for (j = 0; j < tbp->b_npages; j += 1) {
m = tbp->b_pages[j];
vm_page_io_start(m);
@@ -861,6 +868,7 @@ cluster_wbuild(vp, size, start_lbn, len)
bp->b_npages++;
}
}
+ mtx_unlock(&vm_mtx);
}
bp->b_bcount += size;
bp->b_bufsize += size;
@@ -879,8 +887,10 @@ cluster_wbuild(vp, size, start_lbn, len)
tbp, b_cluster.cluster_entry);
}
finishcluster:
+ mtx_lock(&vm_mtx);
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *) bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
if (bp->b_bufsize > bp->b_kvasize)
panic(
"cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 328a9b1..d17e934 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -535,14 +535,18 @@ retry:
if (vp->v_type == VREG || vp->v_type == VDIR) {
if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
goto retn;
+ mtx_lock(&vm_mtx);
object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
+ mtx_unlock(&vm_mtx);
} else if (devsw(vp->v_rdev) != NULL) {
/*
* This simply allocates the biggest object possible
* for a disk vnode. This should be fixed, but doesn't
* cause any problems (yet).
*/
+ mtx_lock(&vm_mtx);
object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
+ mtx_unlock(&vm_mtx);
} else {
goto retn;
}
@@ -550,15 +554,23 @@ retry:
* Dereference the reference we just created. This assumes
* that the object is associated with the vp.
*/
+ mtx_lock(&vm_mtx);
object->ref_count--;
+ mtx_unlock(&vm_mtx);
vp->v_usecount--;
} else {
+ /*
+ * XXX: safe to hold vm mutex through VOP_UNLOCK?
+ */
+ mtx_lock(&vm_mtx);
if (object->flags & OBJ_DEAD) {
VOP_UNLOCK(vp, 0, p);
- tsleep(object, PVM, "vodead", 0);
+ msleep(object, VM_OBJECT_MTX(object), PVM, "vodead", 0);
+ mtx_unlock(&vm_mtx);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
goto retry;
}
+ mtx_unlock(&vm_mtx);
}
KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
@@ -580,6 +592,7 @@ vop_stddestroyvobject(ap)
if (vp->v_object == NULL)
return (0);
+ mtx_lock(&vm_mtx);
if (obj->ref_count == 0) {
/*
* vclean() may be called twice. The first time
@@ -594,6 +607,7 @@ vop_stddestroyvobject(ap)
*/
vm_pager_deallocate(obj);
}
+ mtx_unlock(&vm_mtx);
return (0);
}
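
The vodead hunk above is the general tsleep()-to-msleep() conversion: the condition being waited on is now protected by vm_mtx (which is what VM_OBJECT_MTX() currently expands to), so the sleep must release that mutex atomically and retake it before the condition is rechecked. A rough standalone sketch of the pattern, with a hypothetical flag standing in for the object state:

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>

    extern struct mtx vm_mtx;
    static int example_busy;          /* hypothetical condition, protected by vm_mtx */

    static void
    example_wait_until_idle(void)
    {
            mtx_lock(&vm_mtx);
            while (example_busy)
                    /* Drops vm_mtx while asleep, retakes it before returning. */
                    msleep(&example_busy, &vm_mtx, PVM, "exwait", 0);
            mtx_unlock(&vm_mtx);
    }

    static void
    example_set_idle(void)
    {
            mtx_lock(&vm_mtx);
            example_busy = 0;
            wakeup(&example_busy);    /* wake sleepers in example_wait_until_idle() */
            mtx_unlock(&vm_mtx);
    }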
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 6b73258..3f97551 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- if (VOP_GETVOBJECT(vp, &obj) == 0)
+ if (VOP_GETVOBJECT(vp, &obj) == 0) {
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
vm_object_page_clean(obj, 0, 0, 0);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
+ }
error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
#ifdef SOFTUPDATES
if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 2f4dc8d..6c050ba 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -711,6 +711,8 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
int s, error;
vm_object_t object;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+
if (flags & V_SAVE) {
s = splbio();
while (vp->v_numoutput) {
@@ -797,8 +799,10 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
*/
mtx_lock(&vp->v_interlock);
if (VOP_GETVOBJECT(vp, &object) == 0) {
+ mtx_lock(&vm_mtx);
vm_object_page_remove(object, 0, 0,
(flags & V_SAVE) ? TRUE : FALSE);
+ mtx_unlock(&vm_mtx);
}
mtx_unlock(&vp->v_interlock);
@@ -1132,6 +1136,8 @@ speedup_syncer()
* Also sets B_PAGING flag to indicate that vnode is not fully associated
* with the buffer. i.e. the bp has not been linked into the vnode or
* ref-counted.
+ *
+ * Doesn't block; only the vnode seems to need a lock.
*/
void
pbgetvp(vp, bp)
@@ -1554,6 +1560,7 @@ vput(vp)
{
struct proc *p = curproc; /* XXX */
+ mtx_assert(&Giant, MA_OWNED);
KASSERT(vp != NULL, ("vput: null vp"));
mtx_lock(&vp->v_interlock);
/* Skip this v_writecount check if we're going to panic below. */
@@ -2382,7 +2389,11 @@ loop:
if (!vget(vp,
LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
if (VOP_GETVOBJECT(vp, &obj) == 0) {
- vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
+ mtx_lock(&vm_mtx);
+ vm_object_page_clean(obj, 0, 0,
+ flags == MNT_WAIT ?
+ OBJPC_SYNC : OBJPC_NOSYNC);
+ mtx_unlock(&vm_mtx);
anyio = 1;
}
vput(vp);
@@ -2409,6 +2420,8 @@ vfs_object_create(vp, p, cred)
struct proc *p;
struct ucred *cred;
{
+
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
return (VOP_CREATEVOBJECT(vp, cred, p));
}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 6b73258..3f97551 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- if (VOP_GETVOBJECT(vp, &obj) == 0)
+ if (VOP_GETVOBJECT(vp, &obj) == 0) {
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
vm_object_page_clean(obj, 0, 0, 0);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
+ }
error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
#ifdef SOFTUPDATES
if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
diff --git a/sys/miscfs/procfs/procfs_map.c b/sys/miscfs/procfs/procfs_map.c
index 5e4a30c..5c21993 100644
--- a/sys/miscfs/procfs/procfs_map.c
+++ b/sys/miscfs/procfs/procfs_map.c
@@ -42,6 +42,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/vnode.h>
diff --git a/sys/miscfs/procfs/procfs_mem.c b/sys/miscfs/procfs/procfs_mem.c
index 3a2f8d2..1e28870 100644
--- a/sys/miscfs/procfs/procfs_mem.c
+++ b/sys/miscfs/procfs/procfs_mem.c
@@ -48,6 +48,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/user.h>
@@ -88,8 +89,14 @@ procfs_rwmem(curp, p, uio)
* usage in that process can be messed up.
*/
vm = p->p_vmspace;
- if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1))
+ if ((p->p_flag & P_WEXIT))
return EFAULT;
+
+ mtx_lock(&vm_mtx);
+ if (vm->vm_refcnt < 1) {
+ mtx_unlock(&vm_mtx);
+ return EFAULT;
+ }
++vm->vm_refcnt;
/*
* The map we want...
@@ -207,7 +214,9 @@ procfs_rwmem(curp, p, uio)
/*
* Now do the i/o move.
*/
+ mtx_unlock(&vm_mtx);
error = uiomove((caddr_t)(kva + page_offset), len, uio);
+ mtx_lock(&vm_mtx);
pmap_kremove(kva);
@@ -226,6 +235,7 @@ procfs_rwmem(curp, p, uio)
kmem_free(kernel_map, kva, PAGE_SIZE);
vmspace_free(vm);
+ mtx_unlock(&vm_mtx);
return (error);
}
diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c
index 2940f40..ba81229 100644
--- a/sys/miscfs/specfs/spec_vnops.c
+++ b/sys/miscfs/specfs/spec_vnops.c
@@ -731,6 +731,8 @@ spec_getpages(ap)
cnt.v_vnodein++;
cnt.v_vnodepgsin += pcount;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
/* Do the input. */
BUF_STRATEGY(bp);
@@ -741,6 +743,8 @@ spec_getpages(ap)
tsleep(bp, PVM, "spread", 0);
splx(s);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if ((bp->b_ioflags & BIO_ERROR) != 0) {
if (bp->b_error)
diff --git a/sys/miscfs/union/union_subr.c b/sys/miscfs/union/union_subr.c
index 869818f..3ac98bf 100644
--- a/sys/miscfs/union/union_subr.c
+++ b/sys/miscfs/union/union_subr.c
@@ -45,6 +45,7 @@
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mount.h>
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
index cb7297f..234815c 100644
--- a/sys/nfs/nfs_bio.c
+++ b/sys/nfs/nfs_bio.c
@@ -124,8 +124,13 @@ nfs_getpages(ap)
}
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
(void)nfs_fsinfo(nmp, vp, cred, p);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
npages = btoc(count);
@@ -168,7 +173,11 @@ nfs_getpages(ap)
uio.uio_rw = UIO_READ;
uio.uio_procp = p;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
error = nfs_readrpc(vp, &uio, cred);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
pmap_qremove(kva, npages);
relpbuf(bp, &nfs_pbuf_freecnt);
@@ -280,8 +289,13 @@ nfs_putpages(ap)
offset = IDX_TO_OFF(pages[0]->pindex);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
(void)nfs_fsinfo(nmp, vp, cred, p);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
for (i = 0; i < npages; i++) {
rtvals[i] = VM_PAGER_AGAIN;
@@ -321,7 +335,11 @@ nfs_putpages(ap)
else
iomode = NFSV3WRITE_FILESYNC;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
pmap_qremove(kva, npages);
relpbuf(bp, &nfs_pbuf_freecnt);
@@ -332,8 +350,13 @@ nfs_putpages(ap)
rtvals[i] = VM_PAGER_OK;
vm_page_undirty(pages[i]);
}
- if (must_commit)
+ if (must_commit) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
nfs_clearcommit(vp->v_mount);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
}
return rtvals[0];
}
@@ -1076,7 +1099,9 @@ again:
bp->b_dirtyoff = on;
bp->b_dirtyend = on + n;
}
+ mtx_lock(&vm_mtx);
vfs_bio_set_validclean(bp, on, n);
+ mtx_unlock(&vm_mtx);
}
/*
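
The NFS pager hunks all bracket their blocking RPCs the same way: vm_mtx is dropped before the call, Giant is taken for the RPC/VOP path, and afterwards Giant is released and vm_mtx retaken before any page state is touched again. A condensed sketch of that hand-off; example_do_rpc() is a hypothetical stand-in for nfs_readrpc()/nfs_writerpc():

    #include <sys/param.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>

    extern struct mtx vm_mtx;
    extern struct mtx Giant;

    int example_do_rpc(void);   /* hypothetical: blocks, wants Giant, not vm_mtx */

    static int
    example_pager_io(void)
    {
            int error;

            mtx_assert(&vm_mtx, MA_OWNED);  /* pager entry points run under vm_mtx */

            mtx_unlock(&vm_mtx);            /* don't sleep in the RPC with vm_mtx held */
            mtx_lock(&Giant);
            error = example_do_rpc();
            mtx_unlock(&Giant);
            mtx_lock(&vm_mtx);              /* back under the vm lock for page updates */

            return (error);
    }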
diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c
index c7e6917..18cb8a9 100644
--- a/sys/nfs/nfs_common.c
+++ b/sys/nfs/nfs_common.c
@@ -2139,6 +2139,8 @@ nfs_clearcommit(mp)
int s;
s = splbio();
+ mtx_assert(&Giant, MA_OWNED);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
loop:
for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
if (vp->v_mount != mp) /* Paranoia */
diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c
index c7e6917..18cb8a9 100644
--- a/sys/nfs/nfs_subs.c
+++ b/sys/nfs/nfs_subs.c
@@ -2139,6 +2139,8 @@ nfs_clearcommit(mp)
int s;
s = splbio();
+ mtx_assert(&Giant, MA_OWNED);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
loop:
for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
if (vp->v_mount != mp) /* Paranoia */
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
index cb7297f..234815c 100644
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -124,8 +124,13 @@ nfs_getpages(ap)
}
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
(void)nfs_fsinfo(nmp, vp, cred, p);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
npages = btoc(count);
@@ -168,7 +173,11 @@ nfs_getpages(ap)
uio.uio_rw = UIO_READ;
uio.uio_procp = p;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
error = nfs_readrpc(vp, &uio, cred);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
pmap_qremove(kva, npages);
relpbuf(bp, &nfs_pbuf_freecnt);
@@ -280,8 +289,13 @@ nfs_putpages(ap)
offset = IDX_TO_OFF(pages[0]->pindex);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
(void)nfs_fsinfo(nmp, vp, cred, p);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
for (i = 0; i < npages; i++) {
rtvals[i] = VM_PAGER_AGAIN;
@@ -321,7 +335,11 @@ nfs_putpages(ap)
else
iomode = NFSV3WRITE_FILESYNC;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
pmap_qremove(kva, npages);
relpbuf(bp, &nfs_pbuf_freecnt);
@@ -332,8 +350,13 @@ nfs_putpages(ap)
rtvals[i] = VM_PAGER_OK;
vm_page_undirty(pages[i]);
}
- if (must_commit)
+ if (must_commit) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
nfs_clearcommit(vp->v_mount);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
}
return rtvals[0];
}
@@ -1076,7 +1099,9 @@ again:
bp->b_dirtyoff = on;
bp->b_dirtyend = on + n;
}
+ mtx_lock(&vm_mtx);
vfs_bio_set_validclean(bp, on, n);
+ mtx_unlock(&vm_mtx);
}
/*
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
index c7e6917..18cb8a9 100644
--- a/sys/nfsclient/nfs_subs.c
+++ b/sys/nfsclient/nfs_subs.c
@@ -2139,6 +2139,8 @@ nfs_clearcommit(mp)
int s;
s = splbio();
+ mtx_assert(&Giant, MA_OWNED);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
loop:
for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
if (vp->v_mount != mp) /* Paranoia */
diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c
index c7e6917..18cb8a9 100644
--- a/sys/nfsserver/nfs_srvsubs.c
+++ b/sys/nfsserver/nfs_srvsubs.c
@@ -2139,6 +2139,8 @@ nfs_clearcommit(mp)
int s;
s = splbio();
+ mtx_assert(&Giant, MA_OWNED);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
loop:
for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
if (vp->v_mount != mp) /* Paranoia */
diff --git a/sys/pci/agp.c b/sys/pci/agp.c
index 6419635..333c4c8 100644
--- a/sys/pci/agp.c
+++ b/sys/pci/agp.c
@@ -38,6 +38,7 @@
#include <sys/ioccom.h>
#include <sys/agpio.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <pci/pcivar.h>
diff --git a/sys/pci/agp_ali.c b/sys/pci/agp_ali.c
index 86e070e..aa805e1 100644
--- a/sys/pci/agp_ali.c
+++ b/sys/pci/agp_ali.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/pci/agp_amd.c b/sys/pci/agp_amd.c
index 0a498f7..4aaf4e9 100644
--- a/sys/pci/agp_amd.c
+++ b/sys/pci/agp_amd.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/pci/agp_i810.c b/sys/pci/agp_i810.c
index 79fd566..5c40493 100644
--- a/sys/pci/agp_i810.c
+++ b/sys/pci/agp_i810.c
@@ -36,6 +36,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/pci/agp_intel.c b/sys/pci/agp_intel.c
index a4b9a43..dc1ef4d 100644
--- a/sys/pci/agp_intel.c
+++ b/sys/pci/agp_intel.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/pci/agp_sis.c b/sys/pci/agp_sis.c
index 1f1a50b..a6a20a4 100644
--- a/sys/pci/agp_sis.c
+++ b/sys/pci/agp_sis.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/pci/agp_via.c b/sys/pci/agp_via.c
index 983348e..086b027 100644
--- a/sys/pci/agp_via.c
+++ b/sys/pci/agp_via.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
index db9e239..672d0a0 100644
--- a/sys/ufs/ufs/ufs_readwrite.c
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -114,8 +114,11 @@ READ(ap)
return 0;
}
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_reference(object);
+ mtx_unlock(&vm_mtx);
+ }
#ifdef ENABLE_VFS_IOOPT
/*
@@ -147,8 +150,11 @@ READ(ap)
(vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
ip->i_flag |= IN_ACCESS;
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return error;
}
}
@@ -192,8 +198,11 @@ READ(ap)
(vp->v_mount->mnt_flag &
MNT_NOATIME) == 0)
ip->i_flag |= IN_ACCESS;
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return error;
}
/*
@@ -355,8 +364,11 @@ READ(ap)
}
}
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
if ((error == 0 || uio->uio_resid != orig_resid) &&
(vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
ip->i_flag |= IN_ACCESS;
@@ -395,8 +407,11 @@ WRITE(ap)
ip = VTOI(vp);
object = vp->v_object;
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_reference(object);
+ mtx_unlock(&vm_mtx);
+ }
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
@@ -408,8 +423,11 @@ WRITE(ap)
if (ioflag & IO_APPEND)
uio->uio_offset = ip->i_size;
if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return (EPERM);
}
/* FALLTHROUGH */
@@ -428,8 +446,11 @@ WRITE(ap)
fs = ip->I_FS;
if (uio->uio_offset < 0 ||
(u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return (EFBIG);
}
/*
@@ -443,8 +464,11 @@ WRITE(ap)
PROC_LOCK(p);
psignal(p, SIGXFSZ);
PROC_UNLOCK(p);
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return (EFBIG);
}
@@ -455,9 +479,11 @@ WRITE(ap)
flags = B_SYNC;
if (object && (object->flags & OBJ_OPT)) {
+ mtx_lock(&vm_mtx);
vm_freeze_copyopts(object,
OFF_TO_IDX(uio->uio_offset),
OFF_TO_IDX(uio->uio_offset + uio->uio_resid + PAGE_MASK));
+ mtx_unlock(&vm_mtx);
}
for (error = 0; uio->uio_resid > 0;) {
@@ -546,8 +572,11 @@ WRITE(ap)
} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
error = UFS_UPDATE(vp, 1);
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return (error);
}
diff --git a/sys/vm/default_pager.c b/sys/vm/default_pager.c
index f5d88a5..0fb4896 100644
--- a/sys/vm/default_pager.c
+++ b/sys/vm/default_pager.c
@@ -41,6 +41,8 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c
index 1f00ea0..d34672b 100644
--- a/sys/vm/phys_pager.c
+++ b/sys/vm/phys_pager.c
@@ -34,7 +34,6 @@
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
-#include <sys/sx.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -43,7 +42,7 @@
#include <vm/vm_zone.h>
/* prevent concurrent creation races */
-static struct sx phys_pager_sx;
+static int phys_pager_alloc_lock;
/* list of device pager objects */
static struct pagerlst phys_pager_object_list;
/* protect access to phys_pager_object_list */
@@ -54,7 +53,6 @@ phys_pager_init(void)
{
TAILQ_INIT(&phys_pager_object_list);
- sx_init(&phys_pager_sx, "phys_pager create");
mtx_init(&phys_pager_mtx, "phys_pager list", MTX_DEF);
}
@@ -76,8 +74,11 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
/*
* Lock to prevent object creation race condition.
*/
- sx_xlock(&phys_pager_sx);
-
+ while (phys_pager_alloc_lock) {
+ phys_pager_alloc_lock = -1;
+ msleep(&phys_pager_alloc_lock, &vm_mtx, PVM, "swpalc", 0);
+ }
+
/*
* Look up pager, creating as necessary.
*/
@@ -101,7 +102,10 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
if (OFF_TO_IDX(foff + size) > object->size)
object->size = OFF_TO_IDX(foff + size);
}
- sx_xunlock(&phys_pager_sx);
+ if (phys_pager_alloc_lock)
+ wakeup(&phys_pager_alloc_lock);
+ phys_pager_alloc_lock = 0;
+
} else {
object = vm_object_allocate(OBJT_PHYS,
OFF_TO_IDX(foff + size));
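
With vm_mtx covering the pager allocation path, the sx lock is replaced by a sleep-based interlock: an int guarded by vm_mtx marks a creation in progress, waiters msleep() on its address, and the release path wakes them (the patch's variant stores -1 in the flag while waiters are present so the wakeup can be skipped when nobody waits). A minimal version of such an interlock, with illustrative names rather than the patch's:

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>

    extern struct mtx vm_mtx;
    static int example_alloc_busy;          /* guarded by vm_mtx */

    static void
    example_alloc_enter(void)
    {
            mtx_assert(&vm_mtx, MA_OWNED);
            while (example_alloc_busy)
                    msleep(&example_alloc_busy, &vm_mtx, PVM, "exalc", 0);
            example_alloc_busy = 1;         /* we now own the creation path */
    }

    static void
    example_alloc_leave(void)
    {
            mtx_assert(&vm_mtx, MA_OWNED);
            example_alloc_busy = 0;
            wakeup(&example_alloc_busy);    /* let the next creator proceed */
    }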
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 8d343f4..44f4465 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -80,7 +80,6 @@
#include <sys/sysctl.h>
#include <sys/blist.h>
#include <sys/lock.h>
-#include <sys/sx.h>
#include <sys/vmmeter.h>
#ifndef MAX_PAGEOUT_CLUSTER
@@ -119,6 +118,7 @@ static int nsw_wcount_sync; /* limit write buffers / synchronous */
static int nsw_wcount_async; /* limit write buffers / asynchronous */
static int nsw_wcount_async_max;/* assigned maximum */
static int nsw_cluster_max; /* maximum VOP I/O allowed */
+static int sw_alloc_interlock; /* swap pager allocation interlock */
struct blist *swapblist;
static struct swblock **swhash;
@@ -145,7 +145,6 @@ SYSCTL_INT(_vm, OID_AUTO, swap_async_max,
#define NOBJLIST(handle) \
(&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)])
-static struct sx sw_alloc_sx; /* prevent concurrant creation */
static struct mtx sw_alloc_mtx; /* protect list manipulation */
static struct pagerlst swap_pager_object_list[NOBJLISTS];
struct pagerlst swap_pager_un_object_list;
@@ -233,6 +232,8 @@ static daddr_t swp_pager_meta_ctl __P((vm_object_t, vm_pindex_t, int));
static __inline void
swp_sizecheck()
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
if (vm_swap_size < nswap_lowat) {
if (swap_pager_almost_full == 0) {
printf("swap_pager: out of swap space\n");
@@ -264,7 +265,6 @@ swap_pager_init()
for (i = 0; i < NOBJLISTS; ++i)
TAILQ_INIT(&swap_pager_object_list[i]);
TAILQ_INIT(&swap_pager_un_object_list);
- sx_init(&sw_alloc_sx, "swap_pager create");
mtx_init(&sw_alloc_mtx, "swap_pager list", MTX_DEF);
/*
@@ -389,7 +389,10 @@ swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
* of the handle.
*/
- sx_xlock(&sw_alloc_sx);
+ while (sw_alloc_interlock) {
+ sw_alloc_interlock = -1;
+ msleep(&sw_alloc_interlock, &vm_mtx, PVM, "swpalc", 0);
+ }
object = vm_pager_object_lookup(NOBJLIST(handle), handle);
@@ -403,7 +406,9 @@ swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
swp_pager_meta_build(object, 0, SWAPBLK_NONE);
}
- sx_xunlock(&sw_alloc_sx);
+ if (sw_alloc_interlock < 0)
+ wakeup(&sw_alloc_interlock);
+ sw_alloc_interlock = 0;
} else {
object = vm_object_allocate(OBJT_DEFAULT,
OFF_TO_IDX(offset + PAGE_MASK + size));
@@ -478,6 +483,7 @@ swap_pager_dealloc(object)
*
* This routine may not block
* This routine must be called at splvm().
+ * vm_mtx should be held
*/
static __inline daddr_t
@@ -486,6 +492,7 @@ swp_pager_getswapspace(npages)
{
daddr_t blk;
+ mtx_assert(&vm_mtx, MA_OWNED);
if ((blk = blist_alloc(swapblist, npages)) == SWAPBLK_NONE) {
if (swap_pager_full != 2) {
printf("swap_pager_getswapspace: failed\n");
@@ -514,6 +521,7 @@ swp_pager_getswapspace(npages)
*
* This routine may not block
* This routine must be called at splvm().
+ * vm_mtx should be held
*/
static __inline void
@@ -521,6 +529,8 @@ swp_pager_freeswapspace(blk, npages)
daddr_t blk;
int npages;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
blist_free(swapblist, blk, npages);
vm_swap_size += npages;
/* per-swap area stats */
@@ -551,6 +561,9 @@ swap_pager_freespace(object, start, size)
vm_size_t size;
{
int s = splvm();
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+
swp_pager_meta_free(object, start, size);
splx(s);
}
@@ -635,6 +648,8 @@ swap_pager_copy(srcobject, dstobject, offset, destroysource)
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
+
/*
* If destroysource is set, we remove the source object from the
* swap_pager internal queue now.
@@ -881,7 +896,9 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
* FREE PAGE(s) - destroy underlying swap that is no longer
* needed.
*/
+ mtx_lock(&vm_mtx);
swp_pager_meta_free(object, start, count);
+ mtx_unlock(&vm_mtx);
splx(s);
bp->bio_resid = 0;
biodone(bp);
@@ -892,6 +909,7 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
* Execute read or write
*/
+ mtx_lock(&vm_mtx);
while (count > 0) {
daddr_t blk;
@@ -954,7 +972,9 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
bp->bio_resid -= PAGE_SIZE;
} else {
if (nbp == NULL) {
+ mtx_unlock(&vm_mtx);
nbp = getchainbuf(bp, swapdev_vp, B_ASYNC);
+ mtx_lock(&vm_mtx);
nbp->b_blkno = blk;
nbp->b_bcount = 0;
nbp->b_data = data;
@@ -985,6 +1005,7 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
/* nbp = NULL; */
}
+ mtx_unlock(&vm_mtx);
/*
* Wait for completion.
*/
@@ -1281,6 +1302,7 @@ swap_pager_putpages(object, m, count, sync, rtvals)
* at this time.
*/
s = splvm();
+ mtx_unlock(&vm_mtx);
mtx_lock(&pbuf_mtx);
n -= nsw_wcount_async_max;
if (nsw_wcount_async + n >= 0) {
@@ -1289,6 +1311,7 @@ swap_pager_putpages(object, m, count, sync, rtvals)
wakeup(&nsw_wcount_async);
}
mtx_unlock(&pbuf_mtx);
+ mtx_lock(&vm_mtx);
splx(s);
}
@@ -1399,6 +1422,8 @@ swap_pager_putpages(object, m, count, sync, rtvals)
swapdev_vp->v_numoutput++;
splx(s);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
/*
* asynchronous
@@ -1410,9 +1435,12 @@ swap_pager_putpages(object, m, count, sync, rtvals)
bp->b_iodone = swp_pager_async_iodone;
BUF_KERNPROC(bp);
BUF_STRATEGY(bp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
for (j = 0; j < n; ++j)
rtvals[i+j] = VM_PAGER_PEND;
+ /* restart outer loop */
continue;
}
@@ -1445,6 +1473,8 @@ swap_pager_putpages(object, m, count, sync, rtvals)
* normal async completion, which frees everything up.
*/
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
swp_pager_async_iodone(bp);
splx(s);
@@ -1732,7 +1762,8 @@ swp_pager_hash(vm_object_t object, vm_pindex_t index)
*
* This routine must be called at splvm(), except when used to convert
* an OBJT_DEFAULT object into an OBJT_SWAP object.
-
+ *
+ * Requires vm_mtx.
*/
static void
@@ -1744,6 +1775,7 @@ swp_pager_meta_build(
struct swblock *swap;
struct swblock **pswap;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Convert default object to swap object if necessary
*/
@@ -1830,12 +1862,16 @@ retry:
* out. This routine does *NOT* operate on swap metadata associated
* with resident pages.
*
+ * vm_mtx must be held
* This routine must be called at splvm()
*/
static void
swp_pager_meta_free(vm_object_t object, vm_pindex_t index, daddr_t count)
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (object->type != OBJT_SWAP)
return;
@@ -1875,6 +1911,7 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t index, daddr_t count)
* an object.
*
* This routine must be called at splvm()
+ * Requires vm_mtx.
*/
static void
@@ -1882,6 +1919,8 @@ swp_pager_meta_free_all(vm_object_t object)
{
daddr_t index = 0;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (object->type != OBJT_SWAP)
return;
@@ -1930,6 +1969,7 @@ swp_pager_meta_free_all(vm_object_t object)
* busy page.
*
* This routine must be called at splvm().
+ * Requires vm_mtx.
*
* SWM_FREE remove and free swap block from metadata
* SWM_POP remove from meta data but do not free.. pop it out
@@ -2032,18 +2072,24 @@ vm_pager_chain_iodone(struct buf *nbp)
* Obtain a physical buffer and chain it to its parent buffer. When
* I/O completes, the parent buffer will be B_SIGNAL'd. Errors are
* automatically propagated to the parent
+ *
+ * vm_mtx can't be held
*/
struct buf *
getchainbuf(struct bio *bp, struct vnode *vp, int flags)
{
- struct buf *nbp = getpbuf(NULL);
- u_int *count = (u_int *)&(bp->bio_caller1);
+ struct buf *nbp;
+ u_int *count;
+
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+ nbp = getpbuf(NULL);
+ count = (u_int *)&(bp->bio_caller1);
nbp->b_caller1 = bp;
++(*count);
- if (*count > 4)
+ if (*count > 4)
waitchainbuf(bp, 4, 0);
nbp->b_iocmd = bp->bio_cmd;
@@ -2063,6 +2109,9 @@ getchainbuf(struct bio *bp, struct vnode *vp, int flags)
void
flushchainbuf(struct buf *nbp)
{
+
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
if (nbp->b_bcount) {
nbp->b_bufsize = nbp->b_bcount;
if (nbp->b_iocmd == BIO_WRITE)
@@ -2072,14 +2121,19 @@ flushchainbuf(struct buf *nbp)
} else {
bufdone(nbp);
}
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
-void
+static void
waitchainbuf(struct bio *bp, int limit, int done)
{
int s;
- u_int *count = (u_int *)&(bp->bio_caller1);
+ u_int *count;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+ mtx_lock(&Giant);
+ count = (u_int *)&(bp->bio_caller1);
s = splbio();
while (*count > limit) {
bp->bio_flags |= BIO_FLAG1;
@@ -2092,6 +2146,7 @@ waitchainbuf(struct bio *bp, int limit, int done)
}
biodone(bp);
}
+ mtx_unlock(&Giant);
splx(s);
}
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
index 38f04ac..5915b29 100644
--- a/sys/vm/vm.h
+++ b/sys/vm/vm.h
@@ -95,6 +95,10 @@ typedef struct vm_map *vm_map_t;
struct vm_object;
typedef struct vm_object *vm_object_t;
+#ifdef _KERNEL
+extern struct mtx vm_mtx;
+#endif
+
#ifndef _KERNEL
/*
* This is defined in <sys/types.h> for the kernel so that non-vm kernel
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index a1bad69..f31f12b 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -81,6 +81,8 @@
#include <sys/vnode.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -134,6 +136,8 @@ unlock_map(struct faultstate *fs)
static void
_unlock_things(struct faultstate *fs, int dealloc)
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
vm_object_pip_wakeup(fs->object);
if (fs->object != fs->first_object) {
vm_page_free(fs->first_m);
@@ -145,8 +149,15 @@ _unlock_things(struct faultstate *fs, int dealloc)
}
unlock_map(fs);
if (fs->vp != NULL) {
- vput(fs->vp);
+ struct vnode *vp;
+
+ vp = fs->vp;
fs->vp = NULL;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
+ vput(vp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
}
@@ -179,10 +190,41 @@ _unlock_things(struct faultstate *fs, int dealloc)
*
*
* The map in question must be referenced, and remains so.
- * Caller may hold no locks.
+ * Caller may hold no locks except the vm_mtx which will be
+ * locked if needed.
*/
+static int vm_fault1 __P((vm_map_t, vm_offset_t, vm_prot_t, int));
+
+static int vm_faults_no_vm_mtx;
+SYSCTL_INT(_vm, OID_AUTO, vm_faults_no_vm_mtx, CTLFLAG_RW,
+ &vm_faults_no_vm_mtx, 0, "");
+
+static int vm_faults_no_giant;
+SYSCTL_INT(_vm, OID_AUTO, vm_faults_no_giant, CTLFLAG_RW,
+ &vm_faults_no_giant, 0, "");
+
int
-vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags)
+vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags)
+{
+ int hadvmlock, ret;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock) {
+ mtx_lock(&vm_mtx);
+ vm_faults_no_vm_mtx++;
+ if (!mtx_owned(&Giant))
+ vm_faults_no_giant++;
+ }
+ ret = vm_fault1(map, vaddr, fault_type, fault_flags);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (ret);
+}
+
+static int
+vm_fault1(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags)
{
vm_prot_t prot;
int result;
@@ -194,7 +236,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags)
int faultcount;
struct faultstate fs;
- cnt.v_vm_faults++; /* needs lock XXX */
+ mtx_assert(&vm_mtx, MA_OWNED);
+ cnt.v_vm_faults++;
hardfault = 0;
RetryFault:;
@@ -251,7 +294,11 @@ RetryFault:;
vm_object_reference(fs.first_object);
vm_object_pip_add(fs.first_object, 1);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
fs.vp = vnode_pager_lock(fs.first_object);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if ((fault_type & VM_PROT_WRITE) &&
(fs.first_object->type == OBJT_VNODE)) {
vm_freeze_copyopts(fs.first_object,
@@ -723,7 +770,11 @@ readrest:
*/
if (fs.vp != NULL) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
vput(fs.vp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
fs.vp = NULL;
}
@@ -940,6 +991,7 @@ vm_fault_user_wire(map, start, end)
register pmap_t pmap;
int rv;
+ mtx_assert(&vm_mtx, MA_OWNED);
pmap = vm_map_pmap(map);
/*
@@ -1112,6 +1164,9 @@ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
*
* Return value:
* number of pages in marray
+ *
+ * This routine can't block.
+ * vm_mtx must be held.
*/
static int
vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
@@ -1127,6 +1182,8 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
vm_page_t rtm;
int cbehind, cahead;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
object = m->object;
pindex = m->pindex;
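
vm_fault() itself is now a thin wrapper: it takes vm_mtx when the caller did not (the same hadvmlock test shown after the vfs_bio.c hunks), counts via sysctl how often that happens and how often Giant was missing, and hands off to vm_fault1(), which simply asserts the lock. The wrapper/worker split is reusable for any entry point that must tolerate both locked and unlocked callers; a sketch with hypothetical names (only vm.vm_faults_no_vm_mtx and vm.vm_faults_no_giant are added by the patch, the knob below is illustrative):

    #include <sys/param.h>
    #include <sys/systm.h>
    #include <sys/kernel.h>
    #include <sys/sysctl.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>

    extern struct mtx vm_mtx;

    static int example_entries_unlocked;
    SYSCTL_INT(_vm, OID_AUTO, example_entries_unlocked, CTLFLAG_RW,
        &example_entries_unlocked, 0, "calls that had to take vm_mtx themselves");

    static int
    example_worker(int arg)
    {

            mtx_assert(&vm_mtx, MA_OWNED);  /* the real work always runs locked */
            return (arg != 0);
    }

    static int
    example_entry(int arg)
    {
            int hadvmlock, ret;

            hadvmlock = mtx_owned(&vm_mtx);
            if (!hadvmlock) {
                    mtx_lock(&vm_mtx);
                    example_entries_unlocked++;
            }
            ret = example_worker(arg);
            if (!hadvmlock)
                    mtx_unlock(&vm_mtx);
            return (ret);
    }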
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index a180ae3..37c580a 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -161,6 +161,7 @@ useracc(addr, len, rw)
|| (vm_offset_t) addr + len < (vm_offset_t) addr) {
return (FALSE);
}
+ mtx_lock(&vm_mtx);
map = &curproc->p_vmspace->vm_map;
vm_map_lock_read(map);
/*
@@ -172,6 +173,7 @@ useracc(addr, len, rw)
trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), prot);
map->hint = save_hint;
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
return (rv == TRUE);
}
@@ -181,8 +183,12 @@ vslock(addr, len)
caddr_t addr;
u_int len;
{
- vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
+
+ mtx_lock(&vm_mtx);
+ vm_map_pageable(&curproc->p_vmspace->vm_map,
+ trunc_page((vm_offset_t)addr),
round_page((vm_offset_t)addr + len), FALSE);
+ mtx_unlock(&vm_mtx);
}
void
@@ -190,8 +196,12 @@ vsunlock(addr, len)
caddr_t addr;
u_int len;
{
- vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
+
+ mtx_lock(&vm_mtx);
+ vm_map_pageable(&curproc->p_vmspace->vm_map,
+ trunc_page((vm_offset_t)addr),
round_page((vm_offset_t)addr + len), TRUE);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -201,6 +211,8 @@ vsunlock(addr, len)
* machine-dependent layer to fill those in and make the new process
* ready to run. The new process is set up so that it returns directly
* to user mode to avoid stack copying and relocation problems.
+ *
+ * Called without vm_mtx.
*/
void
vm_fork(p1, p2, flags)
@@ -209,6 +221,7 @@ vm_fork(p1, p2, flags)
{
register struct user *up;
+ mtx_lock(&vm_mtx);
if ((flags & RFPROC) == 0) {
/*
* Divorce the memory, if it is shared, essentially
@@ -221,6 +234,7 @@ vm_fork(p1, p2, flags)
}
}
cpu_fork(p1, p2, flags);
+ mtx_unlock(&vm_mtx);
return;
}
@@ -275,6 +289,7 @@ vm_fork(p1, p2, flags)
* and make the child ready to run.
*/
cpu_fork(p1, p2, flags);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -360,10 +375,13 @@ scheduler(dummy)
mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED);
loop:
+ mtx_lock(&vm_mtx);
if (vm_page_count_min()) {
VM_WAIT;
+ mtx_unlock(&vm_mtx);
goto loop;
}
+ mtx_unlock(&vm_mtx);
mtx_unlock(&Giant);
pp = NULL;
@@ -442,6 +460,9 @@ SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2,
* If any procs have been sleeping/stopped for at least maxslp seconds,
* they are swapped. Else, we swap the longest-sleeping or stopped process,
* if any, otherwise the longest-resident process.
+ *
+ * Can block.
+ * Must be called with vm_mtx held.
*/
void
swapout_procs(action)
@@ -452,6 +473,8 @@ int action;
int outpri, outpri2;
int didswap = 0;
+ mtx_assert(&vm_mtx, MA_OWNED);
+ mtx_unlock(&vm_mtx);
outp = outp2 = NULL;
outpri = outpri2 = INT_MIN;
sx_slock(&allproc_lock);
@@ -465,6 +488,11 @@ retry:
PROC_UNLOCK(p);
continue;
}
+ /*
+ * Only aiod changes the vmspace; however, it will be
+ * skipped because of the if statement above checking
+ * for P_SYSTEM.
+ */
vm = p->p_vmspace;
mtx_lock_spin(&sched_lock);
if ((p->p_sflag & (PS_INMEM|PS_SWAPPING)) != PS_INMEM) {
@@ -516,6 +544,7 @@ retry:
}
mtx_unlock_spin(&sched_lock);
+ mtx_lock(&vm_mtx);
#if 0
/*
* XXX: This is broken. We release the lock we
@@ -531,7 +560,7 @@ retry:
*/
if (lockmgr(&vm->vm_map.lock,
LK_EXCLUSIVE | LK_NOWAIT,
- (void *)0, curproc)) {
+ NULL, curproc)) {
vmspace_free(vm);
PROC_UNLOCK(p);
continue;
@@ -548,8 +577,10 @@ retry:
swapout(p);
vmspace_free(vm);
didswap++;
+ mtx_unlock(&vm_mtx);
goto retry;
}
+ mtx_unlock(&vm_mtx);
PROC_UNLOCK(p);
}
}
@@ -558,6 +589,7 @@ retry:
* If we swapped something out, and another process needed memory,
* then wakeup the sched process.
*/
+ mtx_lock(&vm_mtx);
if (didswap)
wakeup(&proc0);
}
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
index ae336e1..35e4676 100644
--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c
@@ -73,6 +73,7 @@
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/systm.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -96,16 +97,20 @@ SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_FIRST, vm_mem_init, NULL)
* The start and end address of physical memory is passed in.
*/
+struct mtx vm_mtx;
+
/* ARGSUSED*/
static void
vm_mem_init(dummy)
void *dummy;
{
+
/*
* Initializes resident memory structures. From here on, all physical
* memory is accounted for, and we use only virtual addresses.
*/
-
+ mtx_init(&vm_mtx, "vm", MTX_DEF);
+ mtx_lock(&vm_mtx);
vm_set_page_size();
virtual_avail = vm_page_startup(avail_start, avail_end, virtual_avail);
@@ -118,4 +123,5 @@ vm_mem_init(dummy)
kmem_init(virtual_avail, virtual_end);
pmap_init(avail_start, avail_end);
vm_pager_init();
+ mtx_unlock(&vm_mtx);
}
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 14e4867..08ee486 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -106,11 +106,17 @@ kmem_alloc_pageable(map, size)
{
vm_offset_t addr;
int result;
+ int hadvmlock;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
addr = vm_map_min(map);
result = vm_map_find(map, NULL, (vm_offset_t) 0,
&addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
if (result != KERN_SUCCESS) {
return (0);
}
@@ -131,10 +137,17 @@ kmem_alloc_nofault(map, size)
vm_offset_t addr;
int result;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
addr = vm_map_min(map);
result = vm_map_find(map, NULL, (vm_offset_t) 0,
&addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
if (result != KERN_SUCCESS) {
return (0);
}
@@ -153,8 +166,11 @@ kmem_alloc(map, size)
vm_offset_t addr;
vm_offset_t offset;
vm_offset_t i;
+ int hadvmlock;
- mtx_assert(&Giant, MA_OWNED);
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
/*
@@ -170,6 +186,8 @@ kmem_alloc(map, size)
vm_map_lock(map);
if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (0);
}
offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -214,6 +232,8 @@ kmem_alloc(map, size)
(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (addr);
}
@@ -232,9 +252,16 @@ kmem_free(map, addr, size)
vm_offset_t addr;
vm_size_t size;
{
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
- mtx_assert(&Giant, MA_OWNED);
(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
+
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
/*
@@ -257,6 +284,11 @@ kmem_suballoc(parent, min, max, size)
{
int ret;
vm_map_t result;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
@@ -274,6 +306,8 @@ kmem_suballoc(parent, min, max, size)
panic("kmem_suballoc: cannot create submap");
if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS)
panic("kmem_suballoc: unable to change range to submap");
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (result);
}
@@ -308,10 +342,15 @@ kmem_malloc(map, size, flags)
vm_map_entry_t entry;
vm_offset_t addr;
vm_page_t m;
+ int hadvmlock;
if (map != kmem_map && map != mb_map)
panic("kmem_malloc: map != {kmem,mb}_map");
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+
size = round_page(size);
addr = vm_map_min(map);
@@ -326,12 +365,12 @@ kmem_malloc(map, size, flags)
if (map == mb_map) {
mb_map_full = TRUE;
printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n");
- return (0);
+ goto bad;
}
if ((flags & M_NOWAIT) == 0)
panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated",
(long)size, (long)map->size);
- return (0);
+ goto bad;
}
offset = addr - VM_MIN_KERNEL_ADDRESS;
vm_object_reference(kmem_object);
@@ -370,7 +409,7 @@ retry:
if (flags & M_ASLEEP) {
VM_AWAIT;
}
- return (0);
+ goto bad;
}
vm_page_flag_clear(m, PG_ZERO);
m->valid = VM_PAGE_BITS_ALL;
@@ -407,7 +446,14 @@ retry:
}
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (addr);
+
+bad:
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (0);
}
/*
@@ -425,6 +471,11 @@ kmem_alloc_wait(map, size)
vm_size_t size;
{
vm_offset_t addr;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
@@ -439,13 +490,17 @@ kmem_alloc_wait(map, size)
/* no space now; see if we can ever get space */
if (vm_map_max(map) - vm_map_min(map) < size) {
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (0);
}
vm_map_unlock(map);
- tsleep(map, PVM, "kmaw", 0);
+ msleep(map, &vm_mtx, PVM, "kmaw", 0);
}
vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (addr);
}
@@ -461,10 +516,17 @@ kmem_free_wakeup(map, addr, size)
vm_offset_t addr;
vm_size_t size;
{
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
vm_map_lock(map);
(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
wakeup(map);
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
/*
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index b33e9e4..d07d35b 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -200,6 +200,7 @@ vmspace_free(vm)
struct vmspace *vm;
{
+ mtx_assert(&vm_mtx, MA_OWNED);
if (vm->vm_refcnt == 0)
panic("vmspace_free: attempt to free already freed vmspace");
@@ -350,6 +351,8 @@ vm_map_entry_unlink(vm_map_t map,
* in the "entry" parameter. The boolean
* result indicates whether the address is
* actually contained in the map.
+ *
+ * Doesn't block.
*/
boolean_t
vm_map_lookup_entry(map, address, entry)
@@ -439,6 +442,7 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
vm_map_entry_t temp_entry;
vm_eflags_t protoeflags;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Check that the start and end points are not bogus.
*/
@@ -1705,7 +1709,9 @@ vm_map_clean(map, start, end, syncio, invalidate)
int flags;
vm_object_reference(object);
+ mtx_unlock(&vm_mtx);
vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
+ mtx_lock(&vm_mtx);
flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
flags |= invalidate ? OBJPC_INVAL : 0;
vm_object_page_clean(object,
@@ -2296,6 +2302,8 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
* the stack. Also returns KERN_SUCCESS if addr is outside the
* stack range (this is strange, but preserves compatibility with
* the grow function in vm_machdep.c).
+ *
+ * Will grab vm_mtx if needed
*/
int
vm_map_growstack (struct proc *p, vm_offset_t addr)
@@ -2309,18 +2317,29 @@ vm_map_growstack (struct proc *p, vm_offset_t addr)
int grow_amount;
int rv;
int is_procstack;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+#define myreturn(rval) do { \
+ if (!hadvmlock) \
+ mtx_unlock(&vm_mtx); \
+ return (rval); \
+} while (0)
+
Retry:
vm_map_lock_read(map);
/* If addr is already in the entry range, no need to grow.*/
if (vm_map_lookup_entry(map, addr, &prev_entry)) {
vm_map_unlock_read(map);
- return (KERN_SUCCESS);
+ myreturn (KERN_SUCCESS);
}
if ((stack_entry = prev_entry->next) == &map->header) {
vm_map_unlock_read(map);
- return (KERN_SUCCESS);
+ myreturn (KERN_SUCCESS);
}
if (prev_entry == &map->header)
end = stack_entry->start - stack_entry->avail_ssize;
@@ -2338,14 +2357,14 @@ Retry:
addr >= stack_entry->start ||
addr < stack_entry->start - stack_entry->avail_ssize) {
vm_map_unlock_read(map);
- return (KERN_SUCCESS);
+ myreturn (KERN_SUCCESS);
}
/* Find the minimum grow amount */
grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
if (grow_amount > stack_entry->avail_ssize) {
vm_map_unlock_read(map);
- return (KERN_NO_SPACE);
+ myreturn (KERN_NO_SPACE);
}
/* If there is no longer enough space between the entries
@@ -2364,7 +2383,7 @@ Retry:
stack_entry->avail_ssize = stack_entry->start - end;
vm_map_unlock(map);
- return (KERN_NO_SPACE);
+ myreturn (KERN_NO_SPACE);
}
is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
@@ -2375,7 +2394,7 @@ Retry:
if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
vm_map_unlock_read(map);
- return (KERN_NO_SPACE);
+ myreturn (KERN_NO_SPACE);
}
/* Round up the grow amount modulo SGROWSIZ */
@@ -2427,8 +2446,8 @@ Retry:
}
vm_map_unlock(map);
- return (rv);
-
+ myreturn (rv);
+#undef myreturn
}
/*
@@ -2501,6 +2520,9 @@ vmspace_unshare(struct proc *p) {
* specified, the map may be changed to perform virtual
* copying operations, although the data referenced will
* remain the same.
+ *
+ * Can block while locking maps and while calling vm_object_shadow().
+ * Will drop/reacquire the vm_mtx.
*/
int
vm_map_lookup(vm_map_t *var_map, /* IN/OUT */
@@ -2928,6 +2950,8 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages)
* Performs the copy_on_write operations necessary to allow the virtual copies
* into user space to work. This has to be called for write(2) system calls
* from other processes, file unlinking, and file size shrinkage.
+ *
+ * Requires that the vm_mtx is held
*/
void
vm_freeze_copyopts(object, froma, toa)
@@ -2938,6 +2962,7 @@ vm_freeze_copyopts(object, froma, toa)
vm_object_t robject;
vm_pindex_t idx;
+ mtx_assert(&vm_mtx, MA_OWNED);
if ((object == NULL) ||
((object->flags & OBJ_OPT) == 0))
return;
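
vm_map_growstack() has many exit points, so rather than repeating the conditional unlock at each one the patch defines a function-local myreturn() macro that releases vm_mtx only if this invocation acquired it. The same construction in isolation, on a hypothetical function:

    #include <sys/param.h>
    #include <sys/errno.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>

    extern struct mtx vm_mtx;

    static int
    example_multi_exit(int arg)
    {
            int hadvmlock;

            hadvmlock = mtx_owned(&vm_mtx);
            if (!hadvmlock)
                    mtx_lock(&vm_mtx);
    /* Drop vm_mtx on the way out only if we took it ourselves. */
    #define myreturn(rval) do {                                     \
            if (!hadvmlock)                                         \
                    mtx_unlock(&vm_mtx);                            \
            return (rval);                                          \
    } while (0)

            if (arg < 0)
                    myreturn (EINVAL);
            /* ... the real work; every early exit goes through myreturn() ... */
            myreturn (0);
    #undef myreturn
    }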
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index daf2b6e..241a80c 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -196,6 +196,7 @@ struct vmspace {
caddr_t vm_minsaddr; /* user VA at max stack growth */
};
+#ifdef _KERNEL
/*
* Macros: vm_map_lock, etc.
* Function:
@@ -211,6 +212,7 @@ struct vmspace {
do { \
lockmgr(&(map)->lock, LK_DRAIN|LK_INTERLOCK, \
&(map)->ref_lock, curproc); \
+ mtx_lock(&vm_mtx); \
(map)->timestamp++; \
} while(0)
@@ -225,27 +227,33 @@ struct vmspace {
#define vm_map_lock(map) \
do { \
vm_map_printf("locking map LK_EXCLUSIVE: %p\n", map); \
- if (lockmgr(&(map)->lock, LK_EXCLUSIVE, (void *)0, curproc) != 0) \
+ mtx_assert(&vm_mtx, MA_OWNED); \
+ if (lockmgr(&(map)->lock, LK_EXCLUSIVE | LK_INTERLOCK, \
+ &vm_mtx, curproc) != 0) \
panic("vm_map_lock: failed to get lock"); \
+ mtx_lock(&vm_mtx); \
(map)->timestamp++; \
} while(0)
#define vm_map_unlock(map) \
do { \
vm_map_printf("locking map LK_RELEASE: %p\n", map); \
- lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc); \
+ lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc); \
} while (0)
#define vm_map_lock_read(map) \
do { \
vm_map_printf("locking map LK_SHARED: %p\n", map); \
- lockmgr(&(map)->lock, LK_SHARED, (void *)0, curproc); \
+ mtx_assert(&vm_mtx, MA_OWNED); \
+ lockmgr(&(map)->lock, LK_SHARED | LK_INTERLOCK, \
+ &vm_mtx, curproc); \
+ mtx_lock(&vm_mtx); \
} while (0)
#define vm_map_unlock_read(map) \
do { \
vm_map_printf("locking map LK_RELEASE: %p\n", map); \
- lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc); \
+ lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc); \
} while (0)
static __inline__ int
@@ -253,7 +261,8 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) {
int error;
vm_map_printf("locking map LK_EXCLUPGRADE: %p\n", map);
- error = lockmgr(&map->lock, LK_EXCLUPGRADE, (void *)0, p);
+ error = lockmgr(&map->lock, LK_EXCLUPGRADE | LK_INTERLOCK, &vm_mtx, p);
+ mtx_lock(&vm_mtx);
if (error == 0)
map->timestamp++;
return error;
@@ -264,7 +273,7 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) {
#define vm_map_lock_downgrade(map) \
do { \
vm_map_printf("locking map LK_DOWNGRADE: %p\n", map); \
- lockmgr(&(map)->lock, LK_DOWNGRADE, (void *)0, curproc); \
+ lockmgr(&(map)->lock, LK_DOWNGRADE, NULL, curproc); \
} while (0)
#define vm_map_set_recursive(map) \
@@ -287,6 +296,7 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) {
#define vm_map_min(map) ((map)->min_offset)
#define vm_map_max(map) ((map)->max_offset)
#define vm_map_pmap(map) ((map)->pmap)
+#endif /* _KERNEL */
static __inline struct pmap *
vmspace_pmap(struct vmspace *vmspace)
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 8dcb906..0f4e107 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -145,8 +145,10 @@ vmtotal(SYSCTL_HANDLER_ARGS)
/*
* Mark all objects as inactive.
*/
+ mtx_lock(&vm_mtx);
TAILQ_FOREACH(object, &vm_object_list, object_list)
vm_object_clear_flag(object, OBJ_ACTIVE);
+ mtx_unlock(&vm_mtx);
/*
* Calculate process statistics.
*/
@@ -197,6 +199,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
* Note active objects.
*/
paging = 0;
+ mtx_lock(&vm_mtx);
for (map = &p->p_vmspace->vm_map, entry = map->header.next;
entry != &map->header; entry = entry->next) {
if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
@@ -205,6 +208,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
vm_object_set_flag(entry->object.vm_object, OBJ_ACTIVE);
paging |= entry->object.vm_object->paging_in_progress;
}
+ mtx_unlock(&vm_mtx);
if (paging)
totalp->t_pw++;
}
@@ -212,6 +216,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
/*
* Calculate object memory usage statistics.
*/
+ mtx_lock(&vm_mtx);
TAILQ_FOREACH(object, &vm_object_list, object_list) {
/*
* devices, like /dev/mem, will badly skew our totals
@@ -235,6 +240,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
}
}
totalp->t_free = cnt.v_free_count + cnt.v_cache_count;
+ mtx_unlock(&vm_mtx);
return (sysctl_handle_opaque(oidp, totalp, sizeof total, req));
}
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 14307b3..5de25d9 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -52,6 +52,7 @@
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
@@ -515,14 +516,17 @@ msync(p, uap)
* the range of the map entry containing addr. This can be incorrect
* if the region splits or is coalesced with a neighbor.
*/
+ mtx_lock(&vm_mtx);
if (size == 0) {
vm_map_entry_t entry;
vm_map_lock_read(map);
rv = vm_map_lookup_entry(map, addr, &entry);
vm_map_unlock_read(map);
- if (rv == FALSE)
+ if (rv == FALSE) {
+ mtx_unlock(&vm_mtx);
return (EINVAL);
+ }
addr = entry->start;
size = entry->end - entry->start;
}
@@ -533,6 +537,7 @@ msync(p, uap)
rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
(flags & MS_INVALIDATE) != 0);
+ mtx_unlock(&vm_mtx);
switch (rv) {
case KERN_SUCCESS:
break;
@@ -589,10 +594,14 @@ munmap(p, uap)
/*
* Make sure entire range is allocated.
*/
- if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
+ mtx_lock(&vm_mtx);
+ if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) {
+ mtx_unlock(&vm_mtx);
return (EINVAL);
+ }
/* returns nothing but KERN_SUCCESS anyway */
(void) vm_map_remove(map, addr, addr + size);
+ mtx_unlock(&vm_mtx);
return (0);
}
@@ -624,6 +633,7 @@ mprotect(p, uap)
vm_offset_t addr;
vm_size_t size, pageoff;
register vm_prot_t prot;
+ int ret;
addr = (vm_offset_t) uap->addr;
size = uap->len;
@@ -640,8 +650,11 @@ mprotect(p, uap)
if (addr + size < addr)
return(EINVAL);
- switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
- FALSE)) {
+ mtx_lock(&vm_mtx);
+ ret = vm_map_protect(&p->p_vmspace->vm_map, addr,
+ addr + size, prot, FALSE);
+ mtx_unlock(&vm_mtx);
+ switch (ret) {
case KERN_SUCCESS:
return (0);
case KERN_PROTECTION_FAILURE:
@@ -665,6 +678,7 @@ minherit(p, uap)
vm_offset_t addr;
vm_size_t size, pageoff;
register vm_inherit_t inherit;
+ int ret;
addr = (vm_offset_t)uap->addr;
size = uap->len;
@@ -677,8 +691,12 @@ minherit(p, uap)
if (addr + size < addr)
return(EINVAL);
- switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
- inherit)) {
+ mtx_lock(&vm_mtx);
+ ret = vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
+ inherit);
+ mtx_unlock(&vm_mtx);
+
+ switch (ret) {
case KERN_SUCCESS:
return (0);
case KERN_PROTECTION_FAILURE:
@@ -702,6 +720,7 @@ madvise(p, uap)
struct madvise_args *uap;
{
vm_offset_t start, end;
+ int ret;
/*
* Check for illegal behavior
@@ -729,9 +748,10 @@ madvise(p, uap)
start = trunc_page((vm_offset_t) uap->addr);
end = round_page((vm_offset_t) uap->addr + uap->len);
- if (vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav))
- return (EINVAL);
- return (0);
+ mtx_lock(&vm_mtx);
+ ret = vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav);
+ mtx_unlock(&vm_mtx);
+ return (ret ? EINVAL : 0);
}
#ifndef _SYS_SYSPROTO_H_
@@ -777,6 +797,7 @@ mincore(p, uap)
vec = uap->vec;
map = &p->p_vmspace->vm_map;
+ mtx_lock(&vm_mtx);
pmap = vmspace_pmap(p->p_vmspace);
vm_map_lock_read(map);
@@ -856,6 +877,7 @@ RestartScan:
* the map, we release the lock.
*/
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
/*
* calculate index into user supplied byte vector
@@ -886,6 +908,7 @@ RestartScan:
* If the map has changed, due to the subyte, the previous
* output may be invalid.
*/
+ mtx_lock(&vm_mtx);
vm_map_lock_read(map);
if (timestamp != map->timestamp)
goto RestartScan;
@@ -900,6 +923,7 @@ RestartScan:
* the map, we release the lock.
*/
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
/*
* Zero the last entries in the byte vector.
@@ -917,10 +941,12 @@ RestartScan:
* If the map has changed, due to the subyte, the previous
* output may be invalid.
*/
+ mtx_lock(&vm_mtx);
vm_map_lock_read(map);
if (timestamp != map->timestamp)
goto RestartScan;
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
return (0);
}
@@ -965,7 +991,10 @@ mlock(p, uap)
return (error);
#endif
- error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
+ mtx_lock(&vm_mtx);
+ error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr,
+ addr + size, FALSE);
+ mtx_unlock(&vm_mtx);
return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
@@ -1030,7 +1059,10 @@ munlock(p, uap)
return (error);
#endif
- error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
+ mtx_lock(&vm_mtx);
+ error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr,
+ addr + size, TRUE);
+ mtx_lock(&vm_mtx);
return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
@@ -1077,7 +1109,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
if (*addr != trunc_page(*addr))
return (EINVAL);
fitit = FALSE;
+ mtx_lock(&vm_mtx);
(void) vm_map_remove(map, *addr, *addr + size);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -1099,7 +1133,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
struct vattr vat;
int error;
+ mtx_lock(&Giant);
error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
+ mtx_unlock(&Giant);
if (error)
return (error);
objsize = round_page(vat.va_size);
@@ -1148,6 +1184,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
maxprot |= VM_PROT_EXECUTE;
#endif
+ mtx_lock(&vm_mtx);
if (fitit) {
*addr = pmap_addr_hint(object, *addr, size);
}
@@ -1180,6 +1217,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
}
}
out:
+ mtx_unlock(&vm_mtx);
switch (rv) {
case KERN_SUCCESS:
return (0);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 33fe834..30ef190 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -146,6 +146,24 @@ _vm_object_allocate(type, size, object)
vm_object_t object;
{
int incr;
+ int hadvmlock;
+
+ /*
+ * XXX: Not all callers seem to have the lock, compensate.
+ * I'm pretty sure we need to bump the gen count before possibly
+ * nuking the data contained within while under the lock.
+ */
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+ object->generation++;
+ if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
+ vm_object_set_flag(object, OBJ_ONEMAPPING);
+ TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
+ vm_object_count++;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+
TAILQ_INIT(&object->memq);
TAILQ_INIT(&object->shadow_head);
@@ -153,8 +171,6 @@ _vm_object_allocate(type, size, object)
object->size = size;
object->ref_count = 1;
object->flags = 0;
- if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
- vm_object_set_flag(object, OBJ_ONEMAPPING);
object->paging_in_progress = 0;
object->resident_page_count = 0;
object->shadow_count = 0;
@@ -175,10 +191,6 @@ _vm_object_allocate(type, size, object)
*/
object->hash_rand = object_hash_rand - 129;
- object->generation++;
-
- TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
- vm_object_count++;
object_hash_rand = object->hash_rand;
}
@@ -226,7 +238,6 @@ vm_object_allocate(type, size)
vm_object_t result;
result = (vm_object_t) zalloc(obj_zone);
-
_vm_object_allocate(type, size, result);
return (result);
@@ -250,18 +261,29 @@ vm_object_reference(object)
object->ref_count++;
if (object->type == OBJT_VNODE) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
while (vget((struct vnode *) object->handle, LK_RETRY|LK_NOOBJ, curproc)) {
printf("vm_object_reference: delay in getting object\n");
}
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
}
+/*
+ * Handle deallocating an object of type OBJT_VNODE.
+ *
+ * requires vm_mtx
+ * may block
+ */
void
vm_object_vndeallocate(object)
vm_object_t object;
{
struct vnode *vp = (struct vnode *) object->handle;
+ mtx_assert(&vm_mtx, MA_OWNED);
KASSERT(object->type == OBJT_VNODE,
("vm_object_vndeallocate: not a vnode object"));
KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
@@ -277,7 +299,14 @@ vm_object_vndeallocate(object)
vp->v_flag &= ~VTEXT;
vm_object_clear_flag(object, OBJ_OPT);
}
+ /*
+ * vrele may need a vop lock
+ */
+ mtx_unlock(VM_OBJECT_MTX(object));
+ mtx_lock(&Giant);
vrele(vp);
+ mtx_unlock(&Giant);
+ mtx_lock(VM_OBJECT_MTX(object));
}
/*
@@ -290,6 +319,7 @@ vm_object_vndeallocate(object)
* may be relinquished.
*
* No object may be locked.
+ * vm_mtx must be held
*/
void
vm_object_deallocate(object)
@@ -297,6 +327,7 @@ vm_object_deallocate(object)
{
vm_object_t temp;
+ mtx_assert(&vm_mtx, MA_OWNED);
while (object != NULL) {
if (object->type == OBJT_VNODE) {
@@ -422,7 +453,11 @@ vm_object_terminate(object)
vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
vp = (struct vnode *) object->handle;
+ mtx_unlock(VM_OBJECT_MTX(object));
+ mtx_lock(&Giant);
vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
+ mtx_unlock(&Giant);
+ mtx_lock(VM_OBJECT_MTX(object));
}
KASSERT(object->ref_count == 0,
@@ -507,6 +542,7 @@ vm_object_page_clean(object, start, end, flags)
vm_page_t ma[vm_pageout_page_count];
int curgeneration;
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
if (object->type != OBJT_VNODE ||
(object->flags & OBJ_MIGHTBEDIRTY) == 0)
return;
@@ -962,6 +998,7 @@ vm_object_backing_scan(vm_object_t object, int op)
vm_pindex_t backing_offset_index;
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
backing_object = object->backing_object;
backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
@@ -1175,6 +1212,9 @@ void
vm_object_collapse(object)
vm_object_t object;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+
while (TRUE) {
vm_object_t backing_object;
@@ -1386,6 +1426,8 @@ vm_object_page_remove(object, start, end, clean_only)
unsigned int size;
int all;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (object == NULL ||
object->resident_page_count == 0)
return;
@@ -1502,6 +1544,8 @@ vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size)
{
vm_pindex_t next_pindex;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (prev_object == NULL) {
return (TRUE);
}
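
A minimal sketch of the vm_mtx/Giant handoff that the vm_object.c hunks above apply around vget(), vrele() and vinvalbuf(): the VM lock is dropped before Giant is taken for the vnode operation, then the original state is restored. The helper name example_vnode_release() is hypothetical; the headers already included by vm_object.c are assumed, and the real callers inline this pattern rather than wrapping it.

static void
example_vnode_release(struct vnode *vp)
{

	mtx_assert(&vm_mtx, MA_OWNED);
	/* vrele() may need a vop lock, so run it under Giant only. */
	mtx_unlock(&vm_mtx);
	mtx_lock(&Giant);
	vrele(vp);
	mtx_unlock(&Giant);
	mtx_lock(&vm_mtx);
	/* vm_mtx was dropped: any cached object/page state may be stale. */
}

Note that Giant is never acquired while vm_mtx is held, which keeps the lock order consistent across these call sites.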
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index ba4c026..2b29baf 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -169,34 +169,49 @@ extern vm_object_t kmem_object;
#ifdef _KERNEL
+/*
+ * For now a global vm lock.
+ */
+#define VM_OBJECT_MTX(object) (&vm_mtx)
+
static __inline void
vm_object_set_flag(vm_object_t object, u_short bits)
{
- atomic_set_short(&object->flags, bits);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->flags |= bits;
}
static __inline void
vm_object_clear_flag(vm_object_t object, u_short bits)
{
- atomic_clear_short(&object->flags, bits);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->flags &= ~bits;
}
static __inline void
vm_object_pip_add(vm_object_t object, short i)
{
- atomic_add_short(&object->paging_in_progress, i);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->paging_in_progress += i;
}
static __inline void
vm_object_pip_subtract(vm_object_t object, short i)
{
- atomic_subtract_short(&object->paging_in_progress, i);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->paging_in_progress -= i;
}
static __inline void
vm_object_pip_wakeup(vm_object_t object)
{
- atomic_subtract_short(&object->paging_in_progress, 1);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->paging_in_progress--;
if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
vm_object_clear_flag(object, OBJ_PIPWNT);
wakeup(object);
@@ -206,8 +221,10 @@ vm_object_pip_wakeup(vm_object_t object)
static __inline void
vm_object_pip_wakeupn(vm_object_t object, short i)
{
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
if (i)
- atomic_subtract_short(&object->paging_in_progress, i);
+ object->paging_in_progress -= i;
if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
vm_object_clear_flag(object, OBJ_PIPWNT);
wakeup(object);
@@ -217,11 +234,13 @@ vm_object_pip_wakeupn(vm_object_t object, short i)
static __inline void
vm_object_pip_sleep(vm_object_t object, char *waitid)
{
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
if (object->paging_in_progress) {
int s = splvm();
if (object->paging_in_progress) {
vm_object_set_flag(object, OBJ_PIPWNT);
- tsleep(object, PVM, waitid, 0);
+ msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0);
}
splx(s);
}
@@ -230,6 +249,8 @@ vm_object_pip_sleep(vm_object_t object, char *waitid)
static __inline void
vm_object_pip_wait(vm_object_t object, char *waitid)
{
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
while (object->paging_in_progress)
vm_object_pip_sleep(object, waitid);
}
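
The inline helpers above trade atomic operations for plain arithmetic guarded by mtx_assert(), so callers must now enter them with vm_mtx held. A hypothetical caller-side sketch, assuming the headers already pulled in by vm_object.h users:

static void
example_start_paging(vm_object_t object)
{

	mtx_lock(&vm_mtx);		/* VM_OBJECT_MTX(object) is vm_mtx for now */
	vm_object_pip_add(object, 1);	/* plain increment, protected by vm_mtx */
	/* ... queue the paging operation ... */
	vm_object_pip_wakeup(object);	/* decrement; wakes OBJ_PIPWNT waiters at zero */
	mtx_unlock(&vm_mtx);
}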
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 5865d70..2ae0fe7 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -71,6 +71,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
@@ -147,6 +148,7 @@ vm_set_page_size()
*
* Add a new page to the freelist for use by the system.
* Must be called at splhigh().
+ * Must be called with the vm_mtx held.
*/
vm_page_t
vm_add_new_page(pa)
@@ -154,6 +156,7 @@ vm_add_new_page(pa)
{
vm_page_t m;
+ mtx_assert(&vm_mtx, MA_OWNED);
++cnt.v_page_count;
++cnt.v_free_count;
m = PHYS_TO_VM_PAGE(pa);
@@ -360,6 +363,7 @@ vm_page_insert(m, object, pindex)
{
register struct vm_page **bucket;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (m->object != NULL)
panic("vm_page_insert: already inserted");
@@ -419,6 +423,7 @@ vm_page_remove(m)
{
vm_object_t object;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (m->object == NULL)
return;
@@ -482,6 +487,8 @@ vm_page_remove(m)
* an interrupt makes a change, but the generation algorithm will not
* operate properly in an SMP environment where both cpu's are able to run
* kernel code simultaneously.
+ * NOTE: under the giant vm lock we should be ok; there should be
+ * no reason to check vm_page_bucket_generation
*
* The object must be locked. No side effects.
* This routine may not block.
@@ -596,6 +603,8 @@ vm_page_unqueue(m)
{
int queue = m->queue;
struct vpgqueues *pq;
+
+ mtx_assert(&vm_mtx, MA_OWNED);
if (queue != PQ_NONE) {
m->queue = PQ_NONE;
pq = &vm_page_queues[queue];
@@ -636,6 +645,7 @@ _vm_page_list_find(basequeue, index)
vm_page_t m = NULL;
struct vpgqueues *pq;
+ mtx_assert(&vm_mtx, MA_OWNED);
pq = &vm_page_queues[basequeue];
/*
@@ -673,6 +683,7 @@ vm_page_select_cache(object, pindex)
{
vm_page_t m;
+ mtx_assert(&vm_mtx, MA_OWNED);
while (TRUE) {
m = vm_page_list_find(
PQ_CACHE,
@@ -724,7 +735,7 @@ vm_page_select_free(vm_object_t object, vm_pindex_t pindex, boolean_t prefer_zer
* VM_ALLOC_INTERRUPT interrupt time request
* VM_ALLOC_ZERO zero page
*
- * Object must be locked.
+ * vm_mtx must be locked.
* This routine may not block.
*
* Additional special handling is required when called from an
@@ -741,6 +752,7 @@ vm_page_alloc(object, pindex, page_req)
register vm_page_t m = NULL;
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
KASSERT(!vm_page_lookup(object, pindex),
("vm_page_alloc: page already allocated"));
@@ -873,13 +885,13 @@ vm_wait()
s = splvm();
if (curproc == pageproc) {
vm_pageout_pages_needed = 1;
- tsleep(&vm_pageout_pages_needed, PSWP, "VMWait", 0);
+ msleep(&vm_pageout_pages_needed, &vm_mtx, PSWP, "VMWait", 0);
} else {
if (!vm_pages_needed) {
vm_pages_needed = 1;
wakeup(&vm_pages_needed);
}
- tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
+ msleep(&cnt.v_free_count, &vm_mtx, PVM, "vmwait", 0);
}
splx(s);
}
@@ -910,61 +922,6 @@ vm_await()
splx(s);
}
-#if 0
-/*
- * vm_page_sleep:
- *
- * Block until page is no longer busy.
- */
-
-int
-vm_page_sleep(vm_page_t m, char *msg, char *busy) {
- int slept = 0;
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- int s;
- s = splvm();
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- vm_page_flag_set(m, PG_WANTED);
- tsleep(m, PVM, msg, 0);
- slept = 1;
- }
- splx(s);
- }
- return slept;
-}
-
-#endif
-
-#if 0
-
-/*
- * vm_page_asleep:
- *
- * Similar to vm_page_sleep(), but does not block. Returns 0 if
- * the page is not busy, or 1 if the page is busy.
- *
- * This routine has the side effect of calling asleep() if the page
- * was busy (1 returned).
- */
-
-int
-vm_page_asleep(vm_page_t m, char *msg, char *busy) {
- int slept = 0;
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- int s;
- s = splvm();
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- vm_page_flag_set(m, PG_WANTED);
- asleep(m, PVM, msg, 0);
- slept = 1;
- }
- splx(s);
- }
- return slept;
-}
-
-#endif
-
/*
* vm_page_activate:
*
@@ -982,6 +939,7 @@ vm_page_activate(m)
int s;
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
if (m->queue != PQ_ACTIVE) {
if ((m->queue - m->pc) == PQ_CACHE)
cnt.v_reactivated++;
@@ -1056,6 +1014,7 @@ vm_page_free_toq(vm_page_t m)
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
cnt.v_tfree++;
if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
@@ -1293,6 +1252,7 @@ _vm_page_deactivate(vm_page_t m, int athead)
{
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Ignore if already inactive.
*/
@@ -1330,6 +1290,8 @@ vm_page_deactivate(vm_page_t m)
int
vm_page_try_to_cache(vm_page_t m)
{
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
if (m->dirty || m->hold_count || m->busy || m->wire_count ||
(m->flags & (PG_BUSY|PG_UNMANAGED))) {
return(0);
@@ -1354,6 +1316,7 @@ vm_page_cache(m)
{
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy || m->wire_count) {
printf("vm_page_cache: attempting to cache busy page\n");
return;
@@ -1411,6 +1374,7 @@ vm_page_dontneed(m)
int dnw;
int head;
+ mtx_assert(&vm_mtx, MA_OWNED);
dnw = ++dnweight;
/*
@@ -1451,6 +1415,7 @@ vm_page_dontneed(m)
* to be in the object. If the page doesn't exist, allocate it.
*
* This routine may block.
+ * Requires vm_mtx.
*/
vm_page_t
vm_page_grab(object, pindex, allocflags)
@@ -1458,10 +1423,10 @@ vm_page_grab(object, pindex, allocflags)
vm_pindex_t pindex;
int allocflags;
{
-
vm_page_t m;
int s, generation;
+ mtx_assert(&vm_mtx, MA_OWNED);
retrylookup:
if ((m = vm_page_lookup(object, pindex)) != NULL) {
if (m->busy || (m->flags & PG_BUSY)) {
@@ -1471,7 +1436,7 @@ retrylookup:
while ((object->generation == generation) &&
(m->busy || (m->flags & PG_BUSY))) {
vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
- tsleep(m, PVM, "pgrbwt", 0);
+ msleep(m, &vm_mtx, PVM, "pgrbwt", 0);
if ((allocflags & VM_ALLOC_RETRY) == 0) {
splx(s);
return NULL;
@@ -1534,6 +1499,8 @@ vm_page_bits(int base, int size)
* This routine may not block.
*
* (base + size) must be less than or equal to PAGE_SIZE.
+ *
+ * vm_mtx needs to be held
*/
void
vm_page_set_validclean(m, base, size)
@@ -1545,6 +1512,7 @@ vm_page_set_validclean(m, base, size)
int frag;
int endoff;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (size == 0) /* handle degenerate case */
return;
@@ -1618,6 +1586,8 @@ vm_page_clear_dirty(m, base, size)
int base;
int size;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
m->dirty &= ~vm_page_bits(base, size);
}
@@ -1637,6 +1607,7 @@ vm_page_set_invalid(m, base, size)
{
int bits;
+ mtx_assert(&vm_mtx, MA_OWNED);
bits = vm_page_bits(base, size);
m->valid &= ~bits;
m->dirty &= ~bits;
@@ -1923,8 +1894,19 @@ contigmalloc(size, type, flags, low, high, alignment, boundary)
unsigned long alignment;
unsigned long boundary;
{
- return contigmalloc1(size, type, flags, low, high, alignment, boundary,
+ void * ret;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+ ret = contigmalloc1(size, type, flags, low, high, alignment, boundary,
kernel_map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+
+ return (ret);
+
}
void
@@ -1933,7 +1915,14 @@ contigfree(addr, size, type)
unsigned long size;
struct malloc_type *type;
{
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
kmem_free(kernel_map, (vm_offset_t)addr, size);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
vm_offset_t
@@ -1943,8 +1932,18 @@ vm_page_alloc_contig(size, low, high, alignment)
vm_offset_t high;
vm_offset_t alignment;
{
- return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
+ vm_offset_t ret;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+ ret = ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
alignment, 0ul, kernel_map));
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (ret);
+
}
#include "opt_ddb.h"
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index dc8290e..e1c1cc4 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -305,19 +305,28 @@ extern long first_page; /* first physical page number */
(&vm_page_array[atop(pa) - first_page ])
/*
+ * For now, a global vm lock
+ */
+#define VM_PAGE_MTX(m) (&vm_mtx)
+
+/*
* Functions implemented as macros
*/
static __inline void
vm_page_flag_set(vm_page_t m, unsigned short bits)
{
- atomic_set_short(&(m)->flags, bits);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->flags |= bits;
}
static __inline void
vm_page_flag_clear(vm_page_t m, unsigned short bits)
{
- atomic_clear_short(&(m)->flags, bits);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->flags &= ~bits;
}
#if 0
@@ -332,7 +341,9 @@ vm_page_assert_wait(vm_page_t m, int interruptible)
static __inline void
vm_page_busy(vm_page_t m)
{
- KASSERT((m->flags & PG_BUSY) == 0, ("vm_page_busy: page already busy!!!"));
+
+ KASSERT((m->flags & PG_BUSY) == 0,
+ ("vm_page_busy: page already busy!!!"));
vm_page_flag_set(m, PG_BUSY);
}
@@ -375,13 +386,17 @@ vm_page_wakeup(vm_page_t m)
static __inline void
vm_page_io_start(vm_page_t m)
{
- atomic_add_char(&(m)->busy, 1);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->busy++;
}
static __inline void
vm_page_io_finish(vm_page_t m)
{
- atomic_subtract_char(&m->busy, 1);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->busy--;
if (m->busy == 0)
vm_page_flash(m);
}
@@ -447,12 +462,16 @@ void vm_page_free_toq(vm_page_t m);
static __inline void
vm_page_hold(vm_page_t mem)
{
+
+	mtx_assert(VM_PAGE_MTX(mem), MA_OWNED);
mem->hold_count++;
}
static __inline void
vm_page_unhold(vm_page_t mem)
{
+
+	mtx_assert(VM_PAGE_MTX(mem), MA_OWNED);
--mem->hold_count;
KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!"));
}
@@ -565,7 +584,7 @@ vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg)
* Page is busy. Wait and retry.
*/
vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
- tsleep(m, PVM, msg, 0);
+ msleep(m, VM_PAGE_MTX(m), PVM, msg, 0);
}
splx(s);
return(TRUE);
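
The tsleep() to msleep() conversions above rely on msleep() releasing the given mutex while the thread sleeps and reacquiring it before returning, which is why the busy-wait loops re-test the page state after waking. A condensed, hypothetical sketch of that pattern (example_wait_unbusy() is not a real function; the splvm() bracketing used by the real code is omitted, and vm_page.h's includes are assumed):

static void
example_wait_unbusy(vm_page_t m)
{

	mtx_assert(&vm_mtx, MA_OWNED);
	while (m->busy || (m->flags & PG_BUSY)) {
		vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
		msleep(m, &vm_mtx, PVM, "exwait", 0);
		/* vm_mtx was dropped during the sleep; re-check the page. */
	}
}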
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index ce333cf..60e3f21 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -447,6 +447,8 @@ vm_pageout_flush(mc, count, flags)
* backing_objects.
*
* The object and map must be locked.
+ *
+ * Requires the vm_mtx
*/
static void
vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
@@ -460,6 +462,7 @@ vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
int remove_mode;
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (object->type == OBJT_DEVICE || object->type == OBJT_PHYS)
return;
@@ -1322,7 +1325,7 @@ vm_pageout()
{
int pass;
- mtx_lock(&Giant);
+ mtx_lock(&vm_mtx);
/*
* Initialize some paging parameters.
@@ -1412,7 +1415,8 @@ vm_pageout()
*/
++pass;
if (pass > 1)
- tsleep(&vm_pages_needed, PVM, "psleep", hz/2);
+ msleep(&vm_pages_needed, &vm_mtx, PVM,
+ "psleep", hz/2);
} else {
/*
* Good enough, sleep & handle stats. Prime the pass
@@ -1422,7 +1426,7 @@ vm_pageout()
pass = 1;
else
pass = 0;
- error = tsleep(&vm_pages_needed,
+ error = msleep(&vm_pages_needed, &vm_mtx,
PVM, "psleep", vm_pageout_stats_interval * hz);
if (error && !vm_pages_needed) {
splx(s);
@@ -1466,12 +1470,13 @@ vm_daemon()
{
struct proc *p;
- mtx_lock(&Giant);
+ mtx_lock(&vm_mtx);
while (TRUE) {
- tsleep(&vm_daemon_needed, PPAUSE, "psleep", 0);
+ msleep(&vm_daemon_needed, &vm_mtx, PPAUSE, "psleep", 0);
if (vm_pageout_req_swapout) {
swapout_procs(vm_pageout_req_swapout);
+ mtx_assert(&vm_mtx, MA_OWNED);
vm_pageout_req_swapout = 0;
}
/*
@@ -1479,6 +1484,7 @@ vm_daemon()
* process is swapped out -- deactivate pages
*/
+ mtx_unlock(&vm_mtx);
sx_slock(&allproc_lock);
LIST_FOREACH(p, &allproc, p_list) {
vm_pindex_t limit, size;
@@ -1515,13 +1521,16 @@ vm_daemon()
limit = 0; /* XXX */
mtx_unlock_spin(&sched_lock);
+ mtx_lock(&vm_mtx);
size = vmspace_resident_count(p->p_vmspace);
if (limit >= 0 && size >= limit) {
vm_pageout_map_deactivate_pages(
&p->p_vmspace->vm_map, limit);
}
+ mtx_unlock(&vm_mtx);
}
sx_sunlock(&allproc_lock);
+ mtx_lock(&vm_mtx);
}
}
#endif
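
The vm_daemon() hunks above drop vm_mtx before walking the process list, so that allproc_lock and sched_lock are never acquired with the VM lock held, and re-take it only around the per-process VM work. A stripped-down, hypothetical sketch of one pass (example_daemon_pass() is not a real function; the limit computation and sched_lock handling are elided, and vm_pageout.c's includes are assumed):

static void
example_daemon_pass(vm_pindex_t limit)
{
	struct proc *p;

	mtx_assert(&vm_mtx, MA_OWNED);
	mtx_unlock(&vm_mtx);
	sx_slock(&allproc_lock);
	LIST_FOREACH(p, &allproc, p_list) {
		mtx_lock(&vm_mtx);
		vm_pageout_map_deactivate_pages(&p->p_vmspace->vm_map, limit);
		mtx_unlock(&vm_mtx);
	}
	sx_sunlock(&allproc_lock);
	mtx_lock(&vm_mtx);
}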
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index b13c9c0..e53a14c 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -240,21 +240,32 @@ vm_pager_bufferinit()
* need to perform page-level validation (e.g. the device pager).
*/
vm_object_t
-vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size, vm_prot_t prot,
- vm_ooffset_t off)
+vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size,
+ vm_prot_t prot, vm_ooffset_t off)
{
+ vm_object_t ret;
struct pagerops *ops;
+ int hadvmlock;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
ops = pagertab[type];
if (ops)
- return ((*ops->pgo_alloc) (handle, size, prot, off));
- return (NULL);
+ ret = (*ops->pgo_alloc) (handle, size, prot, off);
+ else
+ ret = NULL;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (ret);
}
void
vm_pager_deallocate(object)
vm_object_t object;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
(*pagertab[object->type]->pgo_dealloc) (object);
}
@@ -374,6 +385,8 @@ initpbuf(struct buf *bp)
*
* NOTE: pfreecnt can be NULL, but this 'feature' will be removed
* relatively soon when the rest of the subsystems get smart about it. XXX
+ *
+ * vm_mtx can be held or unheld
*/
struct buf *
getpbuf(pfreecnt)
@@ -381,8 +394,12 @@ getpbuf(pfreecnt)
{
int s;
struct buf *bp;
+ int hadvmlock;
s = splvm();
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (hadvmlock)
+ mtx_unlock(&vm_mtx);
mtx_lock(&pbuf_mtx);
for (;;) {
@@ -407,6 +424,8 @@ getpbuf(pfreecnt)
splx(s);
initpbuf(bp);
+ if (hadvmlock)
+ mtx_lock(&vm_mtx);
return bp;
}
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
index f54c739..b4511ca 100644
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@@ -124,10 +124,12 @@ vm_pager_get_pages(
) {
int r;
+ mtx_assert(&vm_mtx, MA_OWNED);
r = (*pagertab[object->type]->pgo_getpages)(object, m, count, reqpage);
if (r == VM_PAGER_OK && m[reqpage]->valid != VM_PAGE_BITS_ALL) {
vm_page_zero_invalid(m[reqpage], TRUE);
}
+ mtx_assert(&vm_mtx, MA_OWNED);
return(r);
}
@@ -139,8 +141,11 @@ vm_pager_put_pages(
int flags,
int *rtvals
) {
+
+ mtx_assert(&vm_mtx, MA_OWNED);
(*pagertab[object->type]->pgo_putpages)
(object, m, count, flags, rtvals);
+ mtx_assert(&vm_mtx, MA_OWNED);
}
/*
@@ -161,7 +166,13 @@ vm_pager_has_page(
int *before,
int *after
) {
- return ((*pagertab[object->type]->pgo_haspage) (object, offset, before, after));
+ boolean_t ret;
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+ ret = (*pagertab[object->type]->pgo_haspage)
+ (object, offset, before, after);
+ mtx_assert(&vm_mtx, MA_OWNED);
+ return (ret);
}
/*
@@ -175,8 +186,11 @@ vm_pager_has_page(
static __inline void
vm_pager_page_unswapped(vm_page_t m)
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
if (pagertab[m->object->type]->pgo_pageunswapped)
(*pagertab[m->object->type]->pgo_pageunswapped)(m);
+ mtx_assert(&vm_mtx, MA_OWNED);
}
#endif
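
The asserts added to the vm_pager.h inlines above spell out the pager contract under vm_mtx: the pgo_getpages/pgo_putpages/pgo_haspage methods are entered with vm_mtx held and must return with it held, even if they drop it internally to do I/O under Giant. A hypothetical method honoring that contract might look like the sketch below (example_pgo_getpages() is not part of any real pager; vm_pager.h's includes are assumed):

static int
example_pgo_getpages(vm_object_t object, vm_page_t *m, int count, int reqpage)
{
	int error;

	mtx_assert(&vm_mtx, MA_OWNED);
	mtx_unlock(&vm_mtx);
	mtx_lock(&Giant);
	error = VM_PAGER_OK;	/* ... do the vnode or device I/O under Giant ... */
	mtx_unlock(&Giant);
	mtx_lock(&vm_mtx);	/* return to the VM layer with vm_mtx held */
	return (error);
}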
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index f9b24f8..4861306 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -49,6 +49,9 @@
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -93,6 +96,7 @@ obreak(p, uap)
return EINVAL;
}
+ mtx_lock(&vm_mtx);
if (new > old) {
vm_size_t diff;
@@ -100,16 +104,19 @@ obreak(p, uap)
rv = vm_map_find(&vm->vm_map, NULL, 0, &old, diff, FALSE,
VM_PROT_ALL, VM_PROT_ALL, 0);
if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return (ENOMEM);
}
vm->vm_dsize += btoc(diff);
} else if (new < old) {
rv = vm_map_remove(&vm->vm_map, new, old);
if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return (ENOMEM);
}
vm->vm_dsize -= btoc(old - new);
}
+ mtx_unlock(&vm_mtx);
return (0);
}
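
The obreak() changes above bracket the vm_map calls in vm_mtx, which means every error return reached inside the bracket has to drop the lock before returning. A hypothetical, stripped-down sketch of the grow branch (example_grow() is not a real function; the arguments mirror the call shown in the diff, and vm_unix.c's includes are assumed):

static int
example_grow(struct vmspace *vm, vm_offset_t old, vm_size_t diff)
{
	int rv;

	mtx_lock(&vm_mtx);
	rv = vm_map_find(&vm->vm_map, NULL, 0, &old, diff, FALSE,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	if (rv != KERN_SUCCESS) {
		mtx_unlock(&vm_mtx);	/* unlock on every exit path */
		return (ENOMEM);
	}
	vm->vm_dsize += btoc(diff);
	mtx_unlock(&vm_mtx);
	return (0);
}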
diff --git a/sys/vm/vm_zone.c b/sys/vm/vm_zone.c
index 4cddadc..30fadbe 100644
--- a/sys/vm/vm_zone.c
+++ b/sys/vm/vm_zone.c
@@ -137,6 +137,7 @@ zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
* in pages as needed.
*/
if (z->zflags & ZONE_INTERRUPT) {
+ int hadvmlock;
totsize = round_page(z->zsize * nentries);
atomic_add_int(&zone_kmem_kvaspace, totsize);
@@ -145,12 +146,17 @@ zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
return 0;
z->zpagemax = totsize / PAGE_SIZE;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
if (obj == NULL) {
z->zobj = vm_object_allocate(OBJT_DEFAULT, z->zpagemax);
} else {
z->zobj = obj;
_vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj);
}
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
z->zallocflag = VM_ALLOC_INTERRUPT;
z->zmax += nentries;
} else {
@@ -262,7 +268,6 @@ _zget(vm_zone_t z)
void *item;
KASSERT(z != NULL, ("invalid zone"));
- mtx_assert(&z->zmtx, MA_OWNED);
if (z->zflags & ZONE_INTERRUPT) {
item = (char *) z->zkva + z->zpagecount * PAGE_SIZE;
@@ -299,16 +304,13 @@ _zget(vm_zone_t z)
* We can wait, so just do normal map allocation in the appropriate
* map.
*/
+ mtx_unlock(&z->zmtx);
if (lockstatus(&kernel_map->lock, NULL)) {
- mtx_unlock(&z->zmtx);
item = (void *) kmem_malloc(kmem_map, nbytes, M_WAITOK);
- mtx_lock(&z->zmtx);
if (item != NULL)
atomic_add_int(&zone_kmem_pages, z->zalloc);
} else {
- mtx_unlock(&z->zmtx);
item = (void *) kmem_alloc(kernel_map, nbytes);
- mtx_lock(&z->zmtx);
if (item != NULL)
atomic_add_int(&zone_kern_pages, z->zalloc);
}
@@ -318,6 +320,7 @@ _zget(vm_zone_t z)
nbytes = 0;
}
nitems = nbytes / z->zsize;
+ mtx_lock(&z->zmtx);
}
z->ztotal += nitems;
@@ -361,14 +364,17 @@ void *
zalloc(vm_zone_t z)
{
void *item;
+ int hadvmlock;
KASSERT(z != NULL, ("invalid zone"));
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
mtx_lock(&z->zmtx);
if (z->zfreecnt <= z->zfreemin) {
item = _zget(z);
- mtx_unlock(&z->zmtx);
- return item;
+ goto out;
}
item = z->zitems;
@@ -381,8 +387,11 @@ zalloc(vm_zone_t z)
z->zfreecnt--;
z->znalloc++;
-
+
+out:
mtx_unlock(&z->zmtx);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return item;
}
@@ -392,8 +401,13 @@ zalloc(vm_zone_t z)
void
zfree(vm_zone_t z, void *item)
{
+ int hadvmlock;
+
KASSERT(z != NULL, ("invalid zone"));
KASSERT(item != NULL, ("invalid item"));
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
mtx_lock(&z->zmtx);
((void **) item)[0] = z->zitems;
@@ -405,6 +419,8 @@ zfree(vm_zone_t z, void *item)
z->zitems = item;
z->zfreecnt++;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
mtx_unlock(&z->zmtx);
}
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index e9400b8..12763c8 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -103,6 +103,7 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
vm_object_t object;
struct vnode *vp;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Pageout to vnode, no can do yet.
*/
@@ -122,11 +123,15 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
* Prevent race condition when allocating the object. This
* can happen with NFS vnodes since the nfsnode isn't locked.
*/
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
while (vp->v_flag & VOLOCK) {
vp->v_flag |= VOWANT;
tsleep(vp, PVM, "vnpobj", 0);
}
vp->v_flag |= VOLOCK;
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
/*
* If the object is being terminated, wait for it to
@@ -134,7 +139,7 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
*/
while (((object = vp->v_object) != NULL) &&
(object->flags & OBJ_DEAD)) {
- tsleep(object, PVM, "vadead", 0);
+ msleep(object, &vm_mtx, PVM, "vadead", 0);
}
if (vp->v_usecount == 0)
@@ -157,11 +162,15 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
vp->v_usecount++;
}
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
vp->v_flag &= ~VOLOCK;
if (vp->v_flag & VOWANT) {
vp->v_flag &= ~VOWANT;
wakeup(vp);
}
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
return (object);
}
@@ -221,8 +230,12 @@ vnode_pager_haspage(object, pindex, before, after)
blocksperpage = (PAGE_SIZE / bsize);
reqblock = pindex * blocksperpage;
}
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
after, before);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if (err)
return TRUE;
if ( bn == -1)
@@ -285,6 +298,11 @@ vnode_pager_setsize(vp, nsize)
* File has shrunk. Toss any cached pages beyond the new EOF.
*/
if (nsize < object->un_pager.vnp.vnp_size) {
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
vm_freeze_copyopts(object, OFF_TO_IDX(nsize), object->size);
if (nobjsize < object->size) {
vm_object_page_remove(object, nobjsize, object->size,
@@ -325,6 +343,8 @@ vnode_pager_setsize(vp, nsize)
m->dirty = VM_PAGE_BITS_ALL;
}
}
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
object->un_pager.vnp.vnp_size = nsize;
object->size = nobjsize;
@@ -542,8 +562,8 @@ vnode_pager_input_old(object, m)
*/
/*
- * EOPNOTSUPP is no longer legal. For local media VFS's that do not
- * implement their own VOP_GETPAGES, their VOP_GETPAGES should call to
+ * Local media VFS's that do not implement their own VOP_GETPAGES
+ * should have their VOP_GETPAGES call to
* vnode_pager_generic_getpages() to implement the previous behaviour.
*
* All other FS's should use the bypass to get to the local media
@@ -560,16 +580,11 @@ vnode_pager_getpages(object, m, count, reqpage)
struct vnode *vp;
int bytes = count * PAGE_SIZE;
+ mtx_assert(&vm_mtx, MA_OWNED);
vp = object->handle;
- /*
- * XXX temporary diagnostic message to help track stale FS code,
- * Returning EOPNOTSUPP from here may make things unhappy.
- */
rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
- if (rtval == EOPNOTSUPP) {
- printf("vnode_pager: *** WARNING *** stale FS getpages\n");
- rtval = vnode_pager_generic_getpages( vp, m, bytes, reqpage);
- }
+ KASSERT(rtval != EOPNOTSUPP,
+ ("vnode_pager: FS getpages not implemented\n"));
return rtval;
}
@@ -891,13 +906,19 @@ vnode_pager_putpages(object, m, count, sync, rtvals)
vp = object->handle;
if (vp->v_type != VREG)
mp = NULL;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
(void)vn_start_write(vp, &mp, V_WAIT);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
- if (rtval == EOPNOTSUPP) {
- printf("vnode_pager: *** WARNING *** stale FS putpages\n");
- rtval = vnode_pager_generic_putpages( vp, m, bytes, sync, rtvals);
- }
+ KASSERT(rtval != EOPNOTSUPP,
+ ("vnode_pager: stale FS putpages\n"));
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
vn_finished_write(mp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
@@ -1000,6 +1021,8 @@ vnode_pager_lock(object)
{
struct proc *p = curproc; /* XXX */
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+ mtx_assert(&Giant, MA_OWNED);
for (; object != NULL; object = object->backing_object) {
if (object->type != OBJT_VNODE)
continue;