-rw-r--r--  sys/amd64/amd64/busdma_machdep.c  2
-rw-r--r--  sys/amd64/amd64/machdep.c  4
-rw-r--r--  sys/amd64/amd64/mem.c  18
-rw-r--r--  sys/amd64/amd64/pmap.c  1
-rw-r--r--  sys/amd64/amd64/trap.c  13
-rw-r--r--  sys/amd64/amd64/vm_machdep.c  22
-rw-r--r--  sys/coda/coda_namecache.c  1
-rw-r--r--  sys/compat/linprocfs/linprocfs.c  1
-rw-r--r--  sys/compat/linprocfs/linprocfs_misc.c  1
-rw-r--r--  sys/compat/pecoff/imgact_pecoff.c  1
-rw-r--r--  sys/dev/agp/agp.c  1
-rw-r--r--  sys/dev/agp/agp_ali.c  1
-rw-r--r--  sys/dev/agp/agp_amd.c  1
-rw-r--r--  sys/dev/agp/agp_i810.c  1
-rw-r--r--  sys/dev/agp/agp_intel.c  1
-rw-r--r--  sys/dev/agp/agp_sis.c  1
-rw-r--r--  sys/dev/agp/agp_via.c  1
-rw-r--r--  sys/dev/md/md.c  1
-rw-r--r--  sys/fs/coda/coda_namecache.c  1
-rw-r--r--  sys/fs/procfs/procfs_map.c  1
-rw-r--r--  sys/fs/procfs/procfs_mem.c  12
-rw-r--r--  sys/fs/specfs/spec_vnops.c  4
-rw-r--r--  sys/fs/unionfs/union_subr.c  1
-rw-r--r--  sys/i386/i386/busdma_machdep.c  2
-rw-r--r--  sys/i386/i386/machdep.c  4
-rw-r--r--  sys/i386/i386/mem.c  18
-rw-r--r--  sys/i386/i386/pmap.c  1
-rw-r--r--  sys/i386/i386/trap.c  13
-rw-r--r--  sys/i386/i386/vm_machdep.c  22
-rw-r--r--  sys/i386/linux/linux_sysvec.c  1
-rw-r--r--  sys/kern/imgact_aout.c  8
-rw-r--r--  sys/kern/imgact_elf.c  21
-rw-r--r--  sys/kern/init_main.c  4
-rw-r--r--  sys/kern/kern_exec.c  7
-rw-r--r--  sys/kern/kern_exit.c  2
-rw-r--r--  sys/kern/kern_fork.c  2
-rw-r--r--  sys/kern/kern_resource.c  2
-rw-r--r--  sys/kern/kern_synch.c  7
-rw-r--r--  sys/kern/link_elf.c  8
-rw-r--r--  sys/kern/link_elf_obj.c  8
-rw-r--r--  sys/kern/subr_blist.c  1
-rw-r--r--  sys/kern/subr_trap.c  13
-rw-r--r--  sys/kern/sys_pipe.c  11
-rw-r--r--  sys/kern/syscalls.master  34
-rw-r--r--  sys/kern/sysv_shm.c  6
-rw-r--r--  sys/kern/vfs_bio.c  105
-rw-r--r--  sys/kern/vfs_cluster.c  10
-rw-r--r--  sys/kern/vfs_default.c  16
-rw-r--r--  sys/kern/vfs_extattr.c  7
-rw-r--r--  sys/kern/vfs_subr.c  15
-rw-r--r--  sys/kern/vfs_syscalls.c  7
-rw-r--r--  sys/miscfs/procfs/procfs_map.c  1
-rw-r--r--  sys/miscfs/procfs/procfs_mem.c  12
-rw-r--r--  sys/miscfs/specfs/spec_vnops.c  4
-rw-r--r--  sys/miscfs/union/union_subr.c  1
-rw-r--r--  sys/nfs/nfs_bio.c  31
-rw-r--r--  sys/nfs/nfs_common.c  2
-rw-r--r--  sys/nfs/nfs_subs.c  2
-rw-r--r--  sys/nfsclient/nfs_bio.c  31
-rw-r--r--  sys/nfsclient/nfs_subs.c  2
-rw-r--r--  sys/nfsserver/nfs_srvsubs.c  2
-rw-r--r--  sys/pci/agp.c  1
-rw-r--r--  sys/pci/agp_ali.c  1
-rw-r--r--  sys/pci/agp_amd.c  1
-rw-r--r--  sys/pci/agp_i810.c  1
-rw-r--r--  sys/pci/agp_intel.c  1
-rw-r--r--  sys/pci/agp_sis.c  1
-rw-r--r--  sys/pci/agp_via.c  1
-rw-r--r--  sys/ufs/ufs/ufs_readwrite.c  47
-rw-r--r--  sys/vm/default_pager.c  2
-rw-r--r--  sys/vm/phys_pager.c  16
-rw-r--r--  sys/vm/swap_pager.c  77
-rw-r--r--  sys/vm/vm.h  4
-rw-r--r--  sys/vm/vm_fault.c  65
-rw-r--r--  sys/vm/vm_glue.c  38
-rw-r--r--  sys/vm/vm_init.c  8
-rw-r--r--  sys/vm/vm_kern.c  74
-rw-r--r--  sys/vm/vm_map.c  41
-rw-r--r--  sys/vm/vm_map.h  22
-rw-r--r--  sys/vm/vm_meter.c  6
-rw-r--r--  sys/vm/vm_mmap.c  60
-rw-r--r--  sys/vm/vm_object.c  58
-rw-r--r--  sys/vm/vm_object.h  35
-rw-r--r--  sys/vm/vm_page.c  123
-rw-r--r--  sys/vm/vm_page.h  31
-rw-r--r--  sys/vm/vm_pageout.c  19
-rw-r--r--  sys/vm/vm_pager.c  27
-rw-r--r--  sys/vm/vm_pager.h  16
-rw-r--r--  sys/vm/vm_unix.c  7
-rw-r--r--  sys/vm/vm_zone.c  32
-rw-r--r--  sys/vm/vnode_pager.c  53
91 files changed, 1149 insertions, 253 deletions
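
The hunks that follow bracket VM operations with mtx_lock(&vm_mtx) / mtx_unlock(&vm_mtx). Routines that can be entered either with or without the lock already held (bfreekva(), vfs_setdirty(), vm_hold_free_pages()) use a conditional-acquire idiom instead. Below is a minimal illustrative sketch of that idiom, not part of the patch; example_vm_op() is a hypothetical name, and only the mutex(9) calls that appear in the hunks are assumed.

    /*
     * Sketch only: take vm_mtx if the caller does not already hold it,
     * and release it only if it was taken here.
     */
    #include <sys/param.h>
    #include <sys/lock.h>
    #include <sys/mutex.h>

    extern struct mtx vm_mtx;

    static void
    example_vm_op(void)
    {
            int hadvmlock;

            hadvmlock = mtx_owned(&vm_mtx);     /* caller may already hold it */
            if (!hadvmlock)
                    mtx_lock(&vm_mtx);

            /* ... operations that require vm_mtx, e.g. vm_map_delete() ... */

            if (!hadvmlock)
                    mtx_unlock(&vm_mtx);        /* drop it only if taken here */
    }
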
diff --git a/sys/amd64/amd64/busdma_machdep.c b/sys/amd64/amd64/busdma_machdep.c
index 63906dd..3dc9e76 100644
--- a/sys/amd64/amd64/busdma_machdep.c
+++ b/sys/amd64/amd64/busdma_machdep.c
@@ -31,6 +31,8 @@
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
diff --git a/sys/amd64/amd64/machdep.c b/sys/amd64/amd64/machdep.c
index bb552a3..e02569c1 100644
--- a/sys/amd64/amd64/machdep.c
+++ b/sys/amd64/amd64/machdep.c
@@ -264,6 +264,7 @@ cpu_startup(dummy)
/*
* Good {morning,afternoon,evening,night}.
*/
+ mtx_lock(&vm_mtx);
earlysetcpuclass();
startrtclock();
printcpuinfo();
@@ -397,6 +398,7 @@ again:
exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
(16*(ARG_MAX+(PAGE_SIZE*3))));
+ mtx_unlock(&vm_mtx);
/*
* XXX: Mbuf system machine-specific initializations should
* go here, if anywhere.
@@ -2075,9 +2077,11 @@ f00f_hack(void *unused) {
r_idt.rd_base = (int)new_idt;
lidt(&r_idt);
idt = new_idt;
+ mtx_lock(&vm_mtx);
if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
VM_PROT_READ, FALSE) != KERN_SUCCESS)
panic("vm_map_protect failed");
+ mtx_unlock(&vm_mtx);
return;
}
#endif /* defined(I586_CPU) && !NO_F00F_HACK */
diff --git a/sys/amd64/amd64/mem.c b/sys/amd64/amd64/mem.c
index a5a9135..8671530 100644
--- a/sys/amd64/amd64/mem.c
+++ b/sys/amd64/amd64/mem.c
@@ -50,6 +50,8 @@
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/proc.h>
@@ -153,13 +155,17 @@ mmrw(dev_t dev, struct uio *uio, int flags)
case 0:
v = uio->uio_offset;
v &= ~PAGE_MASK;
+ mtx_lock(&vm_mtx);
pmap_kenter((vm_offset_t)ptvmmap, v);
+ mtx_unlock(&vm_mtx);
o = (int)uio->uio_offset & PAGE_MASK;
c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK));
c = min(c, (u_int)(PAGE_SIZE - o));
c = min(c, (u_int)iov->iov_len);
error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
+ mtx_lock(&vm_mtx);
pmap_kremove((vm_offset_t)ptvmmap);
+ mtx_unlock(&vm_mtx);
continue;
/* minor device 1 is kernel memory */
@@ -177,14 +183,20 @@ mmrw(dev_t dev, struct uio *uio, int flags)
return EFAULT;
if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0))
return EFAULT;
+ mtx_lock(&vm_mtx);
for (; addr < eaddr; addr += PAGE_SIZE)
- if (pmap_extract(kernel_pmap, addr) == 0)
+ if (pmap_extract(kernel_pmap, addr) == 0) {
+ mtx_unlock(&vm_mtx);
return EFAULT;
-
+ }
+
if (!kernacc((caddr_t)(int)uio->uio_offset, c,
uio->uio_rw == UIO_READ ?
- VM_PROT_READ : VM_PROT_WRITE))
+ VM_PROT_READ : VM_PROT_WRITE)) {
+ mtx_unlock(&vm_mtx);
return (EFAULT);
+ }
+ mtx_unlock(&vm_mtx);
error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio);
continue;
}
diff --git a/sys/amd64/amd64/pmap.c b/sys/amd64/amd64/pmap.c
index 93807ee..488a8a5 100644
--- a/sys/amd64/amd64/pmap.c
+++ b/sys/amd64/amd64/pmap.c
@@ -75,6 +75,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/proc.h>
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index af7bfc1..8924fa2 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -330,9 +330,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
i = trap_pfault(&frame, TRUE, eva);
- mtx_unlock(&Giant);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if (i == -2) {
/*
@@ -443,9 +441,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
(void) trap_pfault(&frame, FALSE, eva);
- mtx_unlock(&Giant);
goto out;
case T_DNA:
@@ -887,7 +883,9 @@ nogo:
frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
return (0);
}
+ mtx_lock(&Giant);
trap_fatal(frame, eva);
+ mtx_unlock(&Giant);
return (-1);
}
@@ -1147,14 +1145,17 @@ syscall(frame)
/*
* Try to run the syscall without the MP lock if the syscall
- * is MP safe. We have to obtain the MP lock no matter what if
- * we are ktracing
+ * is MP safe.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
mtx_lock(&Giant);
}
#ifdef KTRACE
+ /*
+ * We have to obtain the MP lock no matter what if
+ * we are ktracing
+ */
if (KTRPOINT(p, KTR_SYSCALL)) {
if (!mtx_owned(&Giant))
mtx_lock(&Giant);
diff --git a/sys/amd64/amd64/vm_machdep.c b/sys/amd64/amd64/vm_machdep.c
index fd626a3..eda2386 100644
--- a/sys/amd64/amd64/vm_machdep.c
+++ b/sys/amd64/amd64/vm_machdep.c
@@ -290,11 +290,14 @@ void
cpu_wait(p)
struct proc *p;
{
+
+ mtx_lock(&vm_mtx);
/* drop per-process resources */
pmap_dispose_proc(p);
/* and clean-out the vmspace */
vmspace_free(p->p_vmspace);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -376,6 +379,7 @@ vmapbuf(bp)
if ((bp->b_flags & B_PHYS) == 0)
panic("vmapbuf");
+ mtx_lock(&vm_mtx);
for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
addr < bp->b_data + bp->b_bufsize;
addr += PAGE_SIZE, v += PAGE_SIZE) {
@@ -391,6 +395,7 @@ vmapbuf(bp)
vm_page_hold(PHYS_TO_VM_PAGE(pa));
pmap_kenter((vm_offset_t) v, pa);
}
+ mtx_unlock(&vm_mtx);
kva = bp->b_saveaddr;
bp->b_saveaddr = bp->b_data;
@@ -411,6 +416,7 @@ vunmapbuf(bp)
if ((bp->b_flags & B_PHYS) == 0)
panic("vunmapbuf");
+ mtx_lock(&vm_mtx);
for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
addr < bp->b_data + bp->b_bufsize;
addr += PAGE_SIZE) {
@@ -418,6 +424,7 @@ vunmapbuf(bp)
pmap_kremove((vm_offset_t) addr);
vm_page_unhold(PHYS_TO_VM_PAGE(pa));
}
+ mtx_unlock(&vm_mtx);
bp->b_data = bp->b_saveaddr;
}
@@ -574,12 +581,17 @@ vm_page_zero_idle()
* pages because doing so may flush our L1 and L2 caches too much.
*/
- if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count))
+ if (mtx_trylock(&vm_mtx) == 0)
+ return (0);
+ if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) {
+ mtx_unlock(&vm_mtx);
return(0);
- if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
+ }
+ if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) {
+ mtx_unlock(&vm_mtx);
return(0);
+ }
- if (mtx_trylock(&Giant)) {
s = splvm();
zero_state = 0;
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
@@ -602,10 +614,8 @@ vm_page_zero_idle()
}
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
- mtx_unlock(&Giant);
+ mtx_unlock(&vm_mtx);
return (1);
- }
- return (0);
}
/*
diff --git a/sys/coda/coda_namecache.c b/sys/coda/coda_namecache.c
index 3b73a67..9dfaf19 100644
--- a/sys/coda/coda_namecache.c
+++ b/sys/coda/coda_namecache.c
@@ -81,6 +81,7 @@
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/ucred.h>
#include <vm/vm.h>
diff --git a/sys/compat/linprocfs/linprocfs.c b/sys/compat/linprocfs/linprocfs.c
index 004ec23..1a8e078 100644
--- a/sys/compat/linprocfs/linprocfs.c
+++ b/sys/compat/linprocfs/linprocfs.c
@@ -50,6 +50,7 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
diff --git a/sys/compat/linprocfs/linprocfs_misc.c b/sys/compat/linprocfs/linprocfs_misc.c
index 004ec23..1a8e078 100644
--- a/sys/compat/linprocfs/linprocfs_misc.c
+++ b/sys/compat/linprocfs/linprocfs_misc.c
@@ -50,6 +50,7 @@
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
#include <sys/sbuf.h>
diff --git a/sys/compat/pecoff/imgact_pecoff.c b/sys/compat/pecoff/imgact_pecoff.c
index 36b4288..f5cbfa8 100644
--- a/sys/compat/pecoff/imgact_pecoff.c
+++ b/sys/compat/pecoff/imgact_pecoff.c
@@ -49,6 +49,7 @@
#include <sys/imgact.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/namei.h>
#include <sys/proc.h>
diff --git a/sys/dev/agp/agp.c b/sys/dev/agp/agp.c
index 6419635..333c4c8 100644
--- a/sys/dev/agp/agp.c
+++ b/sys/dev/agp/agp.c
@@ -38,6 +38,7 @@
#include <sys/ioccom.h>
#include <sys/agpio.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <pci/pcivar.h>
diff --git a/sys/dev/agp/agp_ali.c b/sys/dev/agp/agp_ali.c
index 86e070e..aa805e1 100644
--- a/sys/dev/agp/agp_ali.c
+++ b/sys/dev/agp/agp_ali.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/agp/agp_amd.c b/sys/dev/agp/agp_amd.c
index 0a498f7..4aaf4e9 100644
--- a/sys/dev/agp/agp_amd.c
+++ b/sys/dev/agp/agp_amd.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/agp/agp_i810.c b/sys/dev/agp/agp_i810.c
index 79fd566..5c40493 100644
--- a/sys/dev/agp/agp_i810.c
+++ b/sys/dev/agp/agp_i810.c
@@ -36,6 +36,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/agp/agp_intel.c b/sys/dev/agp/agp_intel.c
index a4b9a43..dc1ef4d 100644
--- a/sys/dev/agp/agp_intel.c
+++ b/sys/dev/agp/agp_intel.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/agp/agp_sis.c b/sys/dev/agp/agp_sis.c
index 1f1a50b..a6a20a4 100644
--- a/sys/dev/agp/agp_sis.c
+++ b/sys/dev/agp/agp_sis.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/agp/agp_via.c b/sys/dev/agp/agp_via.c
index 983348e..086b027 100644
--- a/sys/dev/agp/agp_via.c
+++ b/sys/dev/agp/agp_via.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/dev/md/md.c b/sys/dev/md/md.c
index edf2890..ae783dd 100644
--- a/sys/dev/md/md.c
+++ b/sys/dev/md/md.c
@@ -71,6 +71,7 @@
#include <sys/linker.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/mdioctl.h>
#include <sys/namei.h>
#include <sys/proc.h>
diff --git a/sys/fs/coda/coda_namecache.c b/sys/fs/coda/coda_namecache.c
index 3b73a67..9dfaf19 100644
--- a/sys/fs/coda/coda_namecache.c
+++ b/sys/fs/coda/coda_namecache.c
@@ -81,6 +81,7 @@
#include <sys/errno.h>
#include <sys/lock.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/ucred.h>
#include <vm/vm.h>
diff --git a/sys/fs/procfs/procfs_map.c b/sys/fs/procfs/procfs_map.c
index 5e4a30c..5c21993 100644
--- a/sys/fs/procfs/procfs_map.c
+++ b/sys/fs/procfs/procfs_map.c
@@ -42,6 +42,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/vnode.h>
diff --git a/sys/fs/procfs/procfs_mem.c b/sys/fs/procfs/procfs_mem.c
index 3a2f8d2..1e28870 100644
--- a/sys/fs/procfs/procfs_mem.c
+++ b/sys/fs/procfs/procfs_mem.c
@@ -48,6 +48,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/user.h>
@@ -88,8 +89,14 @@ procfs_rwmem(curp, p, uio)
* usage in that process can be messed up.
*/
vm = p->p_vmspace;
- if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1))
+ if ((p->p_flag & P_WEXIT))
return EFAULT;
+
+ mtx_lock(&vm_mtx);
+ if (vm->vm_refcnt < 1) {
+ mtx_unlock(&vm_mtx);
+ return EFAULT;
+ }
++vm->vm_refcnt;
/*
* The map we want...
@@ -207,7 +214,9 @@ procfs_rwmem(curp, p, uio)
/*
* Now do the i/o move.
*/
+ mtx_unlock(&vm_mtx);
error = uiomove((caddr_t)(kva + page_offset), len, uio);
+ mtx_lock(&vm_mtx);
pmap_kremove(kva);
@@ -226,6 +235,7 @@ procfs_rwmem(curp, p, uio)
kmem_free(kernel_map, kva, PAGE_SIZE);
vmspace_free(vm);
+ mtx_unlock(&vm_mtx);
return (error);
}
diff --git a/sys/fs/specfs/spec_vnops.c b/sys/fs/specfs/spec_vnops.c
index 2940f40..ba81229 100644
--- a/sys/fs/specfs/spec_vnops.c
+++ b/sys/fs/specfs/spec_vnops.c
@@ -731,6 +731,8 @@ spec_getpages(ap)
cnt.v_vnodein++;
cnt.v_vnodepgsin += pcount;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
/* Do the input. */
BUF_STRATEGY(bp);
@@ -741,6 +743,8 @@ spec_getpages(ap)
tsleep(bp, PVM, "spread", 0);
splx(s);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if ((bp->b_ioflags & BIO_ERROR) != 0) {
if (bp->b_error)
diff --git a/sys/fs/unionfs/union_subr.c b/sys/fs/unionfs/union_subr.c
index 869818f..3ac98bf 100644
--- a/sys/fs/unionfs/union_subr.c
+++ b/sys/fs/unionfs/union_subr.c
@@ -45,6 +45,7 @@
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mount.h>
diff --git a/sys/i386/i386/busdma_machdep.c b/sys/i386/i386/busdma_machdep.c
index 63906dd..3dc9e76 100644
--- a/sys/i386/i386/busdma_machdep.c
+++ b/sys/i386/i386/busdma_machdep.c
@@ -31,6 +31,8 @@
#include <sys/malloc.h>
#include <sys/bus.h>
#include <sys/interrupt.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_page.h>
diff --git a/sys/i386/i386/machdep.c b/sys/i386/i386/machdep.c
index bb552a3..e02569c1 100644
--- a/sys/i386/i386/machdep.c
+++ b/sys/i386/i386/machdep.c
@@ -264,6 +264,7 @@ cpu_startup(dummy)
/*
* Good {morning,afternoon,evening,night}.
*/
+ mtx_lock(&vm_mtx);
earlysetcpuclass();
startrtclock();
printcpuinfo();
@@ -397,6 +398,7 @@ again:
exec_map = kmem_suballoc(kernel_map, &minaddr, &maxaddr,
(16*(ARG_MAX+(PAGE_SIZE*3))));
+ mtx_unlock(&vm_mtx);
/*
* XXX: Mbuf system machine-specific initializations should
* go here, if anywhere.
@@ -2075,9 +2077,11 @@ f00f_hack(void *unused) {
r_idt.rd_base = (int)new_idt;
lidt(&r_idt);
idt = new_idt;
+ mtx_lock(&vm_mtx);
if (vm_map_protect(kernel_map, tmp, tmp + PAGE_SIZE,
VM_PROT_READ, FALSE) != KERN_SUCCESS)
panic("vm_map_protect failed");
+ mtx_unlock(&vm_mtx);
return;
}
#endif /* defined(I586_CPU) && !NO_F00F_HACK */
diff --git a/sys/i386/i386/mem.c b/sys/i386/i386/mem.c
index a5a9135..8671530 100644
--- a/sys/i386/i386/mem.c
+++ b/sys/i386/i386/mem.c
@@ -50,6 +50,8 @@
#include <sys/fcntl.h>
#include <sys/ioccom.h>
#include <sys/kernel.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/memrange.h>
#include <sys/proc.h>
@@ -153,13 +155,17 @@ mmrw(dev_t dev, struct uio *uio, int flags)
case 0:
v = uio->uio_offset;
v &= ~PAGE_MASK;
+ mtx_lock(&vm_mtx);
pmap_kenter((vm_offset_t)ptvmmap, v);
+ mtx_unlock(&vm_mtx);
o = (int)uio->uio_offset & PAGE_MASK;
c = (u_int)(PAGE_SIZE - ((int)iov->iov_base & PAGE_MASK));
c = min(c, (u_int)(PAGE_SIZE - o));
c = min(c, (u_int)iov->iov_len);
error = uiomove((caddr_t)&ptvmmap[o], (int)c, uio);
+ mtx_lock(&vm_mtx);
pmap_kremove((vm_offset_t)ptvmmap);
+ mtx_unlock(&vm_mtx);
continue;
/* minor device 1 is kernel memory */
@@ -177,14 +183,20 @@ mmrw(dev_t dev, struct uio *uio, int flags)
return EFAULT;
if (eaddr >= (vm_offset_t)VADDR(APTDPTDI, 0))
return EFAULT;
+ mtx_lock(&vm_mtx);
for (; addr < eaddr; addr += PAGE_SIZE)
- if (pmap_extract(kernel_pmap, addr) == 0)
+ if (pmap_extract(kernel_pmap, addr) == 0) {
+ mtx_unlock(&vm_mtx);
return EFAULT;
-
+ }
+
if (!kernacc((caddr_t)(int)uio->uio_offset, c,
uio->uio_rw == UIO_READ ?
- VM_PROT_READ : VM_PROT_WRITE))
+ VM_PROT_READ : VM_PROT_WRITE)) {
+ mtx_unlock(&vm_mtx);
return (EFAULT);
+ }
+ mtx_unlock(&vm_mtx);
error = uiomove((caddr_t)(int)uio->uio_offset, (int)c, uio);
continue;
}
diff --git a/sys/i386/i386/pmap.c b/sys/i386/i386/pmap.c
index 93807ee..488a8a5 100644
--- a/sys/i386/i386/pmap.c
+++ b/sys/i386/i386/pmap.c
@@ -75,6 +75,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/msgbuf.h>
#include <sys/proc.h>
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index af7bfc1..8924fa2 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -330,9 +330,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
i = trap_pfault(&frame, TRUE, eva);
- mtx_unlock(&Giant);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if (i == -2) {
/*
@@ -443,9 +441,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
(void) trap_pfault(&frame, FALSE, eva);
- mtx_unlock(&Giant);
goto out;
case T_DNA:
@@ -887,7 +883,9 @@ nogo:
frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
return (0);
}
+ mtx_lock(&Giant);
trap_fatal(frame, eva);
+ mtx_unlock(&Giant);
return (-1);
}
@@ -1147,14 +1145,17 @@ syscall(frame)
/*
* Try to run the syscall without the MP lock if the syscall
- * is MP safe. We have to obtain the MP lock no matter what if
- * we are ktracing
+ * is MP safe.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
mtx_lock(&Giant);
}
#ifdef KTRACE
+ /*
+ * We have to obtain the MP lock no matter what if
+ * we are ktracing
+ */
if (KTRPOINT(p, KTR_SYSCALL)) {
if (!mtx_owned(&Giant))
mtx_lock(&Giant);
diff --git a/sys/i386/i386/vm_machdep.c b/sys/i386/i386/vm_machdep.c
index fd626a3..eda2386 100644
--- a/sys/i386/i386/vm_machdep.c
+++ b/sys/i386/i386/vm_machdep.c
@@ -290,11 +290,14 @@ void
cpu_wait(p)
struct proc *p;
{
+
+ mtx_lock(&vm_mtx);
/* drop per-process resources */
pmap_dispose_proc(p);
/* and clean-out the vmspace */
vmspace_free(p->p_vmspace);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -376,6 +379,7 @@ vmapbuf(bp)
if ((bp->b_flags & B_PHYS) == 0)
panic("vmapbuf");
+ mtx_lock(&vm_mtx);
for (v = bp->b_saveaddr, addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
addr < bp->b_data + bp->b_bufsize;
addr += PAGE_SIZE, v += PAGE_SIZE) {
@@ -391,6 +395,7 @@ vmapbuf(bp)
vm_page_hold(PHYS_TO_VM_PAGE(pa));
pmap_kenter((vm_offset_t) v, pa);
}
+ mtx_unlock(&vm_mtx);
kva = bp->b_saveaddr;
bp->b_saveaddr = bp->b_data;
@@ -411,6 +416,7 @@ vunmapbuf(bp)
if ((bp->b_flags & B_PHYS) == 0)
panic("vunmapbuf");
+ mtx_lock(&vm_mtx);
for (addr = (caddr_t)trunc_page((vm_offset_t)bp->b_data);
addr < bp->b_data + bp->b_bufsize;
addr += PAGE_SIZE) {
@@ -418,6 +424,7 @@ vunmapbuf(bp)
pmap_kremove((vm_offset_t) addr);
vm_page_unhold(PHYS_TO_VM_PAGE(pa));
}
+ mtx_unlock(&vm_mtx);
bp->b_data = bp->b_saveaddr;
}
@@ -574,12 +581,17 @@ vm_page_zero_idle()
* pages because doing so may flush our L1 and L2 caches too much.
*/
- if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count))
+ if (mtx_trylock(&vm_mtx) == 0)
+ return (0);
+ if (zero_state && vm_page_zero_count >= ZIDLE_LO(cnt.v_free_count)) {
+ mtx_unlock(&vm_mtx);
return(0);
- if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count))
+ }
+ if (vm_page_zero_count >= ZIDLE_HI(cnt.v_free_count)) {
+ mtx_unlock(&vm_mtx);
return(0);
+ }
- if (mtx_trylock(&Giant)) {
s = splvm();
zero_state = 0;
m = vm_page_list_find(PQ_FREE, free_rover, FALSE);
@@ -602,10 +614,8 @@ vm_page_zero_idle()
}
free_rover = (free_rover + PQ_PRIME2) & PQ_L2_MASK;
splx(s);
- mtx_unlock(&Giant);
+ mtx_unlock(&vm_mtx);
return (1);
- }
- return (0);
}
/*
diff --git a/sys/i386/linux/linux_sysvec.c b/sys/i386/linux/linux_sysvec.c
index 13c29f8..0734ba4 100644
--- a/sys/i386/linux/linux_sysvec.c
+++ b/sys/i386/linux/linux_sysvec.c
@@ -41,6 +41,7 @@
#include <sys/imgact_aout.h>
#include <sys/imgact_elf.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/signalvar.h>
#include <sys/sysent.h>
diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c
index 9478eb3..8becda3 100644
--- a/sys/kern/imgact_aout.c
+++ b/sys/kern/imgact_aout.c
@@ -171,6 +171,7 @@ exec_aout_imgact(imgp)
if (error)
return (error);
+ mtx_lock(&vm_mtx);
/*
* Destroy old process VM and create a new one (with a new stack)
*/
@@ -184,7 +185,9 @@ exec_aout_imgact(imgp)
vp = imgp->vp;
map = &vmspace->vm_map;
vm_map_lock(map);
+ mtx_unlock(&vm_mtx);
VOP_GETVOBJECT(vp, &object);
+ mtx_lock(&vm_mtx);
vm_object_reference(object);
text_end = virtual_offset + a_out->a_text;
@@ -195,6 +198,7 @@ exec_aout_imgact(imgp)
MAP_COPY_ON_WRITE | MAP_PREFAULT);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
data_end = text_end + a_out->a_data;
@@ -207,6 +211,7 @@ exec_aout_imgact(imgp)
MAP_COPY_ON_WRITE | MAP_PREFAULT);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
}
@@ -217,6 +222,7 @@ exec_aout_imgact(imgp)
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
}
@@ -229,6 +235,8 @@ exec_aout_imgact(imgp)
vmspace->vm_daddr = (caddr_t) (uintptr_t)
(virtual_offset + a_out->a_text);
+ mtx_unlock(&vm_mtx);
+
/* Fill in image_params */
imgp->interpreted = 0;
imgp->entry_addr = a_out->a_entry;
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index da7b9cb..2a15e9c 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -230,6 +230,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
else
map_len = round_page(offset+filsz) - file_addr;
+ mtx_lock(&vm_mtx);
if (map_len != 0) {
vm_object_reference(object);
vm_map_lock(&vmspace->vm_map);
@@ -244,12 +245,15 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
vm_map_unlock(&vmspace->vm_map);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
+ mtx_unlock(&vm_mtx);
return EINVAL;
}
/* we can stop now if we've covered it all */
- if (memsz == filsz)
+ if (memsz == filsz) {
+ mtx_unlock(&vm_mtx);
return 0;
+ }
}
@@ -270,8 +274,10 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
map_addr, map_addr + map_len,
VM_PROT_ALL, VM_PROT_ALL, 0);
vm_map_unlock(&vmspace->vm_map);
- if (rv != KERN_SUCCESS)
+ if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return EINVAL;
+ }
}
if (copy_len != 0) {
@@ -287,14 +293,19 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
+ mtx_unlock(&vm_mtx);
return EINVAL;
}
/* send the page fragment to user space */
+ mtx_unlock(&vm_mtx);
error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len);
+ mtx_lock(&vm_mtx);
vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
- if (error)
+ if (error) {
+ mtx_unlock(&vm_mtx);
return (error);
+ }
}
/*
@@ -303,6 +314,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len, prot,
FALSE);
+ mtx_unlock(&vm_mtx);
return error;
}
@@ -498,9 +510,11 @@ exec_elf_imgact(struct image_params *imgp)
if ((error = exec_extract_strings(imgp)) != 0)
goto fail;
+ mtx_lock(&vm_mtx);
exec_new_vmspace(imgp);
vmspace = imgp->proc->p_vmspace;
+ mtx_unlock(&vm_mtx);
for (i = 0; i < hdr->e_phnum; i++) {
switch(phdr[i].p_type) {
@@ -557,6 +571,7 @@ exec_elf_imgact(struct image_params *imgp)
}
}
+ /* XXX: lock the vm_mtx when twiddling vmspace? */
vmspace->vm_tsize = text_size >> PAGE_SHIFT;
vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
vmspace->vm_dsize = data_size >> PAGE_SHIFT;
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index f1a6a0b..6f5c653 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -485,11 +485,15 @@ start_init(void *dummy)
* Need just enough stack to hold the faked-up "execve()" arguments.
*/
addr = trunc_page(USRSTACK - PAGE_SIZE);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
panic("init: couldn't allocate argument space");
p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
p->p_vmspace->vm_ssize = 1;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
if ((var = getenv("init_path")) != NULL) {
strncpy(init_path, var, sizeof init_path);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 0b1b29e..8f49538 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -412,6 +412,7 @@ exec_map_first_page(imgp)
VOP_GETVOBJECT(imgp->vp, &object);
s = splvm();
+ mtx_lock(&vm_mtx);
ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
@@ -443,6 +444,7 @@ exec_map_first_page(imgp)
vm_page_free(ma[0]);
}
splx(s);
+ mtx_unlock(&vm_mtx);
return EIO;
}
}
@@ -454,6 +456,7 @@ exec_map_first_page(imgp)
pmap_kenter((vm_offset_t) imgp->image_header, VM_PAGE_TO_PHYS(ma[0]));
imgp->firstpage = ma[0];
+ mtx_unlock(&vm_mtx);
return 0;
}
@@ -461,9 +464,12 @@ void
exec_unmap_first_page(imgp)
struct image_params *imgp;
{
+
if (imgp->firstpage) {
+ mtx_lock(&vm_mtx);
pmap_kremove((vm_offset_t) imgp->image_header);
vm_page_unwire(imgp->firstpage, 1);
+ mtx_unlock(&vm_mtx);
imgp->firstpage = NULL;
}
}
@@ -482,6 +488,7 @@ exec_new_vmspace(imgp)
caddr_t stack_addr = (caddr_t) (USRSTACK - MAXSSIZ);
vm_map_t map = &vmspace->vm_map;
+ mtx_assert(&vm_mtx, MA_OWNED);
imgp->vmspace_destroyed = 1;
/*
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index d5dccab..1af27d2 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -222,6 +222,7 @@ exit1(p, rv)
* Can't free the entire vmspace as the kernel stack
* may be mapped within that space also.
*/
+ mtx_lock(&vm_mtx);
if (vm->vm_refcnt == 1) {
if (vm->vm_shm)
shmexit(p);
@@ -230,6 +231,7 @@ exit1(p, rv)
(void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
VM_MAXUSER_ADDRESS);
}
+ mtx_unlock(&vm_mtx);
PROC_LOCK(p);
if (SESS_LEADER(p)) {
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index d3b991d..62dcc06 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -220,6 +220,7 @@ fork1(p1, flags, procp)
if ((flags & RFPROC) == 0) {
vm_fork(p1, 0, flags);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
/*
* Close all file descriptors.
@@ -567,6 +568,7 @@ again:
* execution path later. (ie: directly into user mode)
*/
vm_fork(p1, p2, flags);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
if (flags == (RFFDG | RFPROC)) {
cnt.v_forks++;
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 27431ab..f46313c 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -498,8 +498,10 @@ dosetrlimit(p, which, limp)
}
addr = trunc_page(addr);
size = round_page(size);
+ mtx_lock(&vm_mtx);
(void) vm_map_protect(&p->p_vmspace->vm_map,
addr, addr+size, prot, FALSE);
+ mtx_unlock(&vm_mtx);
}
break;
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 7d793de..e09a377 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -378,6 +378,13 @@ msleep(ident, mtx, priority, wmesg, timo)
int rval = 0;
WITNESS_SAVE_DECL(mtx);
+ KASSERT(ident == &proc0 || /* XXX: swapper */
+ timo != 0 || /* XXX: we might still miss a wakeup */
+ mtx_owned(&Giant) || mtx != NULL,
+ ("indefinite sleep without mutex, wmesg: \"%s\" ident: %p",
+ wmesg, ident));
+ if (mtx_owned(&vm_mtx) && mtx != &vm_mtx)
+ panic("sleeping with vm_mtx held.");
#ifdef KTRACE
if (p && KTRPOINT(p, KTR_CSW))
ktrcsw(p->p_tracep, 1, 0);
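
The checks added to msleep() above encode the sleeping rule for the new lock: a thread holding vm_mtx may sleep only by passing vm_mtx itself as the msleep() interlock, so the mutex is dropped for the duration of the sleep and reacquired before msleep() returns. A hedged usage sketch follows; the wait channel vm_resource_busy and its predicate are hypothetical, and only the msleep(9) signature already used in this patch is assumed.

    /* Sketch: sleeping while holding vm_mtx via the msleep() interlock. */
    mtx_lock(&vm_mtx);
    while (vm_resource_busy)                    /* hypothetical condition */
            msleep(&vm_resource_busy, &vm_mtx, PVM, "vmwait", 0);
    /* vm_mtx is held again here */
    mtx_unlock(&vm_mtx);
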
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 344f163..613d1e4 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
ef = (elf_file_t) lf;
#ifdef SPARSE_MAPPING
+ mtx_lock(&vm_mtx);
ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
if (ef->object == NULL) {
+ mtx_unlock(&vm_mtx);
free(ef, M_LINKER);
error = ENOMEM;
goto out;
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
ef->object = 0;
goto out;
}
+ mtx_unlock(&vm_mtx);
#else
ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
/*
* Wire down the pages
*/
+ mtx_lock(&vm_mtx);
vm_map_pageable(kernel_map,
(vm_offset_t) segbase,
(vm_offset_t) segbase + segs[i]->p_memsz,
FALSE);
+ mtx_unlock(&vm_mtx);
#endif
}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
}
#ifdef SPARSE_MAPPING
if (ef->object) {
+ mtx_lock(&vm_mtx);
vm_map_remove(kernel_map, (vm_offset_t) ef->address,
(vm_offset_t) ef->address
+ (ef->object->size << PAGE_SHIFT));
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
}
#else
if (ef->address)
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index 344f163..613d1e4 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
ef = (elf_file_t) lf;
#ifdef SPARSE_MAPPING
+ mtx_lock(&vm_mtx);
ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
if (ef->object == NULL) {
+ mtx_unlock(&vm_mtx);
free(ef, M_LINKER);
error = ENOMEM;
goto out;
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
ef->object = 0;
goto out;
}
+ mtx_unlock(&vm_mtx);
#else
ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
/*
* Wire down the pages
*/
+ mtx_lock(&vm_mtx);
vm_map_pageable(kernel_map,
(vm_offset_t) segbase,
(vm_offset_t) segbase + segs[i]->p_memsz,
FALSE);
+ mtx_unlock(&vm_mtx);
#endif
}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
}
#ifdef SPARSE_MAPPING
if (ef->object) {
+ mtx_lock(&vm_mtx);
vm_map_remove(kernel_map, (vm_offset_t) ef->address,
(vm_offset_t) ef->address
+ (ef->object->size << PAGE_SHIFT));
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
}
#else
if (ef->address)
diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c
index 9ac4338..061d151 100644
--- a/sys/kern/subr_blist.c
+++ b/sys/kern/subr_blist.c
@@ -71,6 +71,7 @@
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index af7bfc1..8924fa2 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -330,9 +330,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
i = trap_pfault(&frame, TRUE, eva);
- mtx_unlock(&Giant);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if (i == -2) {
/*
@@ -443,9 +441,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
(void) trap_pfault(&frame, FALSE, eva);
- mtx_unlock(&Giant);
goto out;
case T_DNA:
@@ -887,7 +883,9 @@ nogo:
frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
return (0);
}
+ mtx_lock(&Giant);
trap_fatal(frame, eva);
+ mtx_unlock(&Giant);
return (-1);
}
@@ -1147,14 +1145,17 @@ syscall(frame)
/*
* Try to run the syscall without the MP lock if the syscall
- * is MP safe. We have to obtain the MP lock no matter what if
- * we are ktracing
+ * is MP safe.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
mtx_lock(&Giant);
}
#ifdef KTRACE
+ /*
+ * We have to obtain the MP lock no matter what if
+ * we are ktracing
+ */
if (KTRPOINT(p, KTR_SYSCALL)) {
if (!mtx_owned(&Giant))
mtx_lock(&Giant);
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 0c32402..a788448 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -56,6 +56,7 @@
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
@@ -253,6 +254,7 @@ pipespace(cpipe, size)
* kernel_object.
* XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
*/
+ mtx_lock(&vm_mtx);
object = vm_object_allocate(OBJT_DEFAULT, npages);
buffer = (caddr_t) vm_map_min(kernel_map);
@@ -264,6 +266,7 @@ pipespace(cpipe, size)
error = vm_map_find(kernel_map, object, 0,
(vm_offset_t *) &buffer, size, 1,
VM_PROT_ALL, VM_PROT_ALL, 0);
+ mtx_unlock(&vm_mtx);
if (error != KERN_SUCCESS) {
vm_object_deallocate(object);
@@ -551,6 +554,7 @@ pipe_build_write_buffer(wpipe, uio)
size = wpipe->pipe_buffer.size;
endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
+ mtx_lock(&vm_mtx);
addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
vm_page_t m;
@@ -561,6 +565,7 @@ pipe_build_write_buffer(wpipe, uio)
for (j = 0; j < i; j++)
vm_page_unwire(wpipe->pipe_map.ms[j], 1);
+ mtx_unlock(&vm_mtx);
return (EFAULT);
}
@@ -592,6 +597,7 @@ pipe_build_write_buffer(wpipe, uio)
pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
wpipe->pipe_map.npages);
+ mtx_unlock(&vm_mtx);
/*
* and update the uio data
*/
@@ -625,8 +631,10 @@ pipe_destroy_write_buffer(wpipe)
amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
}
}
+ mtx_lock(&vm_mtx);
for (i = 0; i < wpipe->pipe_map.npages; i++)
vm_page_unwire(wpipe->pipe_map.ms[i], 1);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -1199,12 +1207,13 @@ pipeclose(cpipe)
wakeup(ppipe);
ppipe->pipe_peer = NULL;
}
-
/*
* free resources
*/
+ mtx_lock(&vm_mtx);
pipe_free_kmem(cpipe);
zfree(pipe_zone, cpipe);
+ mtx_unlock(&vm_mtx);
}
}
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 32255bc..269814c 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -61,7 +61,7 @@
14 STD POSIX { int mknod(char *path, int mode, int dev); }
15 STD POSIX { int chmod(char *path, int mode); }
16 STD POSIX { int chown(char *path, int uid, int gid); }
-17 STD BSD { int obreak(char *nsize); } break obreak_args int
+17 MPSAFE STD BSD { int obreak(char *nsize); } break obreak_args int
18 STD BSD { int getfsstat(struct statfs *buf, long bufsize, \
int flags); }
19 COMPAT POSIX { long lseek(int fd, long offset, int whence); }
@@ -121,23 +121,23 @@
62 COMPAT POSIX { int fstat(int fd, struct ostat *sb); }
63 COMPAT BSD { int getkerninfo(int op, char *where, size_t *size, \
int arg); } getkerninfo getkerninfo_args int
-64 COMPAT BSD { int getpagesize(void); } \
+64 MPSAFE COMPAT BSD { int getpagesize(void); } \
getpagesize getpagesize_args int
65 STD BSD { int msync(void *addr, size_t len, int flags); }
66 STD BSD { int vfork(void); }
67 OBSOL NOHIDE vread
68 OBSOL NOHIDE vwrite
-69 STD BSD { int sbrk(int incr); }
-70 STD BSD { int sstk(int incr); }
-71 COMPAT BSD { int mmap(void *addr, int len, int prot, \
+69 MPSAFE STD BSD { int sbrk(int incr); }
+70 MPSAFE STD BSD { int sstk(int incr); }
+71 MPSAFE COMPAT BSD { int mmap(void *addr, int len, int prot, \
int flags, int fd, long pos); }
-72 STD BSD { int ovadvise(int anom); } vadvise ovadvise_args int
-73 STD BSD { int munmap(void *addr, size_t len); }
-74 STD BSD { int mprotect(const void *addr, size_t len, int prot); }
-75 STD BSD { int madvise(void *addr, size_t len, int behav); }
+72 MPSAFE STD BSD { int ovadvise(int anom); } vadvise ovadvise_args int
+73 MPSAFE STD BSD { int munmap(void *addr, size_t len); }
+74 MPSAFE STD BSD { int mprotect(const void *addr, size_t len, int prot); }
+75 MPSAFE STD BSD { int madvise(void *addr, size_t len, int behav); }
76 OBSOL NOHIDE vhangup
77 OBSOL NOHIDE vlimit
-78 STD BSD { int mincore(const void *addr, size_t len, \
+78 MPSAFE STD BSD { int mincore(const void *addr, size_t len, \
char *vec); }
79 STD POSIX { int getgroups(u_int gidsetsize, gid_t *gidset); }
80 STD POSIX { int setgroups(u_int gidsetsize, gid_t *gidset); }
@@ -306,7 +306,7 @@
setrlimit __setrlimit_args int
196 STD BSD { int getdirentries(int fd, char *buf, u_int count, \
long *basep); }
-197 STD BSD { caddr_t mmap(caddr_t addr, size_t len, int prot, \
+197 MPSAFE STD BSD { caddr_t mmap(caddr_t addr, size_t len, int prot, \
int flags, int fd, int pad, off_t pos); }
198 STD NOHIDE { int nosys(void); } __syscall __syscall_args int
199 STD POSIX { off_t lseek(int fd, int pad, off_t offset, \
@@ -318,8 +318,8 @@
__sysctl sysctl_args int
; properly, __sysctl should be a NOHIDE, but making an exception
; here allows to avoid one in libc/sys/Makefile.inc.
-203 STD BSD { int mlock(const void *addr, size_t len); }
-204 STD BSD { int munlock(const void *addr, size_t len); }
+203 MPSAFE STD BSD { int mlock(const void *addr, size_t len); }
+204 MPSAFE STD BSD { int munlock(const void *addr, size_t len); }
205 STD BSD { int undelete(char *path); }
206 STD BSD { int futimes(int fd, struct timeval *tptr); }
207 STD BSD { int getpgid(pid_t pid); }
@@ -386,7 +386,7 @@
248 UNIMPL NOHIDE nosys
249 UNIMPL NOHIDE nosys
; syscall numbers initially used in OpenBSD
-250 STD BSD { int minherit(void *addr, size_t len, int inherit); }
+250 MPSAFE STD BSD { int minherit(void *addr, size_t len, int inherit); }
251 STD BSD { int rfork(int flags); }
252 STD BSD { int openbsd_poll(struct pollfd *fds, u_int nfds, \
int timeout); }
@@ -414,7 +414,7 @@
274 STD BSD { int lchmod(char *path, mode_t mode); }
275 NOPROTO BSD { int lchown(char *path, uid_t uid, gid_t gid); } netbsd_lchown lchown_args int
276 STD BSD { int lutimes(char *path, struct timeval *tptr); }
-277 NOPROTO BSD { int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
+277 MPSAFE NOPROTO BSD { int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
278 STD BSD { int nstat(char *path, struct nstat *ub); }
279 STD BSD { int nfstat(int fd, struct nstat *sb); }
280 STD BSD { int nlstat(char *path, struct nstat *ub); }
@@ -463,8 +463,8 @@
321 STD BSD { int yield(void); }
322 OBSOL NOHIDE thr_sleep
323 OBSOL NOHIDE thr_wakeup
-324 STD BSD { int mlockall(int how); }
-325 STD BSD { int munlockall(void); }
+324 MPSAFE STD BSD { int mlockall(int how); }
+325 MPSAFE STD BSD { int munlockall(void); }
326 STD BSD { int __getcwd(u_char *buf, u_int buflen); }
327 STD POSIX { int sched_setparam (pid_t pid, const struct sched_param *param); }
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index fab53a8..0a9abda 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -43,6 +43,7 @@
#include <sys/shm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
@@ -314,14 +315,17 @@ shmat(p, uap)
}
shm_handle = shmseg->shm_internal;
+ mtx_lock(&vm_mtx);
vm_object_reference(shm_handle->shm_object);
rv = vm_map_find(&p->p_vmspace->vm_map, shm_handle->shm_object,
0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0);
if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return ENOMEM;
}
vm_map_inherit(&p->p_vmspace->vm_map,
attach_va, attach_va + size, VM_INHERIT_SHARE);
+ mtx_unlock(&vm_mtx);
shmmap_s->va = attach_va;
shmmap_s->shmid = uap->shmid;
@@ -549,6 +553,7 @@ shmget_allocate_segment(p, uap, mode)
* We make sure that we have allocated a pager before we need
* to.
*/
+ mtx_lock(&vm_mtx);
if (shm_use_phys) {
shm_handle->shm_object =
vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
@@ -558,6 +563,7 @@ shmget_allocate_segment(p, uap, mode)
}
vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);
+ mtx_unlock(&vm_mtx);
shmseg->shm_internal = shm_handle;
shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c1b53d8..a980330 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -281,6 +281,8 @@ waitrunningbufspace(void)
* Called when a buffer is extended. This function clears the B_CACHE
* bit if the newly extended portion of the buffer does not contain
* valid data.
+ *
+ * must be called with vm_mtx held
*/
static __inline__
void
@@ -426,11 +428,13 @@ bufinit(void)
* from buf_daemon.
*/
+ mtx_lock(&vm_mtx);
bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
bogus_page = vm_page_alloc(kernel_object,
((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
VM_ALLOC_NORMAL);
cnt.v_wire_count++;
+ mtx_unlock(&vm_mtx);
}
@@ -441,17 +445,27 @@ bufinit(void)
* buffer_map.
*
* Since this call frees up buffer space, we call bufspacewakeup().
+ *
+ * Can be called with or without the vm_mtx.
*/
static void
bfreekva(struct buf * bp)
{
+
if (bp->b_kvasize) {
+ int hadvmlock;
+
++buffreekvacnt;
bufspace -= bp->b_kvasize;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
vm_map_delete(buffer_map,
(vm_offset_t) bp->b_kvabase,
(vm_offset_t) bp->b_kvabase + bp->b_kvasize
);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
bp->b_kvasize = 0;
bufspacewakeup();
}
@@ -807,6 +821,7 @@ bdwrite(struct buf * bp)
VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
}
+ mtx_lock(&vm_mtx);
/*
* Set the *dirty* buffer range based upon the VM system dirty pages.
*/
@@ -820,6 +835,7 @@ bdwrite(struct buf * bp)
* out on the next sync, or perhaps the cluster will be completed.
*/
vfs_clean_pages(bp);
+ mtx_unlock(&vm_mtx);
bqrelse(bp);
/*
@@ -973,12 +989,15 @@ buf_dirty_count_severe(void)
* Release a busy buffer and, if requested, free its resources. The
* buffer will be stashed in the appropriate bufqueue[] allowing it
* to be accessed later as a cache entity or reused for other purposes.
+ *
* vm_mtx must not be held.
*/
void
brelse(struct buf * bp)
{
int s;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
s = splbio();
@@ -1088,6 +1107,7 @@ brelse(struct buf * bp)
resid = bp->b_bufsize;
foff = bp->b_offset;
+ mtx_lock(&vm_mtx);
for (i = 0; i < bp->b_npages; i++) {
int had_bogus = 0;
@@ -1099,10 +1119,12 @@ brelse(struct buf * bp)
* now.
*/
if (m == bogus_page) {
+ mtx_unlock(&vm_mtx);
VOP_GETVOBJECT(vp, &obj);
poff = OFF_TO_IDX(bp->b_offset);
had_bogus = 1;
+ mtx_lock(&vm_mtx);
for (j = i; j < bp->b_npages; j++) {
vm_page_t mtmp;
mtmp = bp->b_pages[j];
@@ -1136,11 +1158,15 @@ brelse(struct buf * bp)
if (bp->b_flags & (B_INVAL | B_RELBUF))
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
} else if (bp->b_flags & B_VMIO) {
- if (bp->b_flags & (B_INVAL | B_RELBUF))
+ if (bp->b_flags & (B_INVAL | B_RELBUF)) {
+ mtx_lock(&vm_mtx);
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
+ }
}
@@ -1302,6 +1328,9 @@ bqrelse(struct buf * bp)
splx(s);
}
+/*
+ * Must be called with vm_mtx held.
+ */
static void
vfs_vmio_release(bp)
struct buf *bp;
@@ -1310,6 +1339,7 @@ vfs_vmio_release(bp)
vm_page_t m;
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
for (i = 0; i < bp->b_npages; i++) {
m = bp->b_pages[i];
bp->b_pages[i] = NULL;
@@ -1343,6 +1373,9 @@ vfs_vmio_release(bp)
}
splx(s);
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+
+ /* could drop vm_mtx here */
+
if (bp->b_bufsize) {
bufspacewakeup();
bp->b_bufsize = 0;
@@ -1614,7 +1647,9 @@ restart:
if (qindex == QUEUE_CLEAN) {
if (bp->b_flags & B_VMIO) {
bp->b_flags &= ~B_ASYNC;
+ mtx_lock(&vm_mtx);
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
}
if (bp->b_vp)
brelvp(bp);
@@ -1735,6 +1770,8 @@ restart:
if (maxsize != bp->b_kvasize) {
vm_offset_t addr = 0;
+ /* we'll hold the lock over some vm ops */
+ mtx_lock(&vm_mtx);
bfreekva(bp);
if (vm_map_findspace(buffer_map,
@@ -1743,6 +1780,7 @@ restart:
* Uh oh. Buffer map is to fragmented. We
* must defragment the map.
*/
+ mtx_unlock(&vm_mtx);
++bufdefragcnt;
defrag = 1;
bp->b_flags |= B_INVAL;
@@ -1759,6 +1797,7 @@ restart:
bufspace += bp->b_kvasize;
++bufreusecnt;
}
+ mtx_unlock(&vm_mtx);
}
bp->b_data = bp->b_kvabase;
}
@@ -1936,18 +1975,24 @@ inmem(struct vnode * vp, daddr_t blkno)
size = vp->v_mount->mnt_stat.f_iosize;
off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
+ mtx_lock(&vm_mtx);
for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
m = vm_page_lookup(obj, OFF_TO_IDX(off + toff));
if (!m)
- return 0;
+ goto notinmem;
tinc = size;
if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
if (vm_page_is_valid(m,
(vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
- return 0;
+ goto notinmem;
}
+ mtx_unlock(&vm_mtx);
return 1;
+
+notinmem:
+ mtx_unlock(&vm_mtx);
+ return (0);
}
/*
@@ -1960,11 +2005,14 @@ inmem(struct vnode * vp, daddr_t blkno)
*
* This routine is primarily used by NFS, but is generalized for the
* B_VMIO case.
+ *
+ * Can be called with or without vm_mtx
*/
static void
vfs_setdirty(struct buf *bp)
{
int i;
+ int hadvmlock;
vm_object_t object;
/*
@@ -1983,6 +2031,10 @@ vfs_setdirty(struct buf *bp)
if ((bp->b_flags & B_VMIO) == 0)
return;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+
object = bp->b_pages[0]->object;
if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY))
@@ -2040,6 +2092,8 @@ vfs_setdirty(struct buf *bp)
bp->b_dirtyend = eoffset;
}
}
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
/*
@@ -2441,6 +2495,7 @@ allocbuf(struct buf *bp, int size)
* DEV_BSIZE aligned existing buffer size. Figure out
* if we have to remove any pages.
*/
+ mtx_lock(&vm_mtx);
if (desiredpages < bp->b_npages) {
for (i = desiredpages; i < bp->b_npages; i++) {
/*
@@ -2461,6 +2516,7 @@ allocbuf(struct buf *bp, int size)
(desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages));
bp->b_npages = desiredpages;
}
+ mtx_unlock(&vm_mtx);
} else if (size > bp->b_bcount) {
/*
* We are growing the buffer, possibly in a
@@ -2481,6 +2537,7 @@ allocbuf(struct buf *bp, int size)
vp = bp->b_vp;
VOP_GETVOBJECT(vp, &obj);
+ mtx_lock(&vm_mtx);
while (bp->b_npages < desiredpages) {
vm_page_t m;
vm_pindex_t pi;
@@ -2589,6 +2646,9 @@ allocbuf(struct buf *bp, int size)
bp->b_pages,
bp->b_npages
);
+
+ mtx_unlock(&vm_mtx);
+
bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
(vm_offset_t)(bp->b_offset & PAGE_MASK));
}
@@ -2726,6 +2786,7 @@ bufdone(struct buf *bp)
if (error) {
panic("biodone: no object");
}
+ mtx_lock(&vm_mtx);
#if defined(VFS_BIO_DEBUG)
if (obj->paging_in_progress < bp->b_npages) {
printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
@@ -2814,6 +2875,7 @@ bufdone(struct buf *bp)
}
if (obj)
vm_object_pip_wakeupn(obj, 0);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -2837,12 +2899,15 @@ bufdone(struct buf *bp)
* This routine is called in lieu of iodone in the case of
* incomplete I/O. This keeps the busy status for pages
* consistant.
+ *
+ * vm_mtx should not be held
*/
void
vfs_unbusy_pages(struct buf * bp)
{
int i;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
runningbufwakeup(bp);
if (bp->b_flags & B_VMIO) {
struct vnode *vp = bp->b_vp;
@@ -2850,6 +2915,7 @@ vfs_unbusy_pages(struct buf * bp)
VOP_GETVOBJECT(vp, &obj);
+ mtx_lock(&vm_mtx);
for (i = 0; i < bp->b_npages; i++) {
vm_page_t m = bp->b_pages[i];
@@ -2866,6 +2932,7 @@ vfs_unbusy_pages(struct buf * bp)
vm_page_io_finish(m);
}
vm_object_pip_wakeupn(obj, 0);
+ mtx_unlock(&vm_mtx);
}
}
@@ -2876,12 +2943,15 @@ vfs_unbusy_pages(struct buf * bp)
* range is restricted to the buffer's size.
*
* This routine is typically called after a read completes.
+ *
+ * vm_mtx should be held
*/
static void
vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
{
vm_ooffset_t soff, eoff;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Start and end offsets in buffer. eoff - soff may not cross a
* page boundry or cross the end of the buffer. The end of the
@@ -2917,12 +2987,15 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
* Since I/O has not been initiated yet, certain buffer flags
* such as BIO_ERROR or B_INVAL may be in an inconsistant state
* and should be ignored.
+ *
+ * vm_mtx should not be held
*/
void
vfs_busy_pages(struct buf * bp, int clear_modify)
{
int i, bogus;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
if (bp->b_flags & B_VMIO) {
struct vnode *vp = bp->b_vp;
vm_object_t obj;
@@ -2932,6 +3005,7 @@ vfs_busy_pages(struct buf * bp, int clear_modify)
foff = bp->b_offset;
KASSERT(bp->b_offset != NOOFFSET,
("vfs_busy_pages: no buffer offset"));
+ mtx_lock(&vm_mtx);
vfs_setdirty(bp);
retry:
@@ -2979,6 +3053,7 @@ retry:
}
if (bogus)
pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
}
}
@@ -2989,12 +3064,15 @@ retry:
*
* Note that while we only really need to clean through to b_bcount, we
* just go ahead and clean through to b_bufsize.
+ *
+ * should be called with vm_mtx held
*/
static void
vfs_clean_pages(struct buf * bp)
{
int i;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (bp->b_flags & B_VMIO) {
vm_ooffset_t foff;
@@ -3021,6 +3099,7 @@ vfs_clean_pages(struct buf * bp)
* Set the range within the buffer to valid and clean. The range is
* relative to the beginning of the buffer, b_offset. Note that b_offset
* itself may be offset from the beginning of the first page.
+ *
*/
void
@@ -3061,13 +3140,18 @@ vfs_bio_set_validclean(struct buf *bp, int base, int size)
*
* Note that while we only theoretically need to clear through b_bcount,
* we go ahead and clear through b_bufsize.
+ *
+ * We'll get vm_mtx here for safety if processing a VMIO buffer.
+ * I don't think vm_mtx is needed, but we're twiddling vm_page flags.
*/
void
vfs_bio_clrbuf(struct buf *bp) {
int i, mask = 0;
caddr_t sa, ea;
+
if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
+ mtx_lock(&vm_mtx);
bp->b_flags &= ~B_INVAL;
bp->b_ioflags &= ~BIO_ERROR;
if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
@@ -3079,6 +3163,7 @@ vfs_bio_clrbuf(struct buf *bp) {
}
bp->b_pages[0]->valid |= mask;
bp->b_resid = 0;
+ mtx_unlock(&vm_mtx);
return;
}
ea = sa = bp->b_data;
@@ -3106,6 +3191,7 @@ vfs_bio_clrbuf(struct buf *bp) {
vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
}
bp->b_resid = 0;
+ mtx_unlock(&vm_mtx);
} else {
clrbuf(bp);
}
@@ -3115,18 +3201,22 @@ vfs_bio_clrbuf(struct buf *bp) {
* vm_hold_load_pages and vm_hold_unload pages get pages into
* a buffers address space. The pages are anonymous and are
* not associated with a file object.
+ *
+ * vm_mtx should not be held
*/
-void
+static void
vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
{
vm_offset_t pg;
vm_page_t p;
int index;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
to = round_page(to);
from = round_page(from);
index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ mtx_lock(&vm_mtx);
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
tryagain:
@@ -3152,6 +3242,7 @@ tryagain:
vm_page_wakeup(p);
}
bp->b_npages = index;
+ mtx_unlock(&vm_mtx);
}
void
@@ -3160,11 +3251,15 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
vm_offset_t pg;
vm_page_t p;
int index, newnpages;
+ int hadvmlock;
from = round_page(from);
to = round_page(to);
newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
p = bp->b_pages[index];
if (p && (index < bp->b_npages)) {
@@ -3180,6 +3275,8 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
}
}
bp->b_npages = newnpages;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 8a6e045..0eb47bd 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -433,6 +433,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
BUF_KERNPROC(tbp);
TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
tbp, b_cluster.cluster_entry);
+ mtx_lock(&vm_mtx);
for (j = 0; j < tbp->b_npages; j += 1) {
vm_page_t m;
m = tbp->b_pages[j];
@@ -446,10 +447,12 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
tbp->b_pages[j] = bogus_page;
}
+ mtx_unlock(&vm_mtx);
bp->b_bcount += tbp->b_bcount;
bp->b_bufsize += tbp->b_bufsize;
}
+ mtx_lock(&vm_mtx);
for(j=0;j<bp->b_npages;j++) {
if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) ==
VM_PAGE_BITS_ALL)
@@ -462,6 +465,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *)bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
return (bp);
}
@@ -484,7 +488,9 @@ cluster_callback(bp)
if (bp->b_ioflags & BIO_ERROR)
error = bp->b_error;
+ mtx_lock(&vm_mtx);
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+ mtx_unlock(&vm_mtx);
/*
* Move memory from the large cluster buffer into the component
* buffers and mark IO as done on these.
@@ -851,6 +857,7 @@ cluster_wbuild(vp, size, start_lbn, len)
}
}
+ mtx_lock(&vm_mtx);
for (j = 0; j < tbp->b_npages; j += 1) {
m = tbp->b_pages[j];
vm_page_io_start(m);
@@ -861,6 +868,7 @@ cluster_wbuild(vp, size, start_lbn, len)
bp->b_npages++;
}
}
+ mtx_unlock(&vm_mtx);
}
bp->b_bcount += size;
bp->b_bufsize += size;
@@ -879,8 +887,10 @@ cluster_wbuild(vp, size, start_lbn, len)
tbp, b_cluster.cluster_entry);
}
finishcluster:
+ mtx_lock(&vm_mtx);
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *) bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
if (bp->b_bufsize > bp->b_kvasize)
panic(
"cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 328a9b1..d17e934 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -535,14 +535,18 @@ retry:
if (vp->v_type == VREG || vp->v_type == VDIR) {
if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
goto retn;
+ mtx_lock(&vm_mtx);
object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
+ mtx_unlock(&vm_mtx);
} else if (devsw(vp->v_rdev) != NULL) {
/*
* This simply allocates the biggest object possible
* for a disk vnode. This should be fixed, but doesn't
* cause any problems (yet).
*/
+ mtx_lock(&vm_mtx);
object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
+ mtx_unlock(&vm_mtx);
} else {
goto retn;
}
@@ -550,15 +554,23 @@ retry:
* Dereference the reference we just created. This assumes
* that the object is associated with the vp.
*/
+ mtx_lock(&vm_mtx);
object->ref_count--;
+ mtx_unlock(&vm_mtx);
vp->v_usecount--;
} else {
+ /*
+ * XXX: safe to hold vm mutex through VOP_UNLOCK?
+ */
+ mtx_lock(&vm_mtx);
if (object->flags & OBJ_DEAD) {
VOP_UNLOCK(vp, 0, p);
- tsleep(object, PVM, "vodead", 0);
+ msleep(object, VM_OBJECT_MTX(object), PVM, "vodead", 0);
+ mtx_unlock(&vm_mtx);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
goto retry;
}
+ mtx_unlock(&vm_mtx);
}
KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
@@ -580,6 +592,7 @@ vop_stddestroyvobject(ap)
if (vp->v_object == NULL)
return (0);
+ mtx_lock(&vm_mtx);
if (obj->ref_count == 0) {
/*
* vclean() may be called twice. The first time
@@ -594,6 +607,7 @@ vop_stddestroyvobject(ap)
*/
vm_pager_deallocate(obj);
}
+ mtx_unlock(&vm_mtx);
return (0);
}
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 6b73258..3f97551 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- if (VOP_GETVOBJECT(vp, &obj) == 0)
+ if (VOP_GETVOBJECT(vp, &obj) == 0) {
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
vm_object_page_clean(obj, 0, 0, 0);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
+ }
error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
#ifdef SOFTUPDATES
if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
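The fsync() hunk above releases Giant before taking vm_mtx around vm_object_page_clean() and then restores the original state, so neither of the two big locks is acquired while the other is held across that call. A minimal sketch of that hand-over-hand discipline follows; example_clean_object() is a hypothetical helper, not code from this commit, though the lock calls and the VM call are the ones used above.

/*
 * Sketch only: mirrors the lock choreography used in fsync() above.
 */
static void
example_clean_object(vm_object_t obj)
{
	mtx_assert(&Giant, MA_OWNED);
	mtx_assert(&vm_mtx, MA_NOTOWNED);

	mtx_unlock(&Giant);	/* don't stack vm_mtx on top of Giant */
	mtx_lock(&vm_mtx);
	vm_object_page_clean(obj, 0, 0, 0);
	mtx_unlock(&vm_mtx);
	mtx_lock(&Giant);	/* restore the caller's Giant */
}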
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 2f4dc8d..6c050ba 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -711,6 +711,8 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
int s, error;
vm_object_t object;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+
if (flags & V_SAVE) {
s = splbio();
while (vp->v_numoutput) {
@@ -797,8 +799,10 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
*/
mtx_lock(&vp->v_interlock);
if (VOP_GETVOBJECT(vp, &object) == 0) {
+ mtx_lock(&vm_mtx);
vm_object_page_remove(object, 0, 0,
(flags & V_SAVE) ? TRUE : FALSE);
+ mtx_unlock(&vm_mtx);
}
mtx_unlock(&vp->v_interlock);
@@ -1132,6 +1136,8 @@ speedup_syncer()
* Also sets B_PAGING flag to indicate that vnode is not fully associated
* with the buffer. i.e. the bp has not been linked into the vnode or
* ref-counted.
+ *
+ * Doesn't block; only the vnode seems to need a lock.
*/
void
pbgetvp(vp, bp)
@@ -1554,6 +1560,7 @@ vput(vp)
{
struct proc *p = curproc; /* XXX */
+ mtx_assert(&Giant, MA_OWNED);
KASSERT(vp != NULL, ("vput: null vp"));
mtx_lock(&vp->v_interlock);
/* Skip this v_writecount check if we're going to panic below. */
@@ -2382,7 +2389,11 @@ loop:
if (!vget(vp,
LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
if (VOP_GETVOBJECT(vp, &obj) == 0) {
- vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
+ mtx_lock(&vm_mtx);
+ vm_object_page_clean(obj, 0, 0,
+ flags == MNT_WAIT ?
+ OBJPC_SYNC : OBJPC_NOSYNC);
+ mtx_unlock(&vm_mtx);
anyio = 1;
}
vput(vp);
@@ -2409,6 +2420,8 @@ vfs_object_create(vp, p, cred)
struct proc *p;
struct ucred *cred;
{
+
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
return (VOP_CREATEVOBJECT(vp, cred, p));
}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 6b73258..3f97551 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- if (VOP_GETVOBJECT(vp, &obj) == 0)
+ if (VOP_GETVOBJECT(vp, &obj) == 0) {
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
vm_object_page_clean(obj, 0, 0, 0);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
+ }
error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
#ifdef SOFTUPDATES
if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
diff --git a/sys/miscfs/procfs/procfs_map.c b/sys/miscfs/procfs/procfs_map.c
index 5e4a30c..5c21993 100644
--- a/sys/miscfs/procfs/procfs_map.c
+++ b/sys/miscfs/procfs/procfs_map.c
@@ -42,6 +42,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/vnode.h>
diff --git a/sys/miscfs/procfs/procfs_mem.c b/sys/miscfs/procfs/procfs_mem.c
index 3a2f8d2..1e28870 100644
--- a/sys/miscfs/procfs/procfs_mem.c
+++ b/sys/miscfs/procfs/procfs_mem.c
@@ -48,6 +48,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/ptrace.h>
#include <sys/user.h>
@@ -88,8 +89,14 @@ procfs_rwmem(curp, p, uio)
* usage in that process can be messed up.
*/
vm = p->p_vmspace;
- if ((p->p_flag & P_WEXIT) || (vm->vm_refcnt < 1))
+ if ((p->p_flag & P_WEXIT))
return EFAULT;
+
+ mtx_lock(&vm_mtx);
+ if (vm->vm_refcnt < 1) {
+ mtx_unlock(&vm_mtx);
+ return EFAULT;
+ }
++vm->vm_refcnt;
/*
* The map we want...
@@ -207,7 +214,9 @@ procfs_rwmem(curp, p, uio)
/*
* Now do the i/o move.
*/
+ mtx_unlock(&vm_mtx);
error = uiomove((caddr_t)(kva + page_offset), len, uio);
+ mtx_lock(&vm_mtx);
pmap_kremove(kva);
@@ -226,6 +235,7 @@ procfs_rwmem(curp, p, uio)
kmem_free(kernel_map, kva, PAGE_SIZE);
vmspace_free(vm);
+ mtx_unlock(&vm_mtx);
return (error);
}
diff --git a/sys/miscfs/specfs/spec_vnops.c b/sys/miscfs/specfs/spec_vnops.c
index 2940f40..ba81229 100644
--- a/sys/miscfs/specfs/spec_vnops.c
+++ b/sys/miscfs/specfs/spec_vnops.c
@@ -731,6 +731,8 @@ spec_getpages(ap)
cnt.v_vnodein++;
cnt.v_vnodepgsin += pcount;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
/* Do the input. */
BUF_STRATEGY(bp);
@@ -741,6 +743,8 @@ spec_getpages(ap)
tsleep(bp, PVM, "spread", 0);
splx(s);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if ((bp->b_ioflags & BIO_ERROR) != 0) {
if (bp->b_error)
diff --git a/sys/miscfs/union/union_subr.c b/sys/miscfs/union/union_subr.c
index 869818f..3ac98bf 100644
--- a/sys/miscfs/union/union_subr.c
+++ b/sys/miscfs/union/union_subr.c
@@ -45,6 +45,7 @@
#include <sys/filedesc.h>
#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/module.h>
#include <sys/mount.h>
diff --git a/sys/nfs/nfs_bio.c b/sys/nfs/nfs_bio.c
index cb7297f..234815c 100644
--- a/sys/nfs/nfs_bio.c
+++ b/sys/nfs/nfs_bio.c
@@ -124,8 +124,13 @@ nfs_getpages(ap)
}
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
(void)nfs_fsinfo(nmp, vp, cred, p);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
npages = btoc(count);
@@ -168,7 +173,11 @@ nfs_getpages(ap)
uio.uio_rw = UIO_READ;
uio.uio_procp = p;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
error = nfs_readrpc(vp, &uio, cred);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
pmap_qremove(kva, npages);
relpbuf(bp, &nfs_pbuf_freecnt);
@@ -280,8 +289,13 @@ nfs_putpages(ap)
offset = IDX_TO_OFF(pages[0]->pindex);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
(void)nfs_fsinfo(nmp, vp, cred, p);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
for (i = 0; i < npages; i++) {
rtvals[i] = VM_PAGER_AGAIN;
@@ -321,7 +335,11 @@ nfs_putpages(ap)
else
iomode = NFSV3WRITE_FILESYNC;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
pmap_qremove(kva, npages);
relpbuf(bp, &nfs_pbuf_freecnt);
@@ -332,8 +350,13 @@ nfs_putpages(ap)
rtvals[i] = VM_PAGER_OK;
vm_page_undirty(pages[i]);
}
- if (must_commit)
+ if (must_commit) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
nfs_clearcommit(vp->v_mount);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
}
return rtvals[0];
}
@@ -1076,7 +1099,9 @@ again:
bp->b_dirtyoff = on;
bp->b_dirtyend = on + n;
}
+ mtx_lock(&vm_mtx);
vfs_bio_set_validclean(bp, on, n);
+ mtx_unlock(&vm_mtx);
}
/*
diff --git a/sys/nfs/nfs_common.c b/sys/nfs/nfs_common.c
index c7e6917..18cb8a9 100644
--- a/sys/nfs/nfs_common.c
+++ b/sys/nfs/nfs_common.c
@@ -2139,6 +2139,8 @@ nfs_clearcommit(mp)
int s;
s = splbio();
+ mtx_assert(&Giant, MA_OWNED);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
loop:
for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
if (vp->v_mount != mp) /* Paranoia */
diff --git a/sys/nfs/nfs_subs.c b/sys/nfs/nfs_subs.c
index c7e6917..18cb8a9 100644
--- a/sys/nfs/nfs_subs.c
+++ b/sys/nfs/nfs_subs.c
@@ -2139,6 +2139,8 @@ nfs_clearcommit(mp)
int s;
s = splbio();
+ mtx_assert(&Giant, MA_OWNED);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
loop:
for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
if (vp->v_mount != mp) /* Paranoia */
diff --git a/sys/nfsclient/nfs_bio.c b/sys/nfsclient/nfs_bio.c
index cb7297f..234815c 100644
--- a/sys/nfsclient/nfs_bio.c
+++ b/sys/nfsclient/nfs_bio.c
@@ -124,8 +124,13 @@ nfs_getpages(ap)
}
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
(void)nfs_fsinfo(nmp, vp, cred, p);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
npages = btoc(count);
@@ -168,7 +173,11 @@ nfs_getpages(ap)
uio.uio_rw = UIO_READ;
uio.uio_procp = p;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
error = nfs_readrpc(vp, &uio, cred);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
pmap_qremove(kva, npages);
relpbuf(bp, &nfs_pbuf_freecnt);
@@ -280,8 +289,13 @@ nfs_putpages(ap)
offset = IDX_TO_OFF(pages[0]->pindex);
if ((nmp->nm_flag & NFSMNT_NFSV3) != 0 &&
- (nmp->nm_state & NFSSTA_GOTFSINFO) == 0)
+ (nmp->nm_state & NFSSTA_GOTFSINFO) == 0) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
(void)nfs_fsinfo(nmp, vp, cred, p);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
for (i = 0; i < npages; i++) {
rtvals[i] = VM_PAGER_AGAIN;
@@ -321,7 +335,11 @@ nfs_putpages(ap)
else
iomode = NFSV3WRITE_FILESYNC;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
error = nfs_writerpc(vp, &uio, cred, &iomode, &must_commit);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
pmap_qremove(kva, npages);
relpbuf(bp, &nfs_pbuf_freecnt);
@@ -332,8 +350,13 @@ nfs_putpages(ap)
rtvals[i] = VM_PAGER_OK;
vm_page_undirty(pages[i]);
}
- if (must_commit)
+ if (must_commit) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
nfs_clearcommit(vp->v_mount);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
+ }
}
return rtvals[0];
}
@@ -1076,7 +1099,9 @@ again:
bp->b_dirtyoff = on;
bp->b_dirtyend = on + n;
}
+ mtx_lock(&vm_mtx);
vfs_bio_set_validclean(bp, on, n);
+ mtx_unlock(&vm_mtx);
}
/*
diff --git a/sys/nfsclient/nfs_subs.c b/sys/nfsclient/nfs_subs.c
index c7e6917..18cb8a9 100644
--- a/sys/nfsclient/nfs_subs.c
+++ b/sys/nfsclient/nfs_subs.c
@@ -2139,6 +2139,8 @@ nfs_clearcommit(mp)
int s;
s = splbio();
+ mtx_assert(&Giant, MA_OWNED);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
loop:
for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
if (vp->v_mount != mp) /* Paranoia */
diff --git a/sys/nfsserver/nfs_srvsubs.c b/sys/nfsserver/nfs_srvsubs.c
index c7e6917..18cb8a9 100644
--- a/sys/nfsserver/nfs_srvsubs.c
+++ b/sys/nfsserver/nfs_srvsubs.c
@@ -2139,6 +2139,8 @@ nfs_clearcommit(mp)
int s;
s = splbio();
+ mtx_assert(&Giant, MA_OWNED);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
loop:
for (vp = mp->mnt_vnodelist.lh_first; vp; vp = nvp) {
if (vp->v_mount != mp) /* Paranoia */
diff --git a/sys/pci/agp.c b/sys/pci/agp.c
index 6419635..333c4c8 100644
--- a/sys/pci/agp.c
+++ b/sys/pci/agp.c
@@ -38,6 +38,7 @@
#include <sys/ioccom.h>
#include <sys/agpio.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/proc.h>
#include <pci/pcivar.h>
diff --git a/sys/pci/agp_ali.c b/sys/pci/agp_ali.c
index 86e070e..aa805e1 100644
--- a/sys/pci/agp_ali.c
+++ b/sys/pci/agp_ali.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/pci/agp_amd.c b/sys/pci/agp_amd.c
index 0a498f7..4aaf4e9 100644
--- a/sys/pci/agp_amd.c
+++ b/sys/pci/agp_amd.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/pci/agp_i810.c b/sys/pci/agp_i810.c
index 79fd566..5c40493 100644
--- a/sys/pci/agp_i810.c
+++ b/sys/pci/agp_i810.c
@@ -36,6 +36,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/pci/agp_intel.c b/sys/pci/agp_intel.c
index a4b9a43..dc1ef4d 100644
--- a/sys/pci/agp_intel.c
+++ b/sys/pci/agp_intel.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/pci/agp_sis.c b/sys/pci/agp_sis.c
index 1f1a50b..a6a20a4 100644
--- a/sys/pci/agp_sis.c
+++ b/sys/pci/agp_sis.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/pci/agp_via.c b/sys/pci/agp_via.c
index 983348e..086b027 100644
--- a/sys/pci/agp_via.c
+++ b/sys/pci/agp_via.c
@@ -35,6 +35,7 @@
#include <sys/kernel.h>
#include <sys/bus.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <pci/pcivar.h>
#include <pci/pcireg.h>
diff --git a/sys/ufs/ufs/ufs_readwrite.c b/sys/ufs/ufs/ufs_readwrite.c
index db9e239..672d0a0 100644
--- a/sys/ufs/ufs/ufs_readwrite.c
+++ b/sys/ufs/ufs/ufs_readwrite.c
@@ -114,8 +114,11 @@ READ(ap)
return 0;
}
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_reference(object);
+ mtx_unlock(&vm_mtx);
+ }
#ifdef ENABLE_VFS_IOOPT
/*
@@ -147,8 +150,11 @@ READ(ap)
(vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
ip->i_flag |= IN_ACCESS;
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return error;
}
}
@@ -192,8 +198,11 @@ READ(ap)
(vp->v_mount->mnt_flag &
MNT_NOATIME) == 0)
ip->i_flag |= IN_ACCESS;
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return error;
}
/*
@@ -355,8 +364,11 @@ READ(ap)
}
}
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
if ((error == 0 || uio->uio_resid != orig_resid) &&
(vp->v_mount->mnt_flag & MNT_NOATIME) == 0)
ip->i_flag |= IN_ACCESS;
@@ -395,8 +407,11 @@ WRITE(ap)
ip = VTOI(vp);
object = vp->v_object;
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_reference(object);
+ mtx_unlock(&vm_mtx);
+ }
#ifdef DIAGNOSTIC
if (uio->uio_rw != UIO_WRITE)
@@ -408,8 +423,11 @@ WRITE(ap)
if (ioflag & IO_APPEND)
uio->uio_offset = ip->i_size;
if ((ip->i_flags & APPEND) && uio->uio_offset != ip->i_size) {
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return (EPERM);
}
/* FALLTHROUGH */
@@ -428,8 +446,11 @@ WRITE(ap)
fs = ip->I_FS;
if (uio->uio_offset < 0 ||
(u_int64_t)uio->uio_offset + uio->uio_resid > fs->fs_maxfilesize) {
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return (EFBIG);
}
/*
@@ -443,8 +464,11 @@ WRITE(ap)
PROC_LOCK(p);
psignal(p, SIGXFSZ);
PROC_UNLOCK(p);
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return (EFBIG);
}
@@ -455,9 +479,11 @@ WRITE(ap)
flags = B_SYNC;
if (object && (object->flags & OBJ_OPT)) {
+ mtx_lock(&vm_mtx);
vm_freeze_copyopts(object,
OFF_TO_IDX(uio->uio_offset),
OFF_TO_IDX(uio->uio_offset + uio->uio_resid + PAGE_MASK));
+ mtx_unlock(&vm_mtx);
}
for (error = 0; uio->uio_resid > 0;) {
@@ -546,8 +572,11 @@ WRITE(ap)
} else if (resid > uio->uio_resid && (ioflag & IO_SYNC))
error = UFS_UPDATE(vp, 1);
- if (object)
+ if (object) {
+ mtx_lock(&vm_mtx);
vm_object_vndeallocate(object);
+ mtx_unlock(&vm_mtx);
+ }
return (error);
}
diff --git a/sys/vm/default_pager.c b/sys/vm/default_pager.c
index f5d88a5..0fb4896 100644
--- a/sys/vm/default_pager.c
+++ b/sys/vm/default_pager.c
@@ -41,6 +41,8 @@
#include <sys/param.h>
#include <sys/systm.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
diff --git a/sys/vm/phys_pager.c b/sys/vm/phys_pager.c
index 1f00ea0..d34672b 100644
--- a/sys/vm/phys_pager.c
+++ b/sys/vm/phys_pager.c
@@ -34,7 +34,6 @@
#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/sysctl.h>
-#include <sys/sx.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -43,7 +42,7 @@
#include <vm/vm_zone.h>
/* prevent concurrent creation races */
-static struct sx phys_pager_sx;
+static int phys_pager_alloc_lock;
/* list of device pager objects */
static struct pagerlst phys_pager_object_list;
/* protect access to phys_pager_object_list */
@@ -54,7 +53,6 @@ phys_pager_init(void)
{
TAILQ_INIT(&phys_pager_object_list);
- sx_init(&phys_pager_sx, "phys_pager create");
mtx_init(&phys_pager_mtx, "phys_pager list", MTX_DEF);
}
@@ -76,8 +74,11 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
/*
* Lock to prevent object creation race condition.
*/
- sx_xlock(&phys_pager_sx);
-
+ while (phys_pager_alloc_lock) {
+ phys_pager_alloc_lock = -1;
+ msleep(&phys_pager_alloc_lock, &vm_mtx, PVM, "swpalc", 0);
+ }
+
/*
* Look up pager, creating as necessary.
*/
@@ -101,7 +102,10 @@ phys_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
if (OFF_TO_IDX(foff + size) > object->size)
object->size = OFF_TO_IDX(foff + size);
}
- sx_xunlock(&phys_pager_sx);
+ if (phys_pager_alloc_lock)
+ wakeup(&phys_pager_alloc_lock);
+ phys_pager_alloc_lock = 0;
+
} else {
object = vm_object_allocate(OBJT_PHYS,
OFF_TO_IDX(foff + size));
diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c
index 8d343f4..44f4465 100644
--- a/sys/vm/swap_pager.c
+++ b/sys/vm/swap_pager.c
@@ -80,7 +80,6 @@
#include <sys/sysctl.h>
#include <sys/blist.h>
#include <sys/lock.h>
-#include <sys/sx.h>
#include <sys/vmmeter.h>
#ifndef MAX_PAGEOUT_CLUSTER
@@ -119,6 +118,7 @@ static int nsw_wcount_sync; /* limit write buffers / synchronous */
static int nsw_wcount_async; /* limit write buffers / asynchronous */
static int nsw_wcount_async_max;/* assigned maximum */
static int nsw_cluster_max; /* maximum VOP I/O allowed */
+static int sw_alloc_interlock; /* swap pager allocation interlock */
struct blist *swapblist;
static struct swblock **swhash;
@@ -145,7 +145,6 @@ SYSCTL_INT(_vm, OID_AUTO, swap_async_max,
#define NOBJLIST(handle) \
(&swap_pager_object_list[((int)(intptr_t)handle >> 4) & (NOBJLISTS-1)])
-static struct sx sw_alloc_sx; /* prevent concurrant creation */
static struct mtx sw_alloc_mtx; /* protect list manipulation */
static struct pagerlst swap_pager_object_list[NOBJLISTS];
struct pagerlst swap_pager_un_object_list;
@@ -233,6 +232,8 @@ static daddr_t swp_pager_meta_ctl __P((vm_object_t, vm_pindex_t, int));
static __inline void
swp_sizecheck()
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
if (vm_swap_size < nswap_lowat) {
if (swap_pager_almost_full == 0) {
printf("swap_pager: out of swap space\n");
@@ -264,7 +265,6 @@ swap_pager_init()
for (i = 0; i < NOBJLISTS; ++i)
TAILQ_INIT(&swap_pager_object_list[i]);
TAILQ_INIT(&swap_pager_un_object_list);
- sx_init(&sw_alloc_sx, "swap_pager create");
mtx_init(&sw_alloc_mtx, "swap_pager list", MTX_DEF);
/*
@@ -389,7 +389,10 @@ swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
* of the handle.
*/
- sx_xlock(&sw_alloc_sx);
+ while (sw_alloc_interlock) {
+ sw_alloc_interlock = -1;
+ msleep(&sw_alloc_interlock, &vm_mtx, PVM, "swpalc", 0);
+ }
object = vm_pager_object_lookup(NOBJLIST(handle), handle);
@@ -403,7 +406,9 @@ swap_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
swp_pager_meta_build(object, 0, SWAPBLK_NONE);
}
- sx_xunlock(&sw_alloc_sx);
+ if (sw_alloc_interlock < 0)
+ wakeup(&sw_alloc_interlock);
+ sw_alloc_interlock = 0;
} else {
object = vm_object_allocate(OBJT_DEFAULT,
OFF_TO_IDX(offset + PAGE_MASK + size));
@@ -478,6 +483,7 @@ swap_pager_dealloc(object)
*
* This routine may not block
* This routine must be called at splvm().
+ * vm_mtx should be held
*/
static __inline daddr_t
@@ -486,6 +492,7 @@ swp_pager_getswapspace(npages)
{
daddr_t blk;
+ mtx_assert(&vm_mtx, MA_OWNED);
if ((blk = blist_alloc(swapblist, npages)) == SWAPBLK_NONE) {
if (swap_pager_full != 2) {
printf("swap_pager_getswapspace: failed\n");
@@ -514,6 +521,7 @@ swp_pager_getswapspace(npages)
*
* This routine may not block
* This routine must be called at splvm().
+ * vm_mtx should be held
*/
static __inline void
@@ -521,6 +529,8 @@ swp_pager_freeswapspace(blk, npages)
daddr_t blk;
int npages;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
blist_free(swapblist, blk, npages);
vm_swap_size += npages;
/* per-swap area stats */
@@ -551,6 +561,9 @@ swap_pager_freespace(object, start, size)
vm_size_t size;
{
int s = splvm();
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+
swp_pager_meta_free(object, start, size);
splx(s);
}
@@ -635,6 +648,8 @@ swap_pager_copy(srcobject, dstobject, offset, destroysource)
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
+
/*
* If destroysource is set, we remove the source object from the
* swap_pager internal queue now.
@@ -881,7 +896,9 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
* FREE PAGE(s) - destroy underlying swap that is no longer
* needed.
*/
+ mtx_lock(&vm_mtx);
swp_pager_meta_free(object, start, count);
+ mtx_unlock(&vm_mtx);
splx(s);
bp->bio_resid = 0;
biodone(bp);
@@ -892,6 +909,7 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
* Execute read or write
*/
+ mtx_lock(&vm_mtx);
while (count > 0) {
daddr_t blk;
@@ -954,7 +972,9 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
bp->bio_resid -= PAGE_SIZE;
} else {
if (nbp == NULL) {
+ mtx_unlock(&vm_mtx);
nbp = getchainbuf(bp, swapdev_vp, B_ASYNC);
+ mtx_lock(&vm_mtx);
nbp->b_blkno = blk;
nbp->b_bcount = 0;
nbp->b_data = data;
@@ -985,6 +1005,7 @@ swap_pager_strategy(vm_object_t object, struct bio *bp)
/* nbp = NULL; */
}
+ mtx_unlock(&vm_mtx);
/*
* Wait for completion.
*/
@@ -1281,6 +1302,7 @@ swap_pager_putpages(object, m, count, sync, rtvals)
* at this time.
*/
s = splvm();
+ mtx_unlock(&vm_mtx);
mtx_lock(&pbuf_mtx);
n -= nsw_wcount_async_max;
if (nsw_wcount_async + n >= 0) {
@@ -1289,6 +1311,7 @@ swap_pager_putpages(object, m, count, sync, rtvals)
wakeup(&nsw_wcount_async);
}
mtx_unlock(&pbuf_mtx);
+ mtx_lock(&vm_mtx);
splx(s);
}
@@ -1399,6 +1422,8 @@ swap_pager_putpages(object, m, count, sync, rtvals)
swapdev_vp->v_numoutput++;
splx(s);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
/*
* asynchronous
@@ -1410,9 +1435,12 @@ swap_pager_putpages(object, m, count, sync, rtvals)
bp->b_iodone = swp_pager_async_iodone;
BUF_KERNPROC(bp);
BUF_STRATEGY(bp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
for (j = 0; j < n; ++j)
rtvals[i+j] = VM_PAGER_PEND;
+ /* restart outer loop */
continue;
}
@@ -1445,6 +1473,8 @@ swap_pager_putpages(object, m, count, sync, rtvals)
* normal async completion, which frees everything up.
*/
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
swp_pager_async_iodone(bp);
splx(s);
@@ -1732,7 +1762,8 @@ swp_pager_hash(vm_object_t object, vm_pindex_t index)
*
* This routine must be called at splvm(), except when used to convert
* an OBJT_DEFAULT object into an OBJT_SWAP object.
-
+ *
+ * Requires vm_mtx.
*/
static void
@@ -1744,6 +1775,7 @@ swp_pager_meta_build(
struct swblock *swap;
struct swblock **pswap;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Convert default object to swap object if necessary
*/
@@ -1830,12 +1862,16 @@ retry:
* out. This routine does *NOT* operate on swap metadata associated
* with resident pages.
*
+ * vm_mtx must be held
* This routine must be called at splvm()
*/
static void
swp_pager_meta_free(vm_object_t object, vm_pindex_t index, daddr_t count)
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (object->type != OBJT_SWAP)
return;
@@ -1875,6 +1911,7 @@ swp_pager_meta_free(vm_object_t object, vm_pindex_t index, daddr_t count)
* an object.
*
* This routine must be called at splvm()
+ * Requires vm_mtx.
*/
static void
@@ -1882,6 +1919,8 @@ swp_pager_meta_free_all(vm_object_t object)
{
daddr_t index = 0;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (object->type != OBJT_SWAP)
return;
@@ -1930,6 +1969,7 @@ swp_pager_meta_free_all(vm_object_t object)
* busy page.
*
* This routine must be called at splvm().
+ * Requires vm_mtx.
*
* SWM_FREE remove and free swap block from metadata
* SWM_POP remove from meta data but do not free.. pop it out
@@ -2032,18 +2072,24 @@ vm_pager_chain_iodone(struct buf *nbp)
* Obtain a physical buffer and chain it to its parent buffer. When
* I/O completes, the parent buffer will be B_SIGNAL'd. Errors are
* automatically propagated to the parent
+ *
+ * vm_mtx can't be held
*/
struct buf *
getchainbuf(struct bio *bp, struct vnode *vp, int flags)
{
- struct buf *nbp = getpbuf(NULL);
- u_int *count = (u_int *)&(bp->bio_caller1);
+ struct buf *nbp;
+ u_int *count;
+
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+ nbp = getpbuf(NULL);
+ count = (u_int *)&(bp->bio_caller1);
nbp->b_caller1 = bp;
++(*count);
- if (*count > 4)
+ if (*count > 4)
waitchainbuf(bp, 4, 0);
nbp->b_iocmd = bp->bio_cmd;
@@ -2063,6 +2109,9 @@ getchainbuf(struct bio *bp, struct vnode *vp, int flags)
void
flushchainbuf(struct buf *nbp)
{
+
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
if (nbp->b_bcount) {
nbp->b_bufsize = nbp->b_bcount;
if (nbp->b_iocmd == BIO_WRITE)
@@ -2072,14 +2121,19 @@ flushchainbuf(struct buf *nbp)
} else {
bufdone(nbp);
}
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
-void
+static void
waitchainbuf(struct bio *bp, int limit, int done)
{
int s;
- u_int *count = (u_int *)&(bp->bio_caller1);
+ u_int *count;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+ mtx_lock(&Giant);
+ count = (u_int *)&(bp->bio_caller1);
s = splbio();
while (*count > limit) {
bp->bio_flags |= BIO_FLAG1;
@@ -2092,6 +2146,7 @@ waitchainbuf(struct bio *bp, int limit, int done)
}
biodone(bp);
}
+ mtx_unlock(&Giant);
splx(s);
}
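Both phys_pager_alloc() and swap_pager_alloc() above replace an sx(9) lock with a plain integer flag that is slept on with msleep()/wakeup(), relying on vm_mtx for the flag's consistency. A distilled sketch of that interlock pattern follows; the names and the 0/1/-1 flag convention are assumptions of the sketch rather than the commit's literal code. Note that msleep() drops vm_mtx for the duration of the sleep and retakes it before returning, which is what makes re-testing the flag in the while loop safe.

/*
 * Sketch only: sleep-flag interlock serialized by vm_mtx.
 * 0 = free, 1 = held, -1 = held with waiters (assumed convention).
 */
static int example_interlock;

static void
example_interlock_enter(void)
{
	mtx_assert(&vm_mtx, MA_OWNED);
	while (example_interlock != 0) {
		example_interlock = -1;		/* note a waiter exists */
		msleep(&example_interlock, &vm_mtx, PVM, "exalck", 0);
	}
	example_interlock = 1;			/* mark it held */
}

static void
example_interlock_exit(void)
{
	mtx_assert(&vm_mtx, MA_OWNED);
	if (example_interlock < 0)		/* wake any waiters */
		wakeup(&example_interlock);
	example_interlock = 0;
}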
diff --git a/sys/vm/vm.h b/sys/vm/vm.h
index 38f04ac..5915b29 100644
--- a/sys/vm/vm.h
+++ b/sys/vm/vm.h
@@ -95,6 +95,10 @@ typedef struct vm_map *vm_map_t;
struct vm_object;
typedef struct vm_object *vm_object_t;
+#ifdef _KERNEL
+extern struct mtx vm_mtx;
+#endif
+
#ifndef _KERNEL
/*
* This is defined in <sys/types.h> for the kernel so that non-vm kernel
diff --git a/sys/vm/vm_fault.c b/sys/vm/vm_fault.c
index a1bad69..f31f12b 100644
--- a/sys/vm/vm_fault.c
+++ b/sys/vm/vm_fault.c
@@ -81,6 +81,8 @@
#include <sys/vnode.h>
#include <sys/resourcevar.h>
#include <sys/vmmeter.h>
+#include <sys/kernel.h>
+#include <sys/sysctl.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -134,6 +136,8 @@ unlock_map(struct faultstate *fs)
static void
_unlock_things(struct faultstate *fs, int dealloc)
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
vm_object_pip_wakeup(fs->object);
if (fs->object != fs->first_object) {
vm_page_free(fs->first_m);
@@ -145,8 +149,15 @@ _unlock_things(struct faultstate *fs, int dealloc)
}
unlock_map(fs);
if (fs->vp != NULL) {
- vput(fs->vp);
+ struct vnode *vp;
+
+ vp = fs->vp;
fs->vp = NULL;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
+ vput(vp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
}
@@ -179,10 +190,41 @@ _unlock_things(struct faultstate *fs, int dealloc)
*
*
* The map in question must be referenced, and remains so.
- * Caller may hold no locks.
+ * Caller may hold no locks except the vm_mtx which will be
+ * locked if needed.
*/
+static int vm_fault1 __P((vm_map_t, vm_offset_t, vm_prot_t, int));
+
+static int vm_faults_no_vm_mtx;
+SYSCTL_INT(_vm, OID_AUTO, vm_faults_no_vm_mtx, CTLFLAG_RW,
+ &vm_faults_no_vm_mtx, 0, "");
+
+static int vm_faults_no_giant;
+SYSCTL_INT(_vm, OID_AUTO, vm_faults_no_giant, CTLFLAG_RW,
+ &vm_faults_no_giant, 0, "");
+
int
-vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags)
+vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags)
+{
+ int hadvmlock, ret;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock) {
+ mtx_lock(&vm_mtx);
+ vm_faults_no_vm_mtx++;
+ if (!mtx_owned(&Giant))
+ vm_faults_no_giant++;
+ }
+ ret = vm_fault1(map, vaddr, fault_type, fault_flags);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (ret);
+}
+
+static int
+vm_fault1(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type,
+ int fault_flags)
{
vm_prot_t prot;
int result;
@@ -194,7 +236,8 @@ vm_fault(vm_map_t map, vm_offset_t vaddr, vm_prot_t fault_type, int fault_flags)
int faultcount;
struct faultstate fs;
- cnt.v_vm_faults++; /* needs lock XXX */
+ mtx_assert(&vm_mtx, MA_OWNED);
+ cnt.v_vm_faults++;
hardfault = 0;
RetryFault:;
@@ -251,7 +294,11 @@ RetryFault:;
vm_object_reference(fs.first_object);
vm_object_pip_add(fs.first_object, 1);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
fs.vp = vnode_pager_lock(fs.first_object);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if ((fault_type & VM_PROT_WRITE) &&
(fs.first_object->type == OBJT_VNODE)) {
vm_freeze_copyopts(fs.first_object,
@@ -723,7 +770,11 @@ readrest:
*/
if (fs.vp != NULL) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
vput(fs.vp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
fs.vp = NULL;
}
@@ -940,6 +991,7 @@ vm_fault_user_wire(map, start, end)
register pmap_t pmap;
int rv;
+ mtx_assert(&vm_mtx, MA_OWNED);
pmap = vm_map_pmap(map);
/*
@@ -1112,6 +1164,9 @@ vm_fault_copy_entry(dst_map, src_map, dst_entry, src_entry)
*
* Return value:
* number of pages in marray
+ *
+ * This routine can't block.
+ * vm_mtx must be held.
*/
static int
vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
@@ -1127,6 +1182,8 @@ vm_fault_additional_pages(m, rbehind, rahead, marray, reqpage)
vm_page_t rtm;
int cbehind, cahead;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
object = m->object;
pindex = m->pindex;
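vm_fault() above, like the kmem_*() routines later in this diff, wraps its worker in a "hadvmlock" check so the interface can be entered with or without vm_mtx already held. A distilled sketch of the idiom; example_entry_point() and example_op() are hypothetical.

/*
 * Sketch only: conditionally acquire vm_mtx, preserving the caller's
 * lock state across the call.
 */
static int
example_op(void)
{
	mtx_assert(&vm_mtx, MA_OWNED);	/* worker always runs locked */
	return (0);
}

static int
example_entry_point(void)
{
	int hadvmlock, ret;

	hadvmlock = mtx_owned(&vm_mtx);	/* remember the caller's state */
	if (!hadvmlock)
		mtx_lock(&vm_mtx);
	ret = example_op();
	if (!hadvmlock)
		mtx_unlock(&vm_mtx);	/* restore the caller's state */
	return (ret);
}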
diff --git a/sys/vm/vm_glue.c b/sys/vm/vm_glue.c
index a180ae3..37c580a 100644
--- a/sys/vm/vm_glue.c
+++ b/sys/vm/vm_glue.c
@@ -161,6 +161,7 @@ useracc(addr, len, rw)
|| (vm_offset_t) addr + len < (vm_offset_t) addr) {
return (FALSE);
}
+ mtx_lock(&vm_mtx);
map = &curproc->p_vmspace->vm_map;
vm_map_lock_read(map);
/*
@@ -172,6 +173,7 @@ useracc(addr, len, rw)
trunc_page((vm_offset_t)addr), round_page((vm_offset_t)addr + len), prot);
map->hint = save_hint;
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
return (rv == TRUE);
}
@@ -181,8 +183,12 @@ vslock(addr, len)
caddr_t addr;
u_int len;
{
- vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
+
+ mtx_lock(&vm_mtx);
+ vm_map_pageable(&curproc->p_vmspace->vm_map,
+ trunc_page((vm_offset_t)addr),
round_page((vm_offset_t)addr + len), FALSE);
+ mtx_unlock(&vm_mtx);
}
void
@@ -190,8 +196,12 @@ vsunlock(addr, len)
caddr_t addr;
u_int len;
{
- vm_map_pageable(&curproc->p_vmspace->vm_map, trunc_page((vm_offset_t)addr),
+
+ mtx_lock(&vm_mtx);
+ vm_map_pageable(&curproc->p_vmspace->vm_map,
+ trunc_page((vm_offset_t)addr),
round_page((vm_offset_t)addr + len), TRUE);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -201,6 +211,8 @@ vsunlock(addr, len)
* machine-dependent layer to fill those in and make the new process
* ready to run. The new process is set up so that it returns directly
* to user mode to avoid stack copying and relocation problems.
+ *
+ * Called without vm_mtx.
*/
void
vm_fork(p1, p2, flags)
@@ -209,6 +221,7 @@ vm_fork(p1, p2, flags)
{
register struct user *up;
+ mtx_lock(&vm_mtx);
if ((flags & RFPROC) == 0) {
/*
* Divorce the memory, if it is shared, essentially
@@ -221,6 +234,7 @@ vm_fork(p1, p2, flags)
}
}
cpu_fork(p1, p2, flags);
+ mtx_unlock(&vm_mtx);
return;
}
@@ -275,6 +289,7 @@ vm_fork(p1, p2, flags)
* and make the child ready to run.
*/
cpu_fork(p1, p2, flags);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -360,10 +375,13 @@ scheduler(dummy)
mtx_assert(&Giant, MA_OWNED | MA_NOTRECURSED);
loop:
+ mtx_lock(&vm_mtx);
if (vm_page_count_min()) {
VM_WAIT;
+ mtx_unlock(&vm_mtx);
goto loop;
}
+ mtx_unlock(&vm_mtx);
mtx_unlock(&Giant);
pp = NULL;
@@ -442,6 +460,9 @@ SYSCTL_INT(_vm, OID_AUTO, swap_idle_threshold2,
* If any procs have been sleeping/stopped for at least maxslp seconds,
* they are swapped. Else, we swap the longest-sleeping or stopped process,
* if any, otherwise the longest-resident process.
+ *
+ * Can block
+ * must be called with vm_mtx
*/
void
swapout_procs(action)
@@ -452,6 +473,8 @@ int action;
int outpri, outpri2;
int didswap = 0;
+ mtx_assert(&vm_mtx, MA_OWNED);
+ mtx_unlock(&vm_mtx);
outp = outp2 = NULL;
outpri = outpri2 = INT_MIN;
sx_slock(&allproc_lock);
@@ -465,6 +488,11 @@ retry:
PROC_UNLOCK(p);
continue;
}
+ /*
+ * Only aiod changes the vmspace; however, it will be
+ * skipped because of the if statement above checking
+ * for P_SYSTEM.
+ */
vm = p->p_vmspace;
mtx_lock_spin(&sched_lock);
if ((p->p_sflag & (PS_INMEM|PS_SWAPPING)) != PS_INMEM) {
@@ -516,6 +544,7 @@ retry:
}
mtx_unlock_spin(&sched_lock);
+ mtx_lock(&vm_mtx);
#if 0
/*
* XXX: This is broken. We release the lock we
@@ -531,7 +560,7 @@ retry:
*/
if (lockmgr(&vm->vm_map.lock,
LK_EXCLUSIVE | LK_NOWAIT,
- (void *)0, curproc)) {
+ NULL, curproc)) {
vmspace_free(vm);
PROC_UNLOCK(p);
continue;
@@ -548,8 +577,10 @@ retry:
swapout(p);
vmspace_free(vm);
didswap++;
+ mtx_unlock(&vm_mtx);
goto retry;
}
+ mtx_unlock(&vm_mtx);
PROC_UNLOCK(p);
}
}
@@ -558,6 +589,7 @@ retry:
* If we swapped something out, and another process needed memory,
* then wakeup the sched process.
*/
+ mtx_lock(&vm_mtx);
if (didswap)
wakeup(&proc0);
}
diff --git a/sys/vm/vm_init.c b/sys/vm/vm_init.c
index ae336e1..35e4676 100644
--- a/sys/vm/vm_init.c
+++ b/sys/vm/vm_init.c
@@ -73,6 +73,7 @@
#include <sys/lock.h>
#include <sys/proc.h>
#include <sys/systm.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
@@ -96,16 +97,20 @@ SYSINIT(vm_mem, SI_SUB_VM, SI_ORDER_FIRST, vm_mem_init, NULL)
* The start and end address of physical memory is passed in.
*/
+struct mtx vm_mtx;
+
/* ARGSUSED*/
static void
vm_mem_init(dummy)
void *dummy;
{
+
/*
* Initializes resident memory structures. From here on, all physical
* memory is accounted for, and we use only virtual addresses.
*/
-
+ mtx_init(&vm_mtx, "vm", MTX_DEF);
+ mtx_lock(&vm_mtx);
vm_set_page_size();
virtual_avail = vm_page_startup(avail_start, avail_end, virtual_avail);
@@ -118,4 +123,5 @@ vm_mem_init(dummy)
kmem_init(virtual_avail, virtual_end);
pmap_init(avail_start, avail_end);
vm_pager_init();
+ mtx_unlock(&vm_mtx);
}
diff --git a/sys/vm/vm_kern.c b/sys/vm/vm_kern.c
index 14e4867..08ee486 100644
--- a/sys/vm/vm_kern.c
+++ b/sys/vm/vm_kern.c
@@ -106,11 +106,17 @@ kmem_alloc_pageable(map, size)
{
vm_offset_t addr;
int result;
+ int hadvmlock;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
addr = vm_map_min(map);
result = vm_map_find(map, NULL, (vm_offset_t) 0,
&addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, 0);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
if (result != KERN_SUCCESS) {
return (0);
}
@@ -131,10 +137,17 @@ kmem_alloc_nofault(map, size)
vm_offset_t addr;
int result;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
addr = vm_map_min(map);
result = vm_map_find(map, NULL, (vm_offset_t) 0,
&addr, size, TRUE, VM_PROT_ALL, VM_PROT_ALL, MAP_NOFAULT);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
if (result != KERN_SUCCESS) {
return (0);
}
@@ -153,8 +166,11 @@ kmem_alloc(map, size)
vm_offset_t addr;
vm_offset_t offset;
vm_offset_t i;
+ int hadvmlock;
- mtx_assert(&Giant, MA_OWNED);
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
/*
@@ -170,6 +186,8 @@ kmem_alloc(map, size)
vm_map_lock(map);
if (vm_map_findspace(map, vm_map_min(map), size, &addr)) {
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (0);
}
offset = addr - VM_MIN_KERNEL_ADDRESS;
@@ -214,6 +232,8 @@ kmem_alloc(map, size)
(void) vm_map_pageable(map, (vm_offset_t) addr, addr + size, FALSE);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (addr);
}
@@ -232,9 +252,16 @@ kmem_free(map, addr, size)
vm_offset_t addr;
vm_size_t size;
{
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
- mtx_assert(&Giant, MA_OWNED);
(void) vm_map_remove(map, trunc_page(addr), round_page(addr + size));
+
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
/*
@@ -257,6 +284,11 @@ kmem_suballoc(parent, min, max, size)
{
int ret;
vm_map_t result;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
@@ -274,6 +306,8 @@ kmem_suballoc(parent, min, max, size)
panic("kmem_suballoc: cannot create submap");
if (vm_map_submap(parent, *min, *max, result) != KERN_SUCCESS)
panic("kmem_suballoc: unable to change range to submap");
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (result);
}
@@ -308,10 +342,15 @@ kmem_malloc(map, size, flags)
vm_map_entry_t entry;
vm_offset_t addr;
vm_page_t m;
+ int hadvmlock;
if (map != kmem_map && map != mb_map)
panic("kmem_malloc: map != {kmem,mb}_map");
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+
size = round_page(size);
addr = vm_map_min(map);
@@ -326,12 +365,12 @@ kmem_malloc(map, size, flags)
if (map == mb_map) {
mb_map_full = TRUE;
printf("Out of mbuf clusters - adjust NMBCLUSTERS or increase maxusers!\n");
- return (0);
+ goto bad;
}
if ((flags & M_NOWAIT) == 0)
panic("kmem_malloc(%ld): kmem_map too small: %ld total allocated",
(long)size, (long)map->size);
- return (0);
+ goto bad;
}
offset = addr - VM_MIN_KERNEL_ADDRESS;
vm_object_reference(kmem_object);
@@ -370,7 +409,7 @@ retry:
if (flags & M_ASLEEP) {
VM_AWAIT;
}
- return (0);
+ goto bad;
}
vm_page_flag_clear(m, PG_ZERO);
m->valid = VM_PAGE_BITS_ALL;
@@ -407,7 +446,14 @@ retry:
}
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (addr);
+
+bad:
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (0);
}
/*
@@ -425,6 +471,11 @@ kmem_alloc_wait(map, size)
vm_size_t size;
{
vm_offset_t addr;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
size = round_page(size);
@@ -439,13 +490,17 @@ kmem_alloc_wait(map, size)
/* no space now; see if we can ever get space */
if (vm_map_max(map) - vm_map_min(map) < size) {
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (0);
}
vm_map_unlock(map);
- tsleep(map, PVM, "kmaw", 0);
+ msleep(map, &vm_mtx, PVM, "kmaw", 0);
}
vm_map_insert(map, NULL, (vm_offset_t) 0, addr, addr + size, VM_PROT_ALL, VM_PROT_ALL, 0);
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return (addr);
}
@@ -461,10 +516,17 @@ kmem_free_wakeup(map, addr, size)
vm_offset_t addr;
vm_size_t size;
{
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
vm_map_lock(map);
(void) vm_map_delete(map, trunc_page(addr), round_page(addr + size));
wakeup(map);
vm_map_unlock(map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
/*
diff --git a/sys/vm/vm_map.c b/sys/vm/vm_map.c
index b33e9e4..d07d35b 100644
--- a/sys/vm/vm_map.c
+++ b/sys/vm/vm_map.c
@@ -200,6 +200,7 @@ vmspace_free(vm)
struct vmspace *vm;
{
+ mtx_assert(&vm_mtx, MA_OWNED);
if (vm->vm_refcnt == 0)
panic("vmspace_free: attempt to free already freed vmspace");
@@ -350,6 +351,8 @@ vm_map_entry_unlink(vm_map_t map,
* in the "entry" parameter. The boolean
* result indicates whether the address is
* actually contained in the map.
+ *
+ * Doesn't block.
*/
boolean_t
vm_map_lookup_entry(map, address, entry)
@@ -439,6 +442,7 @@ vm_map_insert(vm_map_t map, vm_object_t object, vm_ooffset_t offset,
vm_map_entry_t temp_entry;
vm_eflags_t protoeflags;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Check that the start and end points are not bogus.
*/
@@ -1705,7 +1709,9 @@ vm_map_clean(map, start, end, syncio, invalidate)
int flags;
vm_object_reference(object);
+ mtx_unlock(&vm_mtx);
vn_lock(object->handle, LK_EXCLUSIVE | LK_RETRY, curproc);
+ mtx_lock(&vm_mtx);
flags = (syncio || invalidate) ? OBJPC_SYNC : 0;
flags |= invalidate ? OBJPC_INVAL : 0;
vm_object_page_clean(object,
@@ -2296,6 +2302,8 @@ vm_map_stack (vm_map_t map, vm_offset_t addrbos, vm_size_t max_ssize,
* the stack. Also returns KERN_SUCCESS if addr is outside the
* stack range (this is strange, but preserves compatibility with
* the grow function in vm_machdep.c).
+ *
+ * Will grab vm_mtx if needed
*/
int
vm_map_growstack (struct proc *p, vm_offset_t addr)
@@ -2309,18 +2317,29 @@ vm_map_growstack (struct proc *p, vm_offset_t addr)
int grow_amount;
int rv;
int is_procstack;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+#define myreturn(rval) do { \
+ if (!hadvmlock) \
+ mtx_unlock(&vm_mtx); \
+ return (rval); \
+} while (0)
+
Retry:
vm_map_lock_read(map);
/* If addr is already in the entry range, no need to grow.*/
if (vm_map_lookup_entry(map, addr, &prev_entry)) {
vm_map_unlock_read(map);
- return (KERN_SUCCESS);
+ myreturn (KERN_SUCCESS);
}
if ((stack_entry = prev_entry->next) == &map->header) {
vm_map_unlock_read(map);
- return (KERN_SUCCESS);
+ myreturn (KERN_SUCCESS);
}
if (prev_entry == &map->header)
end = stack_entry->start - stack_entry->avail_ssize;
@@ -2338,14 +2357,14 @@ Retry:
addr >= stack_entry->start ||
addr < stack_entry->start - stack_entry->avail_ssize) {
vm_map_unlock_read(map);
- return (KERN_SUCCESS);
+ myreturn (KERN_SUCCESS);
}
/* Find the minimum grow amount */
grow_amount = roundup (stack_entry->start - addr, PAGE_SIZE);
if (grow_amount > stack_entry->avail_ssize) {
vm_map_unlock_read(map);
- return (KERN_NO_SPACE);
+ myreturn (KERN_NO_SPACE);
}
/* If there is no longer enough space between the entries
@@ -2364,7 +2383,7 @@ Retry:
stack_entry->avail_ssize = stack_entry->start - end;
vm_map_unlock(map);
- return (KERN_NO_SPACE);
+ myreturn (KERN_NO_SPACE);
}
is_procstack = addr >= (vm_offset_t)vm->vm_maxsaddr;
@@ -2375,7 +2394,7 @@ Retry:
if (is_procstack && (ctob(vm->vm_ssize) + grow_amount >
p->p_rlimit[RLIMIT_STACK].rlim_cur)) {
vm_map_unlock_read(map);
- return (KERN_NO_SPACE);
+ myreturn (KERN_NO_SPACE);
}
/* Round up the grow amount modulo SGROWSIZ */
@@ -2427,8 +2446,8 @@ Retry:
}
vm_map_unlock(map);
- return (rv);
-
+ myreturn (rv);
+#undef myreturn
}
/*
@@ -2501,6 +2520,9 @@ vmspace_unshare(struct proc *p) {
* specified, the map may be changed to perform virtual
* copying operations, although the data referenced will
* remain the same.
+ *
+ * Can block while locking maps and while calling vm_object_shadow().
+ * Will drop/reacquire the vm_mtx.
*/
int
vm_map_lookup(vm_map_t *var_map, /* IN/OUT */
@@ -2928,6 +2950,8 @@ vm_uiomove(mapa, srcobject, cp, cnta, uaddra, npages)
* Performs the copy_on_write operations necessary to allow the virtual copies
* into user space to work. This has to be called for write(2) system calls
* from other processes, file unlinking, and file size shrinkage.
+ *
+ * Requires that the vm_mtx is held
*/
void
vm_freeze_copyopts(object, froma, toa)
@@ -2938,6 +2962,7 @@ vm_freeze_copyopts(object, froma, toa)
vm_object_t robject;
vm_pindex_t idx;
+ mtx_assert(&vm_mtx, MA_OWNED);
if ((object == NULL) ||
((object->flags & OBJ_OPT) == 0))
return;
diff --git a/sys/vm/vm_map.h b/sys/vm/vm_map.h
index daf2b6e..241a80c 100644
--- a/sys/vm/vm_map.h
+++ b/sys/vm/vm_map.h
@@ -196,6 +196,7 @@ struct vmspace {
caddr_t vm_minsaddr; /* user VA at max stack growth */
};
+#ifdef _KERNEL
/*
* Macros: vm_map_lock, etc.
* Function:
@@ -211,6 +212,7 @@ struct vmspace {
do { \
lockmgr(&(map)->lock, LK_DRAIN|LK_INTERLOCK, \
&(map)->ref_lock, curproc); \
+ mtx_lock(&vm_mtx); \
(map)->timestamp++; \
} while(0)
@@ -225,27 +227,33 @@ struct vmspace {
#define vm_map_lock(map) \
do { \
vm_map_printf("locking map LK_EXCLUSIVE: %p\n", map); \
- if (lockmgr(&(map)->lock, LK_EXCLUSIVE, (void *)0, curproc) != 0) \
+ mtx_assert(&vm_mtx, MA_OWNED); \
+ if (lockmgr(&(map)->lock, LK_EXCLUSIVE | LK_INTERLOCK, \
+ &vm_mtx, curproc) != 0) \
panic("vm_map_lock: failed to get lock"); \
+ mtx_lock(&vm_mtx); \
(map)->timestamp++; \
} while(0)
#define vm_map_unlock(map) \
do { \
vm_map_printf("locking map LK_RELEASE: %p\n", map); \
- lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc); \
+ lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc); \
} while (0)
#define vm_map_lock_read(map) \
do { \
vm_map_printf("locking map LK_SHARED: %p\n", map); \
- lockmgr(&(map)->lock, LK_SHARED, (void *)0, curproc); \
+ mtx_assert(&vm_mtx, MA_OWNED); \
+ lockmgr(&(map)->lock, LK_SHARED | LK_INTERLOCK, \
+ &vm_mtx, curproc); \
+ mtx_lock(&vm_mtx); \
} while (0)
#define vm_map_unlock_read(map) \
do { \
vm_map_printf("locking map LK_RELEASE: %p\n", map); \
- lockmgr(&(map)->lock, LK_RELEASE, (void *)0, curproc); \
+ lockmgr(&(map)->lock, LK_RELEASE, NULL, curproc); \
} while (0)
static __inline__ int
@@ -253,7 +261,8 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) {
int error;
vm_map_printf("locking map LK_EXCLUPGRADE: %p\n", map);
- error = lockmgr(&map->lock, LK_EXCLUPGRADE, (void *)0, p);
+ error = lockmgr(&map->lock, LK_EXCLUPGRADE | LK_INTERLOCK, &vm_mtx, p);
+ mtx_lock(&vm_mtx);
if (error == 0)
map->timestamp++;
return error;
@@ -264,7 +273,7 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) {
#define vm_map_lock_downgrade(map) \
do { \
vm_map_printf("locking map LK_DOWNGRADE: %p\n", map); \
- lockmgr(&(map)->lock, LK_DOWNGRADE, (void *)0, curproc); \
+ lockmgr(&(map)->lock, LK_DOWNGRADE, NULL, curproc); \
} while (0)
#define vm_map_set_recursive(map) \
@@ -287,6 +296,7 @@ _vm_map_lock_upgrade(vm_map_t map, struct proc *p) {
#define vm_map_min(map) ((map)->min_offset)
#define vm_map_max(map) ((map)->max_offset)
#define vm_map_pmap(map) ((map)->pmap)
+#endif /* _KERNEL */
static __inline struct pmap *
vmspace_pmap(struct vmspace *vmspace)
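The rewritten vm_map locking macros above hand vm_mtx to lockmgr() as an interlock: lockmgr() is entered with vm_mtx held, LK_INTERLOCK makes it release vm_mtx internally so the mutex is not held while waiting for the map lock, and the macro then retakes vm_mtx. The same sequence expanded as a function for readability; example_map_lock() is hypothetical, and the calls are the ones used in the macros above.

static void
example_map_lock(vm_map_t example_map)
{
	mtx_assert(&vm_mtx, MA_OWNED);
	/* lockmgr() releases vm_mtx (LK_INTERLOCK) before blocking. */
	if (lockmgr(&example_map->lock, LK_EXCLUSIVE | LK_INTERLOCK,
	    &vm_mtx, curproc) != 0)
		panic("example_map_lock: failed to get lock");
	mtx_lock(&vm_mtx);		/* retake vm_mtx after the wait */
	example_map->timestamp++;
}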
diff --git a/sys/vm/vm_meter.c b/sys/vm/vm_meter.c
index 8dcb906..0f4e107 100644
--- a/sys/vm/vm_meter.c
+++ b/sys/vm/vm_meter.c
@@ -145,8 +145,10 @@ vmtotal(SYSCTL_HANDLER_ARGS)
/*
* Mark all objects as inactive.
*/
+ mtx_lock(&vm_mtx);
TAILQ_FOREACH(object, &vm_object_list, object_list)
vm_object_clear_flag(object, OBJ_ACTIVE);
+ mtx_unlock(&vm_mtx);
/*
* Calculate process statistics.
*/
@@ -197,6 +199,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
* Note active objects.
*/
paging = 0;
+ mtx_lock(&vm_mtx);
for (map = &p->p_vmspace->vm_map, entry = map->header.next;
entry != &map->header; entry = entry->next) {
if ((entry->eflags & MAP_ENTRY_IS_SUB_MAP) ||
@@ -205,6 +208,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
vm_object_set_flag(entry->object.vm_object, OBJ_ACTIVE);
paging |= entry->object.vm_object->paging_in_progress;
}
+ mtx_unlock(&vm_mtx);
if (paging)
totalp->t_pw++;
}
@@ -212,6 +216,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
/*
* Calculate object memory usage statistics.
*/
+ mtx_lock(&vm_mtx);
TAILQ_FOREACH(object, &vm_object_list, object_list) {
/*
* devices, like /dev/mem, will badly skew our totals
@@ -235,6 +240,7 @@ vmtotal(SYSCTL_HANDLER_ARGS)
}
}
totalp->t_free = cnt.v_free_count + cnt.v_cache_count;
+ mtx_unlock(&vm_mtx);
return (sysctl_handle_opaque(oidp, totalp, sizeof total, req));
}
diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c
index 14307b3..5de25d9 100644
--- a/sys/vm/vm_mmap.c
+++ b/sys/vm/vm_mmap.c
@@ -52,6 +52,7 @@
#include <sys/systm.h>
#include <sys/kernel.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/sysproto.h>
#include <sys/filedesc.h>
#include <sys/proc.h>
@@ -515,14 +516,17 @@ msync(p, uap)
* the range of the map entry containing addr. This can be incorrect
* if the region splits or is coalesced with a neighbor.
*/
+ mtx_lock(&vm_mtx);
if (size == 0) {
vm_map_entry_t entry;
vm_map_lock_read(map);
rv = vm_map_lookup_entry(map, addr, &entry);
vm_map_unlock_read(map);
- if (rv == FALSE)
+ if (rv == FALSE) {
+ mtx_unlock(&vm_mtx);
return (EINVAL);
+ }
addr = entry->start;
size = entry->end - entry->start;
}
@@ -533,6 +537,7 @@ msync(p, uap)
rv = vm_map_clean(map, addr, addr + size, (flags & MS_ASYNC) == 0,
(flags & MS_INVALIDATE) != 0);
+ mtx_unlock(&vm_mtx);
switch (rv) {
case KERN_SUCCESS:
break;
@@ -589,10 +594,14 @@ munmap(p, uap)
/*
* Make sure entire range is allocated.
*/
- if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE))
+ mtx_lock(&vm_mtx);
+ if (!vm_map_check_protection(map, addr, addr + size, VM_PROT_NONE)) {
+ mtx_unlock(&vm_mtx);
return (EINVAL);
+ }
/* returns nothing but KERN_SUCCESS anyway */
(void) vm_map_remove(map, addr, addr + size);
+ mtx_unlock(&vm_mtx);
return (0);
}
@@ -624,6 +633,7 @@ mprotect(p, uap)
vm_offset_t addr;
vm_size_t size, pageoff;
register vm_prot_t prot;
+ int ret;
addr = (vm_offset_t) uap->addr;
size = uap->len;
@@ -640,8 +650,11 @@ mprotect(p, uap)
if (addr + size < addr)
return(EINVAL);
- switch (vm_map_protect(&p->p_vmspace->vm_map, addr, addr + size, prot,
- FALSE)) {
+ mtx_lock(&vm_mtx);
+ ret = vm_map_protect(&p->p_vmspace->vm_map, addr,
+ addr + size, prot, FALSE);
+ mtx_unlock(&vm_mtx);
+ switch (ret) {
case KERN_SUCCESS:
return (0);
case KERN_PROTECTION_FAILURE:
@@ -665,6 +678,7 @@ minherit(p, uap)
vm_offset_t addr;
vm_size_t size, pageoff;
register vm_inherit_t inherit;
+ int ret;
addr = (vm_offset_t)uap->addr;
size = uap->len;
@@ -677,8 +691,12 @@ minherit(p, uap)
if (addr + size < addr)
return(EINVAL);
- switch (vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
- inherit)) {
+ mtx_lock(&vm_mtx);
+ ret = vm_map_inherit(&p->p_vmspace->vm_map, addr, addr+size,
+ inherit);
+ mtx_unlock(&vm_mtx);
+
+ switch (ret) {
case KERN_SUCCESS:
return (0);
case KERN_PROTECTION_FAILURE:
@@ -702,6 +720,7 @@ madvise(p, uap)
struct madvise_args *uap;
{
vm_offset_t start, end;
+ int ret;
/*
* Check for illegal behavior
@@ -729,9 +748,10 @@ madvise(p, uap)
start = trunc_page((vm_offset_t) uap->addr);
end = round_page((vm_offset_t) uap->addr + uap->len);
- if (vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav))
- return (EINVAL);
- return (0);
+ mtx_lock(&vm_mtx);
+ ret = vm_map_madvise(&p->p_vmspace->vm_map, start, end, uap->behav);
+ mtx_unlock(&vm_mtx);
+ return (ret ? EINVAL : 0);
}
#ifndef _SYS_SYSPROTO_H_
@@ -777,6 +797,7 @@ mincore(p, uap)
vec = uap->vec;
map = &p->p_vmspace->vm_map;
+ mtx_lock(&vm_mtx);
pmap = vmspace_pmap(p->p_vmspace);
vm_map_lock_read(map);
@@ -856,6 +877,7 @@ RestartScan:
* the map, we release the lock.
*/
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
/*
* calculate index into user supplied byte vector
@@ -886,6 +908,7 @@ RestartScan:
* If the map has changed, due to the subyte, the previous
* output may be invalid.
*/
+ mtx_lock(&vm_mtx);
vm_map_lock_read(map);
if (timestamp != map->timestamp)
goto RestartScan;
@@ -900,6 +923,7 @@ RestartScan:
* the map, we release the lock.
*/
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
/*
* Zero the last entries in the byte vector.
@@ -917,10 +941,12 @@ RestartScan:
* If the map has changed, due to the subyte, the previous
* output may be invalid.
*/
+ mtx_lock(&vm_mtx);
vm_map_lock_read(map);
if (timestamp != map->timestamp)
goto RestartScan;
vm_map_unlock_read(map);
+ mtx_unlock(&vm_mtx);
return (0);
}
@@ -965,7 +991,10 @@ mlock(p, uap)
return (error);
#endif
- error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, FALSE);
+ mtx_lock(&vm_mtx);
+ error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr,
+ addr + size, FALSE);
+ mtx_unlock(&vm_mtx);
return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
@@ -1030,7 +1059,10 @@ munlock(p, uap)
return (error);
#endif
- error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr, addr + size, TRUE);
+ mtx_lock(&vm_mtx);
+ error = vm_map_user_pageable(&p->p_vmspace->vm_map, addr,
+ addr + size, TRUE);
+ mtx_unlock(&vm_mtx);
return (error == KERN_SUCCESS ? 0 : ENOMEM);
}
@@ -1077,7 +1109,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
if (*addr != trunc_page(*addr))
return (EINVAL);
fitit = FALSE;
+ mtx_lock(&vm_mtx);
(void) vm_map_remove(map, *addr, *addr + size);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -1099,7 +1133,9 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
struct vattr vat;
int error;
+ mtx_lock(&Giant);
error = VOP_GETATTR(vp, &vat, p->p_ucred, p);
+ mtx_unlock(&Giant);
if (error)
return (error);
objsize = round_page(vat.va_size);
@@ -1148,6 +1184,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
maxprot |= VM_PROT_EXECUTE;
#endif
+ mtx_lock(&vm_mtx);
if (fitit) {
*addr = pmap_addr_hint(object, *addr, size);
}
@@ -1180,6 +1217,7 @@ vm_mmap(vm_map_t map, vm_offset_t *addr, vm_size_t size, vm_prot_t prot,
}
}
out:
+ mtx_unlock(&vm_mtx);
switch (rv) {
case KERN_SUCCESS:
return (0);
diff --git a/sys/vm/vm_object.c b/sys/vm/vm_object.c
index 33fe834..30ef190 100644
--- a/sys/vm/vm_object.c
+++ b/sys/vm/vm_object.c
@@ -146,6 +146,24 @@ _vm_object_allocate(type, size, object)
vm_object_t object;
{
int incr;
+ int hadvmlock;
+
+ /*
+ * XXX: Not all callers seem to have the lock, compensate.
+ * I'm pretty sure we need to bump the gen count before possibly
+ * nuking the data contained within while under the lock.
+ */
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+ object->generation++;
+ if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
+ vm_object_set_flag(object, OBJ_ONEMAPPING);
+ TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
+ vm_object_count++;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+
TAILQ_INIT(&object->memq);
TAILQ_INIT(&object->shadow_head);
@@ -153,8 +171,6 @@ _vm_object_allocate(type, size, object)
object->size = size;
object->ref_count = 1;
object->flags = 0;
- if ((object->type == OBJT_DEFAULT) || (object->type == OBJT_SWAP))
- vm_object_set_flag(object, OBJ_ONEMAPPING);
object->paging_in_progress = 0;
object->resident_page_count = 0;
object->shadow_count = 0;
@@ -175,10 +191,6 @@ _vm_object_allocate(type, size, object)
*/
object->hash_rand = object_hash_rand - 129;
- object->generation++;
-
- TAILQ_INSERT_TAIL(&vm_object_list, object, object_list);
- vm_object_count++;
object_hash_rand = object->hash_rand;
}
@@ -226,7 +238,6 @@ vm_object_allocate(type, size)
vm_object_t result;
result = (vm_object_t) zalloc(obj_zone);
-
_vm_object_allocate(type, size, result);
return (result);
@@ -250,18 +261,29 @@ vm_object_reference(object)
object->ref_count++;
if (object->type == OBJT_VNODE) {
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
while (vget((struct vnode *) object->handle, LK_RETRY|LK_NOOBJ, curproc)) {
printf("vm_object_reference: delay in getting object\n");
}
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
}
+/*
+ * handle deallocating an object of type OBJT_VNODE
+ *
+ * requires vm_mtx
+ * may block
+ */
void
vm_object_vndeallocate(object)
vm_object_t object;
{
struct vnode *vp = (struct vnode *) object->handle;
+ mtx_assert(&vm_mtx, MA_OWNED);
KASSERT(object->type == OBJT_VNODE,
("vm_object_vndeallocate: not a vnode object"));
KASSERT(vp != NULL, ("vm_object_vndeallocate: missing vp"));
@@ -277,7 +299,14 @@ vm_object_vndeallocate(object)
vp->v_flag &= ~VTEXT;
vm_object_clear_flag(object, OBJ_OPT);
}
+ /*
+ * vrele may need a vop lock
+ */
+ mtx_unlock(VM_OBJECT_MTX(object));
+ mtx_lock(&Giant);
vrele(vp);
+ mtx_unlock(&Giant);
+ mtx_lock(VM_OBJECT_MTX(object));
}
/*
@@ -290,6 +319,7 @@ vm_object_vndeallocate(object)
* may be relinquished.
*
* No object may be locked.
+ * vm_mtx must be held
*/
void
vm_object_deallocate(object)
@@ -297,6 +327,7 @@ vm_object_deallocate(object)
{
vm_object_t temp;
+ mtx_assert(&vm_mtx, MA_OWNED);
while (object != NULL) {
if (object->type == OBJT_VNODE) {
@@ -422,7 +453,11 @@ vm_object_terminate(object)
vm_object_page_clean(object, 0, 0, OBJPC_SYNC);
vp = (struct vnode *) object->handle;
+ mtx_unlock(VM_OBJECT_MTX(object));
+ mtx_lock(&Giant);
vinvalbuf(vp, V_SAVE, NOCRED, NULL, 0, 0);
+ mtx_unlock(&Giant);
+ mtx_lock(VM_OBJECT_MTX(object));
}
KASSERT(object->ref_count == 0,
@@ -507,6 +542,7 @@ vm_object_page_clean(object, start, end, flags)
vm_page_t ma[vm_pageout_page_count];
int curgeneration;
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
if (object->type != OBJT_VNODE ||
(object->flags & OBJ_MIGHTBEDIRTY) == 0)
return;
@@ -962,6 +998,7 @@ vm_object_backing_scan(vm_object_t object, int op)
vm_pindex_t backing_offset_index;
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
backing_object = object->backing_object;
backing_offset_index = OFF_TO_IDX(object->backing_object_offset);
@@ -1175,6 +1212,9 @@ void
vm_object_collapse(object)
vm_object_t object;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+
while (TRUE) {
vm_object_t backing_object;
@@ -1386,6 +1426,8 @@ vm_object_page_remove(object, start, end, clean_only)
unsigned int size;
int all;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (object == NULL ||
object->resident_page_count == 0)
return;
@@ -1502,6 +1544,8 @@ vm_object_coalesce(prev_object, prev_pindex, prev_size, next_size)
{
vm_pindex_t next_pindex;
+ mtx_assert(&vm_mtx, MA_OWNED);
+
if (prev_object == NULL) {
return (TRUE);
}
diff --git a/sys/vm/vm_object.h b/sys/vm/vm_object.h
index ba4c026..2b29baf 100644
--- a/sys/vm/vm_object.h
+++ b/sys/vm/vm_object.h
@@ -169,34 +169,49 @@ extern vm_object_t kmem_object;
#ifdef _KERNEL
+/*
+ * For now a global vm lock.
+ */
+#define VM_OBJECT_MTX(object) (&vm_mtx)
+
static __inline void
vm_object_set_flag(vm_object_t object, u_short bits)
{
- atomic_set_short(&object->flags, bits);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->flags |= bits;
}
static __inline void
vm_object_clear_flag(vm_object_t object, u_short bits)
{
- atomic_clear_short(&object->flags, bits);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->flags &= ~bits;
}
static __inline void
vm_object_pip_add(vm_object_t object, short i)
{
- atomic_add_short(&object->paging_in_progress, i);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->paging_in_progress += i;
}
static __inline void
vm_object_pip_subtract(vm_object_t object, short i)
{
- atomic_subtract_short(&object->paging_in_progress, i);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->paging_in_progress -= i;
}
static __inline void
vm_object_pip_wakeup(vm_object_t object)
{
- atomic_subtract_short(&object->paging_in_progress, 1);
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
+ object->paging_in_progress--;
if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
vm_object_clear_flag(object, OBJ_PIPWNT);
wakeup(object);
@@ -206,8 +221,10 @@ vm_object_pip_wakeup(vm_object_t object)
static __inline void
vm_object_pip_wakeupn(vm_object_t object, short i)
{
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
if (i)
- atomic_subtract_short(&object->paging_in_progress, i);
+ object->paging_in_progress -= i;
if ((object->flags & OBJ_PIPWNT) && object->paging_in_progress == 0) {
vm_object_clear_flag(object, OBJ_PIPWNT);
wakeup(object);
@@ -217,11 +234,13 @@ vm_object_pip_wakeupn(vm_object_t object, short i)
static __inline void
vm_object_pip_sleep(vm_object_t object, char *waitid)
{
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
if (object->paging_in_progress) {
int s = splvm();
if (object->paging_in_progress) {
vm_object_set_flag(object, OBJ_PIPWNT);
- tsleep(object, PVM, waitid, 0);
+ msleep(object, VM_OBJECT_MTX(object), PVM, waitid, 0);
}
splx(s);
}
@@ -230,6 +249,8 @@ vm_object_pip_sleep(vm_object_t object, char *waitid)
static __inline void
vm_object_pip_wait(vm_object_t object, char *waitid)
{
+
+ mtx_assert(VM_OBJECT_MTX(object), MA_OWNED);
while (object->paging_in_progress)
vm_object_pip_sleep(object, waitid);
}
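
The vm_object.h hunks above convert the lock-free atomic_*_short() helpers into plain read-modify-write operations that are only legal while VM_OBJECT_MTX() (currently the single global vm_mtx) is held, and vm_object_pip_sleep() switches from tsleep() to msleep() so the lock is dropped atomically for the duration of the sleep. The following is a hedged sketch of how a caller is expected to use the converted inlines; object_paging_example() is an illustrative name, not a function added by the patch.

/*
 * Illustrative caller of the converted inlines: all flag and
 * paging_in_progress updates now happen with VM_OBJECT_MTX() held,
 * so the plain "|=" / "++" forms cannot race.
 */
static void
object_paging_example(vm_object_t object)
{
	mtx_lock(VM_OBJECT_MTX(object));	/* today this is just &vm_mtx */
	vm_object_pip_add(object, 1);		/* plain "+=" under the lock */
	/* ... issue paging I/O ... */
	vm_object_pip_wakeup(object);		/* "--" plus wakeup() of waiters */
	mtx_unlock(VM_OBJECT_MTX(object));
}
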
diff --git a/sys/vm/vm_page.c b/sys/vm/vm_page.c
index 5865d70..2ae0fe7 100644
--- a/sys/vm/vm_page.c
+++ b/sys/vm/vm_page.c
@@ -71,6 +71,7 @@
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/malloc.h>
#include <sys/proc.h>
#include <sys/vmmeter.h>
@@ -147,6 +148,7 @@ vm_set_page_size()
*
* Add a new page to the freelist for use by the system.
* Must be called at splhigh().
+ * Must be called with the vm_mtx held.
*/
vm_page_t
vm_add_new_page(pa)
@@ -154,6 +156,7 @@ vm_add_new_page(pa)
{
vm_page_t m;
+ mtx_assert(&vm_mtx, MA_OWNED);
++cnt.v_page_count;
++cnt.v_free_count;
m = PHYS_TO_VM_PAGE(pa);
@@ -360,6 +363,7 @@ vm_page_insert(m, object, pindex)
{
register struct vm_page **bucket;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (m->object != NULL)
panic("vm_page_insert: already inserted");
@@ -419,6 +423,7 @@ vm_page_remove(m)
{
vm_object_t object;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (m->object == NULL)
return;
@@ -482,6 +487,8 @@ vm_page_remove(m)
* an interrupt makes a change, but the generation algorithm will not
* operate properly in an SMP environment where both cpu's are able to run
* kernel code simultaneously.
+ * NOTE: under the giant vm lock we should be ok; there should be
+ * no reason to check vm_page_bucket_generation
*
* The object must be locked. No side effects.
* This routine may not block.
@@ -596,6 +603,8 @@ vm_page_unqueue(m)
{
int queue = m->queue;
struct vpgqueues *pq;
+
+ mtx_assert(&vm_mtx, MA_OWNED);
if (queue != PQ_NONE) {
m->queue = PQ_NONE;
pq = &vm_page_queues[queue];
@@ -636,6 +645,7 @@ _vm_page_list_find(basequeue, index)
vm_page_t m = NULL;
struct vpgqueues *pq;
+ mtx_assert(&vm_mtx, MA_OWNED);
pq = &vm_page_queues[basequeue];
/*
@@ -673,6 +683,7 @@ vm_page_select_cache(object, pindex)
{
vm_page_t m;
+ mtx_assert(&vm_mtx, MA_OWNED);
while (TRUE) {
m = vm_page_list_find(
PQ_CACHE,
@@ -724,7 +735,7 @@ vm_page_select_free(vm_object_t object, vm_pindex_t pindex, boolean_t prefer_zer
* VM_ALLOC_INTERRUPT interrupt time request
* VM_ALLOC_ZERO zero page
*
- * Object must be locked.
+ * vm_mtx must be locked.
* This routine may not block.
*
* Additional special handling is required when called from an
@@ -741,6 +752,7 @@ vm_page_alloc(object, pindex, page_req)
register vm_page_t m = NULL;
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
KASSERT(!vm_page_lookup(object, pindex),
("vm_page_alloc: page already allocated"));
@@ -873,13 +885,13 @@ vm_wait()
s = splvm();
if (curproc == pageproc) {
vm_pageout_pages_needed = 1;
- tsleep(&vm_pageout_pages_needed, PSWP, "VMWait", 0);
+ msleep(&vm_pageout_pages_needed, &vm_mtx, PSWP, "VMWait", 0);
} else {
if (!vm_pages_needed) {
vm_pages_needed = 1;
wakeup(&vm_pages_needed);
}
- tsleep(&cnt.v_free_count, PVM, "vmwait", 0);
+ msleep(&cnt.v_free_count, &vm_mtx, PVM, "vmwait", 0);
}
splx(s);
}
@@ -910,61 +922,6 @@ vm_await()
splx(s);
}
-#if 0
-/*
- * vm_page_sleep:
- *
- * Block until page is no longer busy.
- */
-
-int
-vm_page_sleep(vm_page_t m, char *msg, char *busy) {
- int slept = 0;
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- int s;
- s = splvm();
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- vm_page_flag_set(m, PG_WANTED);
- tsleep(m, PVM, msg, 0);
- slept = 1;
- }
- splx(s);
- }
- return slept;
-}
-
-#endif
-
-#if 0
-
-/*
- * vm_page_asleep:
- *
- * Similar to vm_page_sleep(), but does not block. Returns 0 if
- * the page is not busy, or 1 if the page is busy.
- *
- * This routine has the side effect of calling asleep() if the page
- * was busy (1 returned).
- */
-
-int
-vm_page_asleep(vm_page_t m, char *msg, char *busy) {
- int slept = 0;
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- int s;
- s = splvm();
- if ((busy && *busy) || (m->flags & PG_BUSY)) {
- vm_page_flag_set(m, PG_WANTED);
- asleep(m, PVM, msg, 0);
- slept = 1;
- }
- splx(s);
- }
- return slept;
-}
-
-#endif
-
/*
* vm_page_activate:
*
@@ -982,6 +939,7 @@ vm_page_activate(m)
int s;
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
if (m->queue != PQ_ACTIVE) {
if ((m->queue - m->pc) == PQ_CACHE)
cnt.v_reactivated++;
@@ -1056,6 +1014,7 @@ vm_page_free_toq(vm_page_t m)
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
cnt.v_tfree++;
if (m->busy || ((m->queue - m->pc) == PQ_FREE) ||
@@ -1293,6 +1252,7 @@ _vm_page_deactivate(vm_page_t m, int athead)
{
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Ignore if already inactive.
*/
@@ -1330,6 +1290,8 @@ vm_page_deactivate(vm_page_t m)
int
vm_page_try_to_cache(vm_page_t m)
{
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
if (m->dirty || m->hold_count || m->busy || m->wire_count ||
(m->flags & (PG_BUSY|PG_UNMANAGED))) {
return(0);
@@ -1354,6 +1316,7 @@ vm_page_cache(m)
{
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
if ((m->flags & (PG_BUSY|PG_UNMANAGED)) || m->busy || m->wire_count) {
printf("vm_page_cache: attempting to cache busy page\n");
return;
@@ -1411,6 +1374,7 @@ vm_page_dontneed(m)
int dnw;
int head;
+ mtx_assert(&vm_mtx, MA_OWNED);
dnw = ++dnweight;
/*
@@ -1451,6 +1415,7 @@ vm_page_dontneed(m)
* to be in the object. If the page doesn't exist, allocate it.
*
* This routine may block.
+ * Requires vm_mtx.
*/
vm_page_t
vm_page_grab(object, pindex, allocflags)
@@ -1458,10 +1423,10 @@ vm_page_grab(object, pindex, allocflags)
vm_pindex_t pindex;
int allocflags;
{
-
vm_page_t m;
int s, generation;
+ mtx_assert(&vm_mtx, MA_OWNED);
retrylookup:
if ((m = vm_page_lookup(object, pindex)) != NULL) {
if (m->busy || (m->flags & PG_BUSY)) {
@@ -1471,7 +1436,7 @@ retrylookup:
while ((object->generation == generation) &&
(m->busy || (m->flags & PG_BUSY))) {
vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
- tsleep(m, PVM, "pgrbwt", 0);
+ msleep(m, &vm_mtx, PVM, "pgrbwt", 0);
if ((allocflags & VM_ALLOC_RETRY) == 0) {
splx(s);
return NULL;
@@ -1534,6 +1499,8 @@ vm_page_bits(int base, int size)
* This routine may not block.
*
* (base + size) must be less than or equal to PAGE_SIZE.
+ *
+ * vm_mtx needs to be held
*/
void
vm_page_set_validclean(m, base, size)
@@ -1545,6 +1512,7 @@ vm_page_set_validclean(m, base, size)
int frag;
int endoff;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (size == 0) /* handle degenerate case */
return;
@@ -1618,6 +1586,8 @@ vm_page_clear_dirty(m, base, size)
int base;
int size;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
m->dirty &= ~vm_page_bits(base, size);
}
@@ -1637,6 +1607,7 @@ vm_page_set_invalid(m, base, size)
{
int bits;
+ mtx_assert(&vm_mtx, MA_OWNED);
bits = vm_page_bits(base, size);
m->valid &= ~bits;
m->dirty &= ~bits;
@@ -1923,8 +1894,19 @@ contigmalloc(size, type, flags, low, high, alignment, boundary)
unsigned long alignment;
unsigned long boundary;
{
- return contigmalloc1(size, type, flags, low, high, alignment, boundary,
+ void * ret;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+ ret = contigmalloc1(size, type, flags, low, high, alignment, boundary,
kernel_map);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+
+ return (ret);
+
}
void
@@ -1933,7 +1915,14 @@ contigfree(addr, size, type)
unsigned long size;
struct malloc_type *type;
{
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
kmem_free(kernel_map, (vm_offset_t)addr, size);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
vm_offset_t
@@ -1943,8 +1932,18 @@ vm_page_alloc_contig(size, low, high, alignment)
vm_offset_t high;
vm_offset_t alignment;
{
- return ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
+ vm_offset_t ret;
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+ ret = ((vm_offset_t)contigmalloc1(size, M_DEVBUF, M_NOWAIT, low, high,
alignment, 0ul, kernel_map));
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (ret);
+
}
#include "opt_ddb.h"
diff --git a/sys/vm/vm_page.h b/sys/vm/vm_page.h
index dc8290e..e1c1cc4 100644
--- a/sys/vm/vm_page.h
+++ b/sys/vm/vm_page.h
@@ -305,19 +305,28 @@ extern long first_page; /* first physical page number */
(&vm_page_array[atop(pa) - first_page ])
/*
+ * For now, a global vm lock
+ */
+#define VM_PAGE_MTX(m) (&vm_mtx)
+
+/*
* Functions implemented as macros
*/
static __inline void
vm_page_flag_set(vm_page_t m, unsigned short bits)
{
- atomic_set_short(&(m)->flags, bits);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->flags |= bits;
}
static __inline void
vm_page_flag_clear(vm_page_t m, unsigned short bits)
{
- atomic_clear_short(&(m)->flags, bits);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->flags &= ~bits;
}
#if 0
@@ -332,7 +341,9 @@ vm_page_assert_wait(vm_page_t m, int interruptible)
static __inline void
vm_page_busy(vm_page_t m)
{
- KASSERT((m->flags & PG_BUSY) == 0, ("vm_page_busy: page already busy!!!"));
+
+ KASSERT((m->flags & PG_BUSY) == 0,
+ ("vm_page_busy: page already busy!!!"));
vm_page_flag_set(m, PG_BUSY);
}
@@ -375,13 +386,17 @@ vm_page_wakeup(vm_page_t m)
static __inline void
vm_page_io_start(vm_page_t m)
{
- atomic_add_char(&(m)->busy, 1);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->busy++;
}
static __inline void
vm_page_io_finish(vm_page_t m)
{
- atomic_subtract_char(&m->busy, 1);
+
+ mtx_assert(VM_PAGE_MTX(m), MA_OWNED);
+ m->busy--;
if (m->busy == 0)
vm_page_flash(m);
}
@@ -447,12 +462,16 @@ void vm_page_free_toq(vm_page_t m);
static __inline void
vm_page_hold(vm_page_t mem)
{
+
+ mtx_assert(VM_PAGE_MTX(mem), MA_OWNED);
mem->hold_count++;
}
static __inline void
vm_page_unhold(vm_page_t mem)
{
+
+ mtx_assert(VM_PAGE_MTX(mem), MA_OWNED);
--mem->hold_count;
KASSERT(mem->hold_count >= 0, ("vm_page_unhold: hold count < 0!!!"));
}
@@ -565,7 +584,7 @@ vm_page_sleep_busy(vm_page_t m, int also_m_busy, const char *msg)
* Page is busy. Wait and retry.
*/
vm_page_flag_set(m, PG_WANTED | PG_REFERENCED);
- tsleep(m, PVM, msg, 0);
+ msleep(m, VM_PAGE_MTX(m), PVM, msg, 0);
}
splx(s);
return(TRUE);
diff --git a/sys/vm/vm_pageout.c b/sys/vm/vm_pageout.c
index ce333cf..60e3f21 100644
--- a/sys/vm/vm_pageout.c
+++ b/sys/vm/vm_pageout.c
@@ -447,6 +447,8 @@ vm_pageout_flush(mc, count, flags)
* backing_objects.
*
* The object and map must be locked.
+ *
+ * Requires the vm_mtx
*/
static void
vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
@@ -460,6 +462,7 @@ vm_pageout_object_deactivate_pages(map, object, desired, map_remove_only)
int remove_mode;
int s;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (object->type == OBJT_DEVICE || object->type == OBJT_PHYS)
return;
@@ -1322,7 +1325,7 @@ vm_pageout()
{
int pass;
- mtx_lock(&Giant);
+ mtx_lock(&vm_mtx);
/*
* Initialize some paging parameters.
@@ -1412,7 +1415,8 @@ vm_pageout()
*/
++pass;
if (pass > 1)
- tsleep(&vm_pages_needed, PVM, "psleep", hz/2);
+ msleep(&vm_pages_needed, &vm_mtx, PVM,
+ "psleep", hz/2);
} else {
/*
* Good enough, sleep & handle stats. Prime the pass
@@ -1422,7 +1426,7 @@ vm_pageout()
pass = 1;
else
pass = 0;
- error = tsleep(&vm_pages_needed,
+ error = msleep(&vm_pages_needed, &vm_mtx,
PVM, "psleep", vm_pageout_stats_interval * hz);
if (error && !vm_pages_needed) {
splx(s);
@@ -1466,12 +1470,13 @@ vm_daemon()
{
struct proc *p;
- mtx_lock(&Giant);
+ mtx_lock(&vm_mtx);
while (TRUE) {
- tsleep(&vm_daemon_needed, PPAUSE, "psleep", 0);
+ msleep(&vm_daemon_needed, &vm_mtx, PPAUSE, "psleep", 0);
if (vm_pageout_req_swapout) {
swapout_procs(vm_pageout_req_swapout);
+ mtx_assert(&vm_mtx, MA_OWNED);
vm_pageout_req_swapout = 0;
}
/*
@@ -1479,6 +1484,7 @@ vm_daemon()
* process is swapped out -- deactivate pages
*/
+ mtx_unlock(&vm_mtx);
sx_slock(&allproc_lock);
LIST_FOREACH(p, &allproc, p_list) {
vm_pindex_t limit, size;
@@ -1515,13 +1521,16 @@ vm_daemon()
limit = 0; /* XXX */
mtx_unlock_spin(&sched_lock);
+ mtx_lock(&vm_mtx);
size = vmspace_resident_count(p->p_vmspace);
if (limit >= 0 && size >= limit) {
vm_pageout_map_deactivate_pages(
&p->p_vmspace->vm_map, limit);
}
+ mtx_unlock(&vm_mtx);
}
sx_sunlock(&allproc_lock);
+ mtx_lock(&vm_mtx);
}
}
#endif
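
vm_pageout() and vm_daemon() above now take vm_mtx instead of Giant, and every tsleep() on a VM wait channel becomes msleep(..., &vm_mtx, ...), which releases the mutex while sleeping and reacquires it before returning so the waited-on condition can be rechecked under the lock. A short sketch of that wait-loop shape; vm_resources_short() is a hypothetical predicate, not part of the patch.

/*
 * Sketch of the converted wait loops: msleep() atomically drops
 * vm_mtx for the duration of the sleep and retakes it before
 * returning, so the predicate is always re-evaluated with the lock
 * held.  vm_resources_short() is a placeholder.
 */
static void
wait_for_pages(void)
{
	mtx_lock(&vm_mtx);
	while (vm_resources_short())
		msleep(&cnt.v_free_count, &vm_mtx, PVM, "vmwait", 0);
	mtx_unlock(&vm_mtx);
}
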
diff --git a/sys/vm/vm_pager.c b/sys/vm/vm_pager.c
index b13c9c0..e53a14c 100644
--- a/sys/vm/vm_pager.c
+++ b/sys/vm/vm_pager.c
@@ -240,21 +240,32 @@ vm_pager_bufferinit()
* need to perform page-level validation (e.g. the device pager).
*/
vm_object_t
-vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size, vm_prot_t prot,
- vm_ooffset_t off)
+vm_pager_allocate(objtype_t type, void *handle, vm_ooffset_t size,
+ vm_prot_t prot, vm_ooffset_t off)
{
+ vm_object_t ret;
struct pagerops *ops;
+ int hadvmlock;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
ops = pagertab[type];
if (ops)
- return ((*ops->pgo_alloc) (handle, size, prot, off));
- return (NULL);
+ ret = (*ops->pgo_alloc) (handle, size, prot, off);
+ else
+ ret = NULL;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
+ return (ret);
}
void
vm_pager_deallocate(object)
vm_object_t object;
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
(*pagertab[object->type]->pgo_dealloc) (object);
}
@@ -374,6 +385,8 @@ initpbuf(struct buf *bp)
*
* NOTE: pfreecnt can be NULL, but this 'feature' will be removed
* relatively soon when the rest of the subsystems get smart about it. XXX
+ *
+ * vm_mtx may or may not be held by the caller; its state is preserved
*/
struct buf *
getpbuf(pfreecnt)
@@ -381,8 +394,12 @@ getpbuf(pfreecnt)
{
int s;
struct buf *bp;
+ int hadvmlock;
s = splvm();
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (hadvmlock)
+ mtx_unlock(&vm_mtx);
mtx_lock(&pbuf_mtx);
for (;;) {
@@ -407,6 +424,8 @@ getpbuf(pfreecnt)
splx(s);
initpbuf(bp);
+ if (hadvmlock)
+ mtx_lock(&vm_mtx);
return bp;
}
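
getpbuf() above may be entered with or without vm_mtx; because it can block on the pbuf queues, it drops vm_mtx (when held) before taking pbuf_mtx and retakes it only once the buffer is ready. A hedged sketch of that lock juggling; alloc_pbuf_locked() is a placeholder for the queue manipulation done under pbuf_mtx.

/*
 * Sketch only: never sleep on the pbuf queues while holding vm_mtx,
 * but return with the caller's original lock state restored.
 * alloc_pbuf_locked() is hypothetical.
 */
static struct buf *
pbuf_get_sketch(void)
{
	struct buf *bp;
	int hadvmlock, s;

	s = splvm();
	hadvmlock = mtx_owned(&vm_mtx);
	if (hadvmlock)
		mtx_unlock(&vm_mtx);	/* we may sleep below */
	mtx_lock(&pbuf_mtx);
	bp = alloc_pbuf_locked();	/* may block waiting for a free pbuf */
	mtx_unlock(&pbuf_mtx);
	splx(s);
	if (hadvmlock)
		mtx_lock(&vm_mtx);	/* restore the caller's state */
	return (bp);
}
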
diff --git a/sys/vm/vm_pager.h b/sys/vm/vm_pager.h
index f54c739..b4511ca 100644
--- a/sys/vm/vm_pager.h
+++ b/sys/vm/vm_pager.h
@@ -124,10 +124,12 @@ vm_pager_get_pages(
) {
int r;
+ mtx_assert(&vm_mtx, MA_OWNED);
r = (*pagertab[object->type]->pgo_getpages)(object, m, count, reqpage);
if (r == VM_PAGER_OK && m[reqpage]->valid != VM_PAGE_BITS_ALL) {
vm_page_zero_invalid(m[reqpage], TRUE);
}
+ mtx_assert(&vm_mtx, MA_OWNED);
return(r);
}
@@ -139,8 +141,11 @@ vm_pager_put_pages(
int flags,
int *rtvals
) {
+
+ mtx_assert(&vm_mtx, MA_OWNED);
(*pagertab[object->type]->pgo_putpages)
(object, m, count, flags, rtvals);
+ mtx_assert(&vm_mtx, MA_OWNED);
}
/*
@@ -161,7 +166,13 @@ vm_pager_has_page(
int *before,
int *after
) {
- return ((*pagertab[object->type]->pgo_haspage) (object, offset, before, after));
+ boolean_t ret;
+
+ mtx_assert(&vm_mtx, MA_OWNED);
+ ret = (*pagertab[object->type]->pgo_haspage)
+ (object, offset, before, after);
+ mtx_assert(&vm_mtx, MA_OWNED);
+ return (ret);
}
/*
@@ -175,8 +186,11 @@ vm_pager_has_page(
static __inline void
vm_pager_page_unswapped(vm_page_t m)
{
+
+ mtx_assert(&vm_mtx, MA_OWNED);
if (pagertab[m->object->type]->pgo_pageunswapped)
(*pagertab[m->object->type]->pgo_pageunswapped)(m);
+ mtx_assert(&vm_mtx, MA_OWNED);
}
#endif
diff --git a/sys/vm/vm_unix.c b/sys/vm/vm_unix.c
index f9b24f8..4861306 100644
--- a/sys/vm/vm_unix.c
+++ b/sys/vm/vm_unix.c
@@ -49,6 +49,9 @@
#include <sys/sysproto.h>
#include <sys/proc.h>
#include <sys/resourcevar.h>
+#include <sys/lock.h>
+#include <sys/mutex.h>
+#include <sys/systm.h>
#include <vm/vm.h>
#include <vm/vm_param.h>
@@ -93,6 +96,7 @@ obreak(p, uap)
return EINVAL;
}
+ mtx_lock(&vm_mtx);
if (new > old) {
vm_size_t diff;
@@ -100,16 +104,19 @@ obreak(p, uap)
rv = vm_map_find(&vm->vm_map, NULL, 0, &old, diff, FALSE,
VM_PROT_ALL, VM_PROT_ALL, 0);
if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return (ENOMEM);
}
vm->vm_dsize += btoc(diff);
} else if (new < old) {
rv = vm_map_remove(&vm->vm_map, new, old);
if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return (ENOMEM);
}
vm->vm_dsize -= btoc(old - new);
}
+ mtx_unlock(&vm_mtx);
return (0);
}
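
obreak() above now brackets its vm_map_find()/vm_map_remove() calls with vm_mtx and has to drop the lock on every early return. A condensed sketch of the grow path, reusing the call shown in the hunk; obreak_grow_sketch() itself is illustrative, not code from the patch.

/*
 * Sketch of the locking discipline added to obreak(): vm_mtx is taken
 * before touching the map and must be released on every return,
 * including the ENOMEM path.
 */
static int
obreak_grow_sketch(struct vmspace *vm, vm_offset_t old, vm_size_t diff)
{
	int rv;

	mtx_lock(&vm_mtx);
	rv = vm_map_find(&vm->vm_map, NULL, 0, &old, diff, FALSE,
	    VM_PROT_ALL, VM_PROT_ALL, 0);
	if (rv != KERN_SUCCESS) {
		mtx_unlock(&vm_mtx);	/* drop the lock on the error path */
		return (ENOMEM);
	}
	vm->vm_dsize += btoc(diff);
	mtx_unlock(&vm_mtx);
	return (0);
}
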
diff --git a/sys/vm/vm_zone.c b/sys/vm/vm_zone.c
index 4cddadc..30fadbe 100644
--- a/sys/vm/vm_zone.c
+++ b/sys/vm/vm_zone.c
@@ -137,6 +137,7 @@ zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
* in pages as needed.
*/
if (z->zflags & ZONE_INTERRUPT) {
+ int hadvmlock;
totsize = round_page(z->zsize * nentries);
atomic_add_int(&zone_kmem_kvaspace, totsize);
@@ -145,12 +146,17 @@ zinitna(vm_zone_t z, vm_object_t obj, char *name, int size,
return 0;
z->zpagemax = totsize / PAGE_SIZE;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
if (obj == NULL) {
z->zobj = vm_object_allocate(OBJT_DEFAULT, z->zpagemax);
} else {
z->zobj = obj;
_vm_object_allocate(OBJT_DEFAULT, z->zpagemax, obj);
}
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
z->zallocflag = VM_ALLOC_INTERRUPT;
z->zmax += nentries;
} else {
@@ -262,7 +268,6 @@ _zget(vm_zone_t z)
void *item;
KASSERT(z != NULL, ("invalid zone"));
- mtx_assert(&z->zmtx, MA_OWNED);
if (z->zflags & ZONE_INTERRUPT) {
item = (char *) z->zkva + z->zpagecount * PAGE_SIZE;
@@ -299,16 +304,13 @@ _zget(vm_zone_t z)
* We can wait, so just do normal map allocation in the appropriate
* map.
*/
+ mtx_unlock(&z->zmtx);
if (lockstatus(&kernel_map->lock, NULL)) {
- mtx_unlock(&z->zmtx);
item = (void *) kmem_malloc(kmem_map, nbytes, M_WAITOK);
- mtx_lock(&z->zmtx);
if (item != NULL)
atomic_add_int(&zone_kmem_pages, z->zalloc);
} else {
- mtx_unlock(&z->zmtx);
item = (void *) kmem_alloc(kernel_map, nbytes);
- mtx_lock(&z->zmtx);
if (item != NULL)
atomic_add_int(&zone_kern_pages, z->zalloc);
}
@@ -318,6 +320,7 @@ _zget(vm_zone_t z)
nbytes = 0;
}
nitems = nbytes / z->zsize;
+ mtx_lock(&z->zmtx);
}
z->ztotal += nitems;
@@ -361,14 +364,17 @@ void *
zalloc(vm_zone_t z)
{
void *item;
+ int hadvmlock;
KASSERT(z != NULL, ("invalid zone"));
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
mtx_lock(&z->zmtx);
if (z->zfreecnt <= z->zfreemin) {
item = _zget(z);
- mtx_unlock(&z->zmtx);
- return item;
+ goto out;
}
item = z->zitems;
@@ -381,8 +387,11 @@ zalloc(vm_zone_t z)
z->zfreecnt--;
z->znalloc++;
-
+
+out:
mtx_unlock(&z->zmtx);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
return item;
}
@@ -392,8 +401,13 @@ zalloc(vm_zone_t z)
void
zfree(vm_zone_t z, void *item)
{
+ int hadvmlock;
+
KASSERT(z != NULL, ("invalid zone"));
KASSERT(item != NULL, ("invalid item"));
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
mtx_lock(&z->zmtx);
((void **) item)[0] = z->zitems;
@@ -405,6 +419,8 @@ zfree(vm_zone_t z, void *item)
z->zitems = item;
z->zfreecnt++;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
mtx_unlock(&z->zmtx);
}
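
zalloc() and zfree() above combine the hadvmlock idiom with the per-zone mutex, so vm_mtx (when it has to be taken at all) is acquired before z->zmtx. A minimal sketch of the zalloc() fast path under that ordering; zone_alloc_sketch() is illustrative and omits the _zget() refill the real function performs when the free list runs low.

/*
 * Sketch of the zalloc() fast path: vm_mtx (taken conditionally) is
 * acquired before the per-zone z->zmtx.  The _zget() refill path is
 * omitted here.
 */
static void *
zone_alloc_sketch(vm_zone_t z)
{
	void *item;
	int hadvmlock;

	hadvmlock = mtx_owned(&vm_mtx);
	if (!hadvmlock)
		mtx_lock(&vm_mtx);
	mtx_lock(&z->zmtx);
	item = z->zitems;		/* pop the free list, as zalloc() does */
	if (item != NULL) {
		z->zitems = ((void **)item)[0];
		z->zfreecnt--;
		z->znalloc++;
	}
	mtx_unlock(&z->zmtx);
	if (!hadvmlock)
		mtx_unlock(&vm_mtx);
	return (item);
}
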
diff --git a/sys/vm/vnode_pager.c b/sys/vm/vnode_pager.c
index e9400b8..12763c8 100644
--- a/sys/vm/vnode_pager.c
+++ b/sys/vm/vnode_pager.c
@@ -103,6 +103,7 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
vm_object_t object;
struct vnode *vp;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Pageout to vnode, no can do yet.
*/
@@ -122,11 +123,15 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
* Prevent race condition when allocating the object. This
* can happen with NFS vnodes since the nfsnode isn't locked.
*/
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
while (vp->v_flag & VOLOCK) {
vp->v_flag |= VOWANT;
tsleep(vp, PVM, "vnpobj", 0);
}
vp->v_flag |= VOLOCK;
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
/*
* If the object is being terminated, wait for it to
@@ -134,7 +139,7 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
*/
while (((object = vp->v_object) != NULL) &&
(object->flags & OBJ_DEAD)) {
- tsleep(object, PVM, "vadead", 0);
+ msleep(object, &vm_mtx, PVM, "vadead", 0);
}
if (vp->v_usecount == 0)
@@ -157,11 +162,15 @@ vnode_pager_alloc(void *handle, vm_ooffset_t size, vm_prot_t prot,
vp->v_usecount++;
}
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
vp->v_flag &= ~VOLOCK;
if (vp->v_flag & VOWANT) {
vp->v_flag &= ~VOWANT;
wakeup(vp);
}
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
return (object);
}
@@ -221,8 +230,12 @@ vnode_pager_haspage(object, pindex, before, after)
blocksperpage = (PAGE_SIZE / bsize);
reqblock = pindex * blocksperpage;
}
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
err = VOP_BMAP(vp, reqblock, (struct vnode **) 0, &bn,
after, before);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if (err)
return TRUE;
if ( bn == -1)
@@ -285,6 +298,11 @@ vnode_pager_setsize(vp, nsize)
* File has shrunk. Toss any cached pages beyond the new EOF.
*/
if (nsize < object->un_pager.vnp.vnp_size) {
+ int hadvmlock;
+
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
vm_freeze_copyopts(object, OFF_TO_IDX(nsize), object->size);
if (nobjsize < object->size) {
vm_object_page_remove(object, nobjsize, object->size,
@@ -325,6 +343,8 @@ vnode_pager_setsize(vp, nsize)
m->dirty = VM_PAGE_BITS_ALL;
}
}
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
object->un_pager.vnp.vnp_size = nsize;
object->size = nobjsize;
@@ -542,8 +562,8 @@ vnode_pager_input_old(object, m)
*/
/*
- * EOPNOTSUPP is no longer legal. For local media VFS's that do not
- * implement their own VOP_GETPAGES, their VOP_GETPAGES should call to
+ * Local media VFS's that do not implement their own VOP_GETPAGES
+ * should have their VOP_GETPAGES call
* vnode_pager_generic_getpages() to implement the previous behaviour.
*
* All other FS's should use the bypass to get to the local media
@@ -560,16 +580,11 @@ vnode_pager_getpages(object, m, count, reqpage)
struct vnode *vp;
int bytes = count * PAGE_SIZE;
+ mtx_assert(&vm_mtx, MA_OWNED);
vp = object->handle;
- /*
- * XXX temporary diagnostic message to help track stale FS code,
- * Returning EOPNOTSUPP from here may make things unhappy.
- */
rtval = VOP_GETPAGES(vp, m, bytes, reqpage, 0);
- if (rtval == EOPNOTSUPP) {
- printf("vnode_pager: *** WARNING *** stale FS getpages\n");
- rtval = vnode_pager_generic_getpages( vp, m, bytes, reqpage);
- }
+ KASSERT(rtval != EOPNOTSUPP,
+ ("vnode_pager: FS getpages not implemented\n"));
return rtval;
}
@@ -891,13 +906,19 @@ vnode_pager_putpages(object, m, count, sync, rtvals)
vp = object->handle;
if (vp->v_type != VREG)
mp = NULL;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
(void)vn_start_write(vp, &mp, V_WAIT);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
rtval = VOP_PUTPAGES(vp, m, bytes, sync, rtvals, 0);
- if (rtval == EOPNOTSUPP) {
- printf("vnode_pager: *** WARNING *** stale FS putpages\n");
- rtval = vnode_pager_generic_putpages( vp, m, bytes, sync, rtvals);
- }
+ KASSERT(rtval != EOPNOTSUPP,
+ ("vnode_pager: stale FS putpages\n"));
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
vn_finished_write(mp);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
}
@@ -1000,6 +1021,8 @@ vnode_pager_lock(object)
{
struct proc *p = curproc; /* XXX */
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+ mtx_assert(&Giant, MA_OWNED);
for (; object != NULL; object = object->backing_object) {
if (object->type != OBJT_VNODE)
continue;