Diffstat (limited to 'sys/kern')
-rw-r--r--  sys/kern/imgact_aout.c    |   8
-rw-r--r--  sys/kern/imgact_elf.c     |  21
-rw-r--r--  sys/kern/init_main.c      |   4
-rw-r--r--  sys/kern/kern_exec.c      |   7
-rw-r--r--  sys/kern/kern_exit.c      |   2
-rw-r--r--  sys/kern/kern_fork.c      |   2
-rw-r--r--  sys/kern/kern_resource.c  |   2
-rw-r--r--  sys/kern/kern_synch.c     |   7
-rw-r--r--  sys/kern/link_elf.c       |   8
-rw-r--r--  sys/kern/link_elf_obj.c   |   8
-rw-r--r--  sys/kern/subr_blist.c     |   1
-rw-r--r--  sys/kern/subr_trap.c      |  13
-rw-r--r--  sys/kern/sys_pipe.c       |  11
-rw-r--r--  sys/kern/syscalls.master  |  34
-rw-r--r--  sys/kern/sysv_shm.c       |   6
-rw-r--r--  sys/kern/vfs_bio.c        | 105
-rw-r--r--  sys/kern/vfs_cluster.c    |  10
-rw-r--r--  sys/kern/vfs_default.c    |  16
-rw-r--r--  sys/kern/vfs_extattr.c    |   7
-rw-r--r--  sys/kern/vfs_subr.c       |  15
-rw-r--r--  sys/kern/vfs_syscalls.c   |   7
21 files changed, 259 insertions(+), 35 deletions(-)
diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c
index 9478eb3..8becda3 100644
--- a/sys/kern/imgact_aout.c
+++ b/sys/kern/imgact_aout.c
@@ -171,6 +171,7 @@ exec_aout_imgact(imgp)
if (error)
return (error);
+ mtx_lock(&vm_mtx);
/*
* Destroy old process VM and create a new one (with a new stack)
*/
@@ -184,7 +185,9 @@ exec_aout_imgact(imgp)
vp = imgp->vp;
map = &vmspace->vm_map;
vm_map_lock(map);
+ mtx_unlock(&vm_mtx);
VOP_GETVOBJECT(vp, &object);
+ mtx_lock(&vm_mtx);
vm_object_reference(object);
text_end = virtual_offset + a_out->a_text;
@@ -195,6 +198,7 @@ exec_aout_imgact(imgp)
MAP_COPY_ON_WRITE | MAP_PREFAULT);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
data_end = text_end + a_out->a_data;
@@ -207,6 +211,7 @@ exec_aout_imgact(imgp)
MAP_COPY_ON_WRITE | MAP_PREFAULT);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
}
@@ -217,6 +222,7 @@ exec_aout_imgact(imgp)
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_map_unlock(map);
+ mtx_unlock(&vm_mtx);
return (error);
}
}
@@ -229,6 +235,8 @@ exec_aout_imgact(imgp)
vmspace->vm_daddr = (caddr_t) (uintptr_t)
(virtual_offset + a_out->a_text);
+ mtx_unlock(&vm_mtx);
+
/* Fill in image_params */
imgp->interpreted = 0;
imgp->entry_addr = a_out->a_entry;
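
The hunks above establish the pattern this commit applies throughout: vm_mtx brackets VM map and object operations, is dropped across vnode operations such as VOP_GETVOBJECT() that may sleep, and must be released on every error return. A minimal sketch of that shape, with a hypothetical helper name (the tree open-codes this inside exec_aout_imgact()):

static int
map_text_object(vm_map_t map, struct vnode *vp, vm_offset_t start,
    vm_offset_t end)
{
	vm_object_t object;
	int error;

	/* caller enters with vm_mtx held */
	vm_map_lock(map);
	mtx_unlock(&vm_mtx);		/* VOP_GETVOBJECT() may sleep */
	VOP_GETVOBJECT(vp, &object);
	mtx_lock(&vm_mtx);
	vm_object_reference(object);
	error = vm_map_insert(map, object, 0, start, end,
	    VM_PROT_ALL, VM_PROT_ALL, MAP_COPY_ON_WRITE | MAP_PREFAULT);
	vm_map_unlock(map);
	if (error)
		mtx_unlock(&vm_mtx);	/* error paths drop the lock too */
	return (error);
}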
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index da7b9cb..2a15e9c 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -230,6 +230,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
else
map_len = round_page(offset+filsz) - file_addr;
+ mtx_lock(&vm_mtx);
if (map_len != 0) {
vm_object_reference(object);
vm_map_lock(&vmspace->vm_map);
@@ -244,12 +245,15 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
vm_map_unlock(&vmspace->vm_map);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
+ mtx_unlock(&vm_mtx);
return EINVAL;
}
/* we can stop now if we've covered it all */
- if (memsz == filsz)
+ if (memsz == filsz) {
+ mtx_unlock(&vm_mtx);
return 0;
+ }
}
@@ -270,8 +274,10 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
map_addr, map_addr + map_len,
VM_PROT_ALL, VM_PROT_ALL, 0);
vm_map_unlock(&vmspace->vm_map);
- if (rv != KERN_SUCCESS)
+ if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return EINVAL;
+ }
}
if (copy_len != 0) {
@@ -287,14 +293,19 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
if (rv != KERN_SUCCESS) {
vm_object_deallocate(object);
+ mtx_unlock(&vm_mtx);
return EINVAL;
}
/* send the page fragment to user space */
+ mtx_unlock(&vm_mtx);
error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len);
+ mtx_lock(&vm_mtx);
vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
- if (error)
+ if (error) {
+ mtx_unlock(&vm_mtx);
return (error);
+ }
}
/*
@@ -303,6 +314,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len, prot,
FALSE);
+ mtx_unlock(&vm_mtx);
return error;
}
@@ -498,9 +510,11 @@ exec_elf_imgact(struct image_params *imgp)
if ((error = exec_extract_strings(imgp)) != 0)
goto fail;
+ mtx_lock(&vm_mtx);
exec_new_vmspace(imgp);
vmspace = imgp->proc->p_vmspace;
+ mtx_unlock(&vm_mtx);
for (i = 0; i < hdr->e_phnum; i++) {
switch(phdr[i].p_type) {
@@ -557,6 +571,7 @@ exec_elf_imgact(struct image_params *imgp)
}
}
+ /* XXX: lock the vm_mtx when twiddling vmspace? */
vmspace->vm_tsize = text_size >> PAGE_SHIFT;
vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
vmspace->vm_dsize = data_size >> PAGE_SHIFT;
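
elf_load_section() shows the other half of the discipline: vm_mtx cannot be held across copyout(), which may fault and sleep, so the lock is dropped and re-taken around the copy. The same move as a stand-alone helper (hypothetical name; the code above open-codes it):

static int
copyout_vm_unlocked(const void *kaddr, void *uaddr, size_t len)
{
	int error;

	mtx_assert(&vm_mtx, MA_OWNED);
	mtx_unlock(&vm_mtx);		/* copyout() may fault and sleep */
	error = copyout(kaddr, uaddr, len);
	mtx_lock(&vm_mtx);
	return (error);
}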
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index f1a6a0b..6f5c653 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -485,11 +485,15 @@ start_init(void *dummy)
* Need just enough stack to hold the faked-up "execve()" arguments.
*/
addr = trunc_page(USRSTACK - PAGE_SIZE);
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
panic("init: couldn't allocate argument space");
p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
p->p_vmspace->vm_ssize = 1;
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
if ((var = getenv("init_path")) != NULL) {
strncpy(init_path, var, sizeof init_path);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 0b1b29e..8f49538 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -412,6 +412,7 @@ exec_map_first_page(imgp)
VOP_GETVOBJECT(imgp->vp, &object);
s = splvm();
+ mtx_lock(&vm_mtx);
ma[0] = vm_page_grab(object, 0, VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
@@ -443,6 +444,7 @@ exec_map_first_page(imgp)
vm_page_free(ma[0]);
}
splx(s);
+ mtx_unlock(&vm_mtx);
return EIO;
}
}
@@ -454,6 +456,7 @@ exec_map_first_page(imgp)
pmap_kenter((vm_offset_t) imgp->image_header, VM_PAGE_TO_PHYS(ma[0]));
imgp->firstpage = ma[0];
+ mtx_unlock(&vm_mtx);
return 0;
}
@@ -461,9 +464,12 @@ void
exec_unmap_first_page(imgp)
struct image_params *imgp;
{
+
if (imgp->firstpage) {
+ mtx_lock(&vm_mtx);
pmap_kremove((vm_offset_t) imgp->image_header);
vm_page_unwire(imgp->firstpage, 1);
+ mtx_unlock(&vm_mtx);
imgp->firstpage = NULL;
}
}
@@ -482,6 +488,7 @@ exec_new_vmspace(imgp)
caddr_t stack_addr = (caddr_t) (USRSTACK - MAXSSIZ);
vm_map_t map = &vmspace->vm_map;
+ mtx_assert(&vm_mtx, MA_OWNED);
imgp->vmspace_destroyed = 1;
/*
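
exec_new_vmspace() now states its locking contract with an mtx_assert() rather than a comment alone, so callers must wrap it; the ELF image activator above does exactly this:

	mtx_lock(&vm_mtx);
	exec_new_vmspace(imgp);		/* asserts vm_mtx, MA_OWNED */
	vmspace = imgp->proc->p_vmspace;
	mtx_unlock(&vm_mtx);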
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index d5dccab..1af27d2 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -222,6 +222,7 @@ exit1(p, rv)
* Can't free the entire vmspace as the kernel stack
* may be mapped within that space also.
*/
+ mtx_lock(&vm_mtx);
if (vm->vm_refcnt == 1) {
if (vm->vm_shm)
shmexit(p);
@@ -230,6 +231,7 @@ exit1(p, rv)
(void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
VM_MAXUSER_ADDRESS);
}
+ mtx_unlock(&vm_mtx);
PROC_LOCK(p);
if (SESS_LEADER(p)) {
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index d3b991d..62dcc06 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -220,6 +220,7 @@ fork1(p1, flags, procp)
if ((flags & RFPROC) == 0) {
vm_fork(p1, 0, flags);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
/*
* Close all file descriptors.
@@ -567,6 +568,7 @@ again:
* execution path later. (ie: directly into user mode)
*/
vm_fork(p1, p2, flags);
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
if (flags == (RFFDG | RFPROC)) {
cnt.v_forks++;
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 27431ab..f46313c 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -498,8 +498,10 @@ dosetrlimit(p, which, limp)
}
addr = trunc_page(addr);
size = round_page(size);
+ mtx_lock(&vm_mtx);
(void) vm_map_protect(&p->p_vmspace->vm_map,
addr, addr+size, prot, FALSE);
+ mtx_unlock(&vm_mtx);
}
break;
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 7d793de..e09a377 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -378,6 +378,13 @@ msleep(ident, mtx, priority, wmesg, timo)
int rval = 0;
WITNESS_SAVE_DECL(mtx);
+ KASSERT(ident == &proc0 || /* XXX: swapper */
+ timo != 0 || /* XXX: we might still miss a wakeup */
+ mtx_owned(&Giant) || mtx != NULL,
+ ("indefinite sleep without mutex, wmesg: \"%s\" ident: %p",
+ wmesg, ident));
+ if (mtx_owned(&vm_mtx) && mtx != &vm_mtx)
+ panic("sleeping with vm_mtx held.");
#ifdef KTRACE
if (p && KTRPOINT(p, KTR_CSW))
ktrcsw(p->p_tracep, 1, 0);
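
The two checks added to msleep() enforce different rules: the KASSERT catches indefinite sleeps with no interlock (and thus no wakeup guarantee), while the explicit panic forbids sleeping with vm_mtx held unless vm_mtx itself is the interlock being released. Correct usage under the new rule looks like this sketch (the wait channel, condition, and wmesg are hypothetical):

	while (vm_resource_busy)
		msleep(&vm_resource_busy, &vm_mtx, PVM, "vmwait", 0);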
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 344f163..613d1e4 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
ef = (elf_file_t) lf;
#ifdef SPARSE_MAPPING
+ mtx_lock(&vm_mtx);
ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
if (ef->object == NULL) {
+ mtx_unlock(&vm_mtx);
free(ef, M_LINKER);
error = ENOMEM;
goto out;
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
ef->object = 0;
goto out;
}
+ mtx_unlock(&vm_mtx);
#else
ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
/*
* Wire down the pages
*/
+ mtx_lock(&vm_mtx);
vm_map_pageable(kernel_map,
(vm_offset_t) segbase,
(vm_offset_t) segbase + segs[i]->p_memsz,
FALSE);
+ mtx_unlock(&vm_mtx);
#endif
}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
}
#ifdef SPARSE_MAPPING
if (ef->object) {
+ mtx_lock(&vm_mtx);
vm_map_remove(kernel_map, (vm_offset_t) ef->address,
(vm_offset_t) ef->address
+ (ef->object->size << PAGE_SHIFT));
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
}
#else
if (ef->address)
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index 344f163..613d1e4 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
ef = (elf_file_t) lf;
#ifdef SPARSE_MAPPING
+ mtx_lock(&vm_mtx);
ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
if (ef->object == NULL) {
+ mtx_unlock(&vm_mtx);
free(ef, M_LINKER);
error = ENOMEM;
goto out;
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
VM_PROT_ALL, VM_PROT_ALL, 0);
if (error) {
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
ef->object = 0;
goto out;
}
+ mtx_unlock(&vm_mtx);
#else
ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
/*
* Wire down the pages
*/
+ mtx_lock(&vm_mtx);
vm_map_pageable(kernel_map,
(vm_offset_t) segbase,
(vm_offset_t) segbase + segs[i]->p_memsz,
FALSE);
+ mtx_unlock(&vm_mtx);
#endif
}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
}
#ifdef SPARSE_MAPPING
if (ef->object) {
+ mtx_lock(&vm_mtx);
vm_map_remove(kernel_map, (vm_offset_t) ef->address,
(vm_offset_t) ef->address
+ (ef->object->size << PAGE_SHIFT));
vm_object_deallocate(ef->object);
+ mtx_unlock(&vm_mtx);
}
#else
if (ef->address)
diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c
index 9ac4338..061d151 100644
--- a/sys/kern/subr_blist.c
+++ b/sys/kern/subr_blist.c
@@ -71,6 +71,7 @@
#include <sys/kernel.h>
#include <sys/blist.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <vm/vm.h>
#include <vm/vm_object.h>
#include <vm/vm_kern.h>
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index af7bfc1..8924fa2 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -330,9 +330,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
i = trap_pfault(&frame, TRUE, eva);
- mtx_unlock(&Giant);
#if defined(I586_CPU) && !defined(NO_F00F_HACK)
if (i == -2) {
/*
@@ -443,9 +441,7 @@ restart:
*/
eva = rcr2();
enable_intr();
- mtx_lock(&Giant);
(void) trap_pfault(&frame, FALSE, eva);
- mtx_unlock(&Giant);
goto out;
case T_DNA:
@@ -887,7 +883,9 @@ nogo:
frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
return (0);
}
+ mtx_lock(&Giant);
trap_fatal(frame, eva);
+ mtx_unlock(&Giant);
return (-1);
}
@@ -1147,14 +1145,17 @@ syscall(frame)
/*
* Try to run the syscall without the MP lock if the syscall
- * is MP safe. We have to obtain the MP lock no matter what if
- * we are ktracing
+ * is MP safe.
*/
if ((callp->sy_narg & SYF_MPSAFE) == 0) {
mtx_lock(&Giant);
}
#ifdef KTRACE
+ /*
+ * We have to obtain the MP lock no matter what if
+ * we are ktracing
+ */
if (KTRPOINT(p, KTR_SYSCALL)) {
if (!mtx_owned(&Giant))
mtx_lock(&Giant);
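
With page faults now handled outside Giant, the syscall path's Giant policy reduces to: take it for syscalls not flagged MPSAFE, and separately whenever ktrace needs it. A condensed sketch of the dispatch (the ktrsyscall() call is shown for shape only):

	if ((callp->sy_narg & SYF_MPSAFE) == 0)
		mtx_lock(&Giant);
#ifdef KTRACE
	if (KTRPOINT(p, KTR_SYSCALL)) {
		if (!mtx_owned(&Giant))
			mtx_lock(&Giant);
		ktrsyscall(p->p_tracep, code, narg, args);
	}
#endif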
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 0c32402..a788448 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -56,6 +56,7 @@
#include <sys/filedesc.h>
#include <sys/filio.h>
#include <sys/lock.h>
+#include <sys/mutex.h>
#include <sys/ttycom.h>
#include <sys/stat.h>
#include <sys/poll.h>
@@ -253,6 +254,7 @@ pipespace(cpipe, size)
* kernel_object.
* XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
*/
+ mtx_lock(&vm_mtx);
object = vm_object_allocate(OBJT_DEFAULT, npages);
buffer = (caddr_t) vm_map_min(kernel_map);
@@ -264,6 +266,7 @@ pipespace(cpipe, size)
error = vm_map_find(kernel_map, object, 0,
(vm_offset_t *) &buffer, size, 1,
VM_PROT_ALL, VM_PROT_ALL, 0);
+ mtx_unlock(&vm_mtx);
if (error != KERN_SUCCESS) {
vm_object_deallocate(object);
@@ -551,6 +554,7 @@ pipe_build_write_buffer(wpipe, uio)
size = wpipe->pipe_buffer.size;
endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
+ mtx_lock(&vm_mtx);
addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
vm_page_t m;
@@ -561,6 +565,7 @@ pipe_build_write_buffer(wpipe, uio)
for (j = 0; j < i; j++)
vm_page_unwire(wpipe->pipe_map.ms[j], 1);
+ mtx_unlock(&vm_mtx);
return (EFAULT);
}
@@ -592,6 +597,7 @@ pipe_build_write_buffer(wpipe, uio)
pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
wpipe->pipe_map.npages);
+ mtx_unlock(&vm_mtx);
/*
* and update the uio data
*/
@@ -625,8 +631,10 @@ pipe_destroy_write_buffer(wpipe)
amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
}
}
+ mtx_lock(&vm_mtx);
for (i = 0; i < wpipe->pipe_map.npages; i++)
vm_page_unwire(wpipe->pipe_map.ms[i], 1);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -1199,12 +1207,13 @@ pipeclose(cpipe)
wakeup(ppipe);
ppipe->pipe_peer = NULL;
}
-
/*
* free resources
*/
+ mtx_lock(&vm_mtx);
pipe_free_kmem(cpipe);
zfree(pipe_zone, cpipe);
+ mtx_unlock(&vm_mtx);
}
}
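
pipe_build_write_buffer() wires the user's pages and maps them into kernel VA under vm_mtx, and its fault path demonstrates the every-exit-unlocks rule. A distilled sketch (the wiring step is elided in the hunks above, so the failure condition here is hypothetical):

	mtx_lock(&vm_mtx);
	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
		/* ... wire the page at addr into wpipe->pipe_map.ms[i] ... */
		if (wire_failed) {		/* hypothetical condition */
			for (j = 0; j < i; j++)
				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
			mtx_unlock(&vm_mtx);	/* drop on the error path too */
			return (EFAULT);
		}
	}
	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
	    wpipe->pipe_map.npages);
	mtx_unlock(&vm_mtx);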
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 32255bc..269814c 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -61,7 +61,7 @@
14 STD POSIX { int mknod(char *path, int mode, int dev); }
15 STD POSIX { int chmod(char *path, int mode); }
16 STD POSIX { int chown(char *path, int uid, int gid); }
-17 STD BSD { int obreak(char *nsize); } break obreak_args int
+17 MPSAFE STD BSD { int obreak(char *nsize); } break obreak_args int
18 STD BSD { int getfsstat(struct statfs *buf, long bufsize, \
int flags); }
19 COMPAT POSIX { long lseek(int fd, long offset, int whence); }
@@ -121,23 +121,23 @@
62 COMPAT POSIX { int fstat(int fd, struct ostat *sb); }
63 COMPAT BSD { int getkerninfo(int op, char *where, size_t *size, \
int arg); } getkerninfo getkerninfo_args int
-64 COMPAT BSD { int getpagesize(void); } \
+64 MPSAFE COMPAT BSD { int getpagesize(void); } \
getpagesize getpagesize_args int
65 STD BSD { int msync(void *addr, size_t len, int flags); }
66 STD BSD { int vfork(void); }
67 OBSOL NOHIDE vread
68 OBSOL NOHIDE vwrite
-69 STD BSD { int sbrk(int incr); }
-70 STD BSD { int sstk(int incr); }
-71 COMPAT BSD { int mmap(void *addr, int len, int prot, \
+69 MPSAFE STD BSD { int sbrk(int incr); }
+70 MPSAFE STD BSD { int sstk(int incr); }
+71 MPSAFE COMPAT BSD { int mmap(void *addr, int len, int prot, \
int flags, int fd, long pos); }
-72 STD BSD { int ovadvise(int anom); } vadvise ovadvise_args int
-73 STD BSD { int munmap(void *addr, size_t len); }
-74 STD BSD { int mprotect(const void *addr, size_t len, int prot); }
-75 STD BSD { int madvise(void *addr, size_t len, int behav); }
+72 MPSAFE STD BSD { int ovadvise(int anom); } vadvise ovadvise_args int
+73 MPSAFE STD BSD { int munmap(void *addr, size_t len); }
+74 MPSAFE STD BSD { int mprotect(const void *addr, size_t len, int prot); }
+75 MPSAFE STD BSD { int madvise(void *addr, size_t len, int behav); }
76 OBSOL NOHIDE vhangup
77 OBSOL NOHIDE vlimit
-78 STD BSD { int mincore(const void *addr, size_t len, \
+78 MPSAFE STD BSD { int mincore(const void *addr, size_t len, \
char *vec); }
79 STD POSIX { int getgroups(u_int gidsetsize, gid_t *gidset); }
80 STD POSIX { int setgroups(u_int gidsetsize, gid_t *gidset); }
@@ -306,7 +306,7 @@
setrlimit __setrlimit_args int
196 STD BSD { int getdirentries(int fd, char *buf, u_int count, \
long *basep); }
-197 STD BSD { caddr_t mmap(caddr_t addr, size_t len, int prot, \
+197 MPSAFE STD BSD { caddr_t mmap(caddr_t addr, size_t len, int prot, \
int flags, int fd, int pad, off_t pos); }
198 STD NOHIDE { int nosys(void); } __syscall __syscall_args int
199 STD POSIX { off_t lseek(int fd, int pad, off_t offset, \
@@ -318,8 +318,8 @@
__sysctl sysctl_args int
; properly, __sysctl should be a NOHIDE, but making an exception
; here allows us to avoid one in libc/sys/Makefile.inc.
-203 STD BSD { int mlock(const void *addr, size_t len); }
-204 STD BSD { int munlock(const void *addr, size_t len); }
+203 MPSAFE STD BSD { int mlock(const void *addr, size_t len); }
+204 MPSAFE STD BSD { int munlock(const void *addr, size_t len); }
205 STD BSD { int undelete(char *path); }
206 STD BSD { int futimes(int fd, struct timeval *tptr); }
207 STD BSD { int getpgid(pid_t pid); }
@@ -386,7 +386,7 @@
248 UNIMPL NOHIDE nosys
249 UNIMPL NOHIDE nosys
; syscall numbers initially used in OpenBSD
-250 STD BSD { int minherit(void *addr, size_t len, int inherit); }
+250 MPSAFE STD BSD { int minherit(void *addr, size_t len, int inherit); }
251 STD BSD { int rfork(int flags); }
252 STD BSD { int openbsd_poll(struct pollfd *fds, u_int nfds, \
int timeout); }
@@ -414,7 +414,7 @@
274 STD BSD { int lchmod(char *path, mode_t mode); }
275 NOPROTO BSD { int lchown(char *path, uid_t uid, gid_t gid); } netbsd_lchown lchown_args int
276 STD BSD { int lutimes(char *path, struct timeval *tptr); }
-277 NOPROTO BSD { int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
+277 MPSAFE NOPROTO BSD { int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
278 STD BSD { int nstat(char *path, struct nstat *ub); }
279 STD BSD { int nfstat(int fd, struct nstat *sb); }
280 STD BSD { int nlstat(char *path, struct nstat *ub); }
@@ -463,8 +463,8 @@
321 STD BSD { int yield(void); }
322 OBSOL NOHIDE thr_sleep
323 OBSOL NOHIDE thr_wakeup
-324 STD BSD { int mlockall(int how); }
-325 STD BSD { int munlockall(void); }
+324 MPSAFE STD BSD { int mlockall(int how); }
+325 MPSAFE STD BSD { int munlockall(void); }
326 STD BSD { int __getcwd(u_char *buf, u_int buflen); }
327 STD POSIX { int sched_setparam (pid_t pid, const struct sched_param *param); }
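
The new MPSAFE keyword sets SYF_MPSAFE in the generated sysent entry, which the dispatch code in subr_trap.c above tests before taking Giant. For the handler it means being entered without Giant and taking vm_mtx itself; an illustrative body modeled on munmap (not the actual implementation, which also validates the range):

int
munmap(p, uap)
	struct proc *p;
	struct munmap_args *uap;
{
	vm_offset_t addr = (vm_offset_t) uap->addr;
	vm_size_t size = round_page((vm_size_t) uap->len);

	mtx_lock(&vm_mtx);
	(void) vm_map_remove(&p->p_vmspace->vm_map, addr, addr + size);
	mtx_unlock(&vm_mtx);
	return (0);
}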
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index fab53a8..0a9abda 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -43,6 +43,7 @@
#include <sys/shm.h>
#include <sys/proc.h>
#include <sys/malloc.h>
+#include <sys/mutex.h>
#include <sys/mman.h>
#include <sys/stat.h>
#include <sys/syscall.h>
@@ -314,14 +315,17 @@ shmat(p, uap)
}
shm_handle = shmseg->shm_internal;
+ mtx_lock(&vm_mtx);
vm_object_reference(shm_handle->shm_object);
rv = vm_map_find(&p->p_vmspace->vm_map, shm_handle->shm_object,
0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0);
if (rv != KERN_SUCCESS) {
+ mtx_unlock(&vm_mtx);
return ENOMEM;
}
vm_map_inherit(&p->p_vmspace->vm_map,
attach_va, attach_va + size, VM_INHERIT_SHARE);
+ mtx_unlock(&vm_mtx);
shmmap_s->va = attach_va;
shmmap_s->shmid = uap->shmid;
@@ -549,6 +553,7 @@ shmget_allocate_segment(p, uap, mode)
* We make sure that we have allocated a pager before we need
* to.
*/
+ mtx_lock(&vm_mtx);
if (shm_use_phys) {
shm_handle->shm_object =
vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
@@ -558,6 +563,7 @@ shmget_allocate_segment(p, uap, mode)
}
vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);
+ mtx_unlock(&vm_mtx);
shmseg->shm_internal = shm_handle;
shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c1b53d8..a980330 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -281,6 +281,8 @@ waitrunningbufspace(void)
* Called when a buffer is extended. This function clears the B_CACHE
* bit if the newly extended portion of the buffer does not contain
* valid data.
+ *
+ * must be called with vm_mtx held
*/
static __inline__
void
@@ -426,11 +428,13 @@ bufinit(void)
* from buf_daemon.
*/
+ mtx_lock(&vm_mtx);
bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
bogus_page = vm_page_alloc(kernel_object,
((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
VM_ALLOC_NORMAL);
cnt.v_wire_count++;
+ mtx_unlock(&vm_mtx);
}
@@ -441,17 +445,27 @@ bufinit(void)
* buffer_map.
*
* Since this call frees up buffer space, we call bufspacewakeup().
+ *
+ * Can be called with or without the vm_mtx.
*/
static void
bfreekva(struct buf * bp)
{
+
if (bp->b_kvasize) {
+ int hadvmlock;
+
++buffreekvacnt;
bufspace -= bp->b_kvasize;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
vm_map_delete(buffer_map,
(vm_offset_t) bp->b_kvabase,
(vm_offset_t) bp->b_kvabase + bp->b_kvasize
);
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
bp->b_kvasize = 0;
bufspacewakeup();
}
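
bfreekva() introduces the conditional-lock idiom reused in vfs_setdirty() and vm_hold_free_pages() below: a function callable with or without vm_mtx records ownership up front and only locks and unlocks when it was not already held, presumably because vm_mtx is not a recursive mutex. The idiom distilled (hypothetical function name):

static void
work_locked_or_not(void)
{
	int hadvmlock;

	hadvmlock = mtx_owned(&vm_mtx);
	if (!hadvmlock)
		mtx_lock(&vm_mtx);
	/* ... vm_mtx-protected work ... */
	if (!hadvmlock)
		mtx_unlock(&vm_mtx);
}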
@@ -807,6 +821,7 @@ bdwrite(struct buf * bp)
VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
}
+ mtx_lock(&vm_mtx);
/*
* Set the *dirty* buffer range based upon the VM system dirty pages.
*/
@@ -820,6 +835,7 @@ bdwrite(struct buf * bp)
* out on the next sync, or perhaps the cluster will be completed.
*/
vfs_clean_pages(bp);
+ mtx_unlock(&vm_mtx);
bqrelse(bp);
/*
@@ -973,12 +989,15 @@ buf_dirty_count_severe(void)
* Release a busy buffer and, if requested, free its resources. The
* buffer will be stashed in the appropriate bufqueue[] allowing it
* to be accessed later as a cache entity or reused for other purposes.
+ *
* vm_mtx must not be held.
*/
void
brelse(struct buf * bp)
{
int s;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)), ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
s = splbio();
@@ -1088,6 +1107,7 @@ brelse(struct buf * bp)
resid = bp->b_bufsize;
foff = bp->b_offset;
+ mtx_lock(&vm_mtx);
for (i = 0; i < bp->b_npages; i++) {
int had_bogus = 0;
@@ -1099,10 +1119,12 @@ brelse(struct buf * bp)
* now.
*/
if (m == bogus_page) {
+ mtx_unlock(&vm_mtx);
VOP_GETVOBJECT(vp, &obj);
poff = OFF_TO_IDX(bp->b_offset);
had_bogus = 1;
+ mtx_lock(&vm_mtx);
for (j = i; j < bp->b_npages; j++) {
vm_page_t mtmp;
mtmp = bp->b_pages[j];
@@ -1136,11 +1158,15 @@ brelse(struct buf * bp)
if (bp->b_flags & (B_INVAL | B_RELBUF))
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
} else if (bp->b_flags & B_VMIO) {
- if (bp->b_flags & (B_INVAL | B_RELBUF))
+ if (bp->b_flags & (B_INVAL | B_RELBUF)) {
+ mtx_lock(&vm_mtx);
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
+ }
}
@@ -1302,6 +1328,9 @@ bqrelse(struct buf * bp)
splx(s);
}
+/*
+ * Must be called with vm_mtx held.
+ */
static void
vfs_vmio_release(bp)
struct buf *bp;
@@ -1310,6 +1339,7 @@ vfs_vmio_release(bp)
vm_page_t m;
s = splvm();
+ mtx_assert(&vm_mtx, MA_OWNED);
for (i = 0; i < bp->b_npages; i++) {
m = bp->b_pages[i];
bp->b_pages[i] = NULL;
@@ -1343,6 +1373,9 @@ vfs_vmio_release(bp)
}
splx(s);
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+
+ /* could drop vm_mtx here */
+
if (bp->b_bufsize) {
bufspacewakeup();
bp->b_bufsize = 0;
@@ -1614,7 +1647,9 @@ restart:
if (qindex == QUEUE_CLEAN) {
if (bp->b_flags & B_VMIO) {
bp->b_flags &= ~B_ASYNC;
+ mtx_lock(&vm_mtx);
vfs_vmio_release(bp);
+ mtx_unlock(&vm_mtx);
}
if (bp->b_vp)
brelvp(bp);
@@ -1735,6 +1770,8 @@ restart:
if (maxsize != bp->b_kvasize) {
vm_offset_t addr = 0;
+ /* we'll hold the lock over some vm ops */
+ mtx_lock(&vm_mtx);
bfreekva(bp);
if (vm_map_findspace(buffer_map,
@@ -1743,6 +1780,7 @@ restart:
* Uh oh. Buffer map is too fragmented. We
* must defragment the map.
*/
+ mtx_unlock(&vm_mtx);
++bufdefragcnt;
defrag = 1;
bp->b_flags |= B_INVAL;
@@ -1759,6 +1797,7 @@ restart:
bufspace += bp->b_kvasize;
++bufreusecnt;
}
+ mtx_unlock(&vm_mtx);
}
bp->b_data = bp->b_kvabase;
}
@@ -1936,18 +1975,24 @@ inmem(struct vnode * vp, daddr_t blkno)
size = vp->v_mount->mnt_stat.f_iosize;
off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
+ mtx_lock(&vm_mtx);
for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
m = vm_page_lookup(obj, OFF_TO_IDX(off + toff));
if (!m)
- return 0;
+ goto notinmem;
tinc = size;
if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
if (vm_page_is_valid(m,
(vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
- return 0;
+ goto notinmem;
}
+ mtx_unlock(&vm_mtx);
return 1;
+
+notinmem:
+ mtx_unlock(&vm_mtx);
+ return (0);
}
/*
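
The inmem() rewrite above is the standard shape for retrofitting a mutex onto a function with multiple early returns: the failure paths funnel through a single unlock label. In outline, with the two page checks condensed into one hypothetical condition:

	mtx_lock(&vm_mtx);
	for (toff = 0; toff < size; toff += tinc) {
		if (page_missing_or_invalid)
			goto notinmem;
	}
	mtx_unlock(&vm_mtx);
	return 1;
notinmem:
	mtx_unlock(&vm_mtx);
	return (0);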
@@ -1960,11 +2005,14 @@ inmem(struct vnode * vp, daddr_t blkno)
*
* This routine is primarily used by NFS, but is generalized for the
* B_VMIO case.
+ *
+ * Can be called with or without vm_mtx
*/
static void
vfs_setdirty(struct buf *bp)
{
int i;
+ int hadvmlock;
vm_object_t object;
/*
@@ -1983,6 +2031,10 @@ vfs_setdirty(struct buf *bp)
if ((bp->b_flags & B_VMIO) == 0)
return;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
+
object = bp->b_pages[0]->object;
if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY))
@@ -2040,6 +2092,8 @@ vfs_setdirty(struct buf *bp)
bp->b_dirtyend = eoffset;
}
}
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
/*
@@ -2441,6 +2495,7 @@ allocbuf(struct buf *bp, int size)
* DEV_BSIZE aligned existing buffer size. Figure out
* if we have to remove any pages.
*/
+ mtx_lock(&vm_mtx);
if (desiredpages < bp->b_npages) {
for (i = desiredpages; i < bp->b_npages; i++) {
/*
@@ -2461,6 +2516,7 @@ allocbuf(struct buf *bp, int size)
(desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages));
bp->b_npages = desiredpages;
}
+ mtx_unlock(&vm_mtx);
} else if (size > bp->b_bcount) {
/*
* We are growing the buffer, possibly in a
@@ -2481,6 +2537,7 @@ allocbuf(struct buf *bp, int size)
vp = bp->b_vp;
VOP_GETVOBJECT(vp, &obj);
+ mtx_lock(&vm_mtx);
while (bp->b_npages < desiredpages) {
vm_page_t m;
vm_pindex_t pi;
@@ -2589,6 +2646,9 @@ allocbuf(struct buf *bp, int size)
bp->b_pages,
bp->b_npages
);
+
+ mtx_unlock(&vm_mtx);
+
bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
(vm_offset_t)(bp->b_offset & PAGE_MASK));
}
@@ -2726,6 +2786,7 @@ bufdone(struct buf *bp)
if (error) {
panic("biodone: no object");
}
+ mtx_lock(&vm_mtx);
#if defined(VFS_BIO_DEBUG)
if (obj->paging_in_progress < bp->b_npages) {
printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
@@ -2814,6 +2875,7 @@ bufdone(struct buf *bp)
}
if (obj)
vm_object_pip_wakeupn(obj, 0);
+ mtx_unlock(&vm_mtx);
}
/*
@@ -2837,12 +2899,15 @@ bufdone(struct buf *bp)
* This routine is called in lieu of iodone in the case of
* incomplete I/O. This keeps the busy status for pages
consistent.
+ *
+ * vm_mtx should not be held
*/
void
vfs_unbusy_pages(struct buf * bp)
{
int i;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
runningbufwakeup(bp);
if (bp->b_flags & B_VMIO) {
struct vnode *vp = bp->b_vp;
@@ -2850,6 +2915,7 @@ vfs_unbusy_pages(struct buf * bp)
VOP_GETVOBJECT(vp, &obj);
+ mtx_lock(&vm_mtx);
for (i = 0; i < bp->b_npages; i++) {
vm_page_t m = bp->b_pages[i];
@@ -2866,6 +2932,7 @@ vfs_unbusy_pages(struct buf * bp)
vm_page_io_finish(m);
}
vm_object_pip_wakeupn(obj, 0);
+ mtx_unlock(&vm_mtx);
}
}
@@ -2876,12 +2943,15 @@ vfs_unbusy_pages(struct buf * bp)
* range is restricted to the buffer's size.
*
* This routine is typically called after a read completes.
+ *
+ * vm_mtx should be held
*/
static void
vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
{
vm_ooffset_t soff, eoff;
+ mtx_assert(&vm_mtx, MA_OWNED);
/*
* Start and end offsets in buffer. eoff - soff may not cross a
page boundary or cross the end of the buffer. The end of the
@@ -2917,12 +2987,15 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
* Since I/O has not been initiated yet, certain buffer flags
such as BIO_ERROR or B_INVAL may be in an inconsistent state
* and should be ignored.
+ *
+ * vm_mtx should not be held
*/
void
vfs_busy_pages(struct buf * bp, int clear_modify)
{
int i, bogus;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
if (bp->b_flags & B_VMIO) {
struct vnode *vp = bp->b_vp;
vm_object_t obj;
@@ -2932,6 +3005,7 @@ vfs_busy_pages(struct buf * bp, int clear_modify)
foff = bp->b_offset;
KASSERT(bp->b_offset != NOOFFSET,
("vfs_busy_pages: no buffer offset"));
+ mtx_lock(&vm_mtx);
vfs_setdirty(bp);
retry:
@@ -2979,6 +3053,7 @@ retry:
}
if (bogus)
pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
}
}
@@ -2989,12 +3064,15 @@ retry:
*
* Note that while we only really need to clean through to b_bcount, we
* just go ahead and clean through to b_bufsize.
+ *
+ * should be called with vm_mtx held
*/
static void
vfs_clean_pages(struct buf * bp)
{
int i;
+ mtx_assert(&vm_mtx, MA_OWNED);
if (bp->b_flags & B_VMIO) {
vm_ooffset_t foff;
@@ -3021,6 +3099,7 @@ vfs_clean_pages(struct buf * bp)
* Set the range within the buffer to valid and clean. The range is
* relative to the beginning of the buffer, b_offset. Note that b_offset
* itself may be offset from the beginning of the first page.
+ *
*/
void
@@ -3061,13 +3140,18 @@ vfs_bio_set_validclean(struct buf *bp, int base, int size)
*
* Note that while we only theoretically need to clear through b_bcount,
* we go ahead and clear through b_bufsize.
+ *
+ * We'll get vm_mtx here for safety if processing a VMIO buffer.
+ * I don't think vm_mtx is needed, but we're twiddling vm_page flags.
*/
void
vfs_bio_clrbuf(struct buf *bp) {
int i, mask = 0;
caddr_t sa, ea;
+
if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
+ mtx_lock(&vm_mtx);
bp->b_flags &= ~B_INVAL;
bp->b_ioflags &= ~BIO_ERROR;
if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
@@ -3079,6 +3163,7 @@ vfs_bio_clrbuf(struct buf *bp) {
}
bp->b_pages[0]->valid |= mask;
bp->b_resid = 0;
+ mtx_unlock(&vm_mtx);
return;
}
ea = sa = bp->b_data;
@@ -3106,6 +3191,7 @@ vfs_bio_clrbuf(struct buf *bp) {
vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
}
bp->b_resid = 0;
+ mtx_unlock(&vm_mtx);
} else {
clrbuf(bp);
}
@@ -3115,18 +3201,22 @@ vfs_bio_clrbuf(struct buf *bp) {
* vm_hold_load_pages and vm_hold_free_pages get pages into
* a buffer's address space. The pages are anonymous and are
* not associated with a file object.
+ *
+ * vm_mtx should not be held
*/
-void
+static void
vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
{
vm_offset_t pg;
vm_page_t p;
int index;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
to = round_page(to);
from = round_page(from);
index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ mtx_lock(&vm_mtx);
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
tryagain:
@@ -3152,6 +3242,7 @@ tryagain:
vm_page_wakeup(p);
}
bp->b_npages = index;
+ mtx_unlock(&vm_mtx);
}
void
@@ -3160,11 +3251,15 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
vm_offset_t pg;
vm_page_t p;
int index, newnpages;
+ int hadvmlock;
from = round_page(from);
to = round_page(to);
newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+ hadvmlock = mtx_owned(&vm_mtx);
+ if (!hadvmlock)
+ mtx_lock(&vm_mtx);
for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
p = bp->b_pages[index];
if (p && (index < bp->b_npages)) {
@@ -3180,6 +3275,8 @@ vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
}
}
bp->b_npages = newnpages;
+ if (!hadvmlock)
+ mtx_unlock(&vm_mtx);
}
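
Taken together, the vfs_bio.c changes encode three distinct locking contracts, each backed by an assertion or an ownership check rather than a comment alone:

	mtx_assert(&vm_mtx, MA_OWNED);		/* vfs_vmio_release(), vfs_clean_pages() */
	mtx_assert(&vm_mtx, MA_NOTOWNED);	/* brelse(), vfs_busy_pages() */
	hadvmlock = mtx_owned(&vm_mtx);		/* bfreekva(), vfs_setdirty() */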
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 8a6e045..0eb47bd 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -433,6 +433,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
BUF_KERNPROC(tbp);
TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
tbp, b_cluster.cluster_entry);
+ mtx_lock(&vm_mtx);
for (j = 0; j < tbp->b_npages; j += 1) {
vm_page_t m;
m = tbp->b_pages[j];
@@ -446,10 +447,12 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
tbp->b_pages[j] = bogus_page;
}
+ mtx_unlock(&vm_mtx);
bp->b_bcount += tbp->b_bcount;
bp->b_bufsize += tbp->b_bufsize;
}
+ mtx_lock(&vm_mtx);
for(j=0;j<bp->b_npages;j++) {
if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) ==
VM_PAGE_BITS_ALL)
@@ -462,6 +465,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *)bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
return (bp);
}
@@ -484,7 +488,9 @@ cluster_callback(bp)
if (bp->b_ioflags & BIO_ERROR)
error = bp->b_error;
+ mtx_lock(&vm_mtx);
pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+ mtx_unlock(&vm_mtx);
/*
* Move memory from the large cluster buffer into the component
* buffers and mark IO as done on these.
@@ -851,6 +857,7 @@ cluster_wbuild(vp, size, start_lbn, len)
}
}
+ mtx_lock(&vm_mtx);
for (j = 0; j < tbp->b_npages; j += 1) {
m = tbp->b_pages[j];
vm_page_io_start(m);
@@ -861,6 +868,7 @@ cluster_wbuild(vp, size, start_lbn, len)
bp->b_npages++;
}
}
+ mtx_unlock(&vm_mtx);
}
bp->b_bcount += size;
bp->b_bufsize += size;
@@ -879,8 +887,10 @@ cluster_wbuild(vp, size, start_lbn, len)
tbp, b_cluster.cluster_entry);
}
finishcluster:
+ mtx_lock(&vm_mtx);
pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
(vm_page_t *) bp->b_pages, bp->b_npages);
+ mtx_unlock(&vm_mtx);
if (bp->b_bufsize > bp->b_kvasize)
panic(
"cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 328a9b1..d17e934 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -535,14 +535,18 @@ retry:
if (vp->v_type == VREG || vp->v_type == VDIR) {
if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
goto retn;
+ mtx_lock(&vm_mtx);
object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
+ mtx_unlock(&vm_mtx);
} else if (devsw(vp->v_rdev) != NULL) {
/*
* This simply allocates the biggest object possible
* for a disk vnode. This should be fixed, but doesn't
* cause any problems (yet).
*/
+ mtx_lock(&vm_mtx);
object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
+ mtx_unlock(&vm_mtx);
} else {
goto retn;
}
@@ -550,15 +554,23 @@ retry:
* Dereference the reference we just created. This assumes
* that the object is associated with the vp.
*/
+ mtx_lock(&vm_mtx);
object->ref_count--;
+ mtx_unlock(&vm_mtx);
vp->v_usecount--;
} else {
+ /*
+ * XXX: safe to hold vm mutex through VOP_UNLOCK?
+ */
+ mtx_lock(&vm_mtx);
if (object->flags & OBJ_DEAD) {
VOP_UNLOCK(vp, 0, p);
- tsleep(object, PVM, "vodead", 0);
+ msleep(object, VM_OBJECT_MTX(object), PVM, "vodead", 0);
+ mtx_unlock(&vm_mtx);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
goto retry;
}
+ mtx_unlock(&vm_mtx);
}
KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
@@ -580,6 +592,7 @@ vop_stddestroyvobject(ap)
if (vp->v_object == NULL)
return (0);
+ mtx_lock(&vm_mtx);
if (obj->ref_count == 0) {
/*
* vclean() may be called twice. The first time
@@ -594,6 +607,7 @@ vop_stddestroyvobject(ap)
*/
vm_pager_deallocate(obj);
}
+ mtx_unlock(&vm_mtx);
return (0);
}
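
The tsleep()-to-msleep() conversion above is the load-bearing change in this file: with vm_mtx held, the sleep must release the mutex atomically with blocking on the channel, or a wakeup could arrive in the gap and be lost. msleep() re-acquires the interlock before returning, which is why an explicit unlock still follows it. (VM_OBJECT_MTX(object) presumably resolves to &vm_mtx at this stage of the locking work.) Annotated shape of the hunk:

	mtx_lock(&vm_mtx);
	if (object->flags & OBJ_DEAD) {
		VOP_UNLOCK(vp, 0, p);
		/* atomically drop vm_mtx and sleep; re-taken on wakeup */
		msleep(object, VM_OBJECT_MTX(object), PVM, "vodead", 0);
		mtx_unlock(&vm_mtx);
		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
		goto retry;
	}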
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 6b73258..3f97551 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- if (VOP_GETVOBJECT(vp, &obj) == 0)
+ if (VOP_GETVOBJECT(vp, &obj) == 0) {
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
vm_object_page_clean(obj, 0, 0, 0);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
+ }
error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
#ifdef SOFTUPDATES
if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 2f4dc8d..6c050ba 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -711,6 +711,8 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
int s, error;
vm_object_t object;
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
+
if (flags & V_SAVE) {
s = splbio();
while (vp->v_numoutput) {
@@ -797,8 +799,10 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
*/
mtx_lock(&vp->v_interlock);
if (VOP_GETVOBJECT(vp, &object) == 0) {
+ mtx_lock(&vm_mtx);
vm_object_page_remove(object, 0, 0,
(flags & V_SAVE) ? TRUE : FALSE);
+ mtx_unlock(&vm_mtx);
}
mtx_unlock(&vp->v_interlock);
@@ -1132,6 +1136,8 @@ speedup_syncer()
* Also sets B_PAGING flag to indicate that vnode is not fully associated
* with the buffer. i.e. the bp has not been linked into the vnode or
* ref-counted.
+ *
* Doesn't block; only the vnode seems to need a lock.
*/
void
pbgetvp(vp, bp)
@@ -1554,6 +1560,7 @@ vput(vp)
{
struct proc *p = curproc; /* XXX */
+ mtx_assert(&Giant, MA_OWNED);
KASSERT(vp != NULL, ("vput: null vp"));
mtx_lock(&vp->v_interlock);
/* Skip this v_writecount check if we're going to panic below. */
@@ -2382,7 +2389,11 @@ loop:
if (!vget(vp,
LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
if (VOP_GETVOBJECT(vp, &obj) == 0) {
- vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
+ mtx_lock(&vm_mtx);
+ vm_object_page_clean(obj, 0, 0,
+ flags == MNT_WAIT ?
+ OBJPC_SYNC : OBJPC_NOSYNC);
+ mtx_unlock(&vm_mtx);
anyio = 1;
}
vput(vp);
@@ -2409,6 +2420,8 @@ vfs_object_create(vp, p, cred)
struct proc *p;
struct ucred *cred;
{
+
+ mtx_assert(&vm_mtx, MA_NOTOWNED);
return (VOP_CREATEVOBJECT(vp, cred, p));
}
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 6b73258..3f97551 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
return (error);
vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
- if (VOP_GETVOBJECT(vp, &obj) == 0)
+ if (VOP_GETVOBJECT(vp, &obj) == 0) {
+ mtx_unlock(&Giant);
+ mtx_lock(&vm_mtx);
vm_object_page_clean(obj, 0, 0, 0);
+ mtx_unlock(&vm_mtx);
+ mtx_lock(&Giant);
+ }
error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
#ifdef SOFTUPDATES
if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
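
fsync() (duplicated verbatim in vfs_extattr.c above) swaps Giant for vm_mtx around the page clean, the same never-hold-both ordering used in start_init(). As a hypothetical helper (the tree open-codes this at both call sites):

static void
fsync_object_clean(vm_object_t obj)
{
	mtx_unlock(&Giant);	/* Giant and vm_mtx are not held together */
	mtx_lock(&vm_mtx);
	vm_object_page_clean(obj, 0, 0, 0);
	mtx_unlock(&vm_mtx);
	mtx_lock(&Giant);
}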