Diffstat (limited to 'sys/kern')

 sys/kern/imgact_aout.c   |    8
 sys/kern/imgact_elf.c    |   21
 sys/kern/init_main.c     |    4
 sys/kern/kern_exec.c     |    7
 sys/kern/kern_exit.c     |    2
 sys/kern/kern_fork.c     |    2
 sys/kern/kern_resource.c |    2
 sys/kern/kern_synch.c    |    7
 sys/kern/link_elf.c      |    8
 sys/kern/link_elf_obj.c  |    8
 sys/kern/subr_blist.c    |    1
 sys/kern/subr_trap.c     |   13
 sys/kern/sys_pipe.c      |   11
 sys/kern/syscalls.master |   34
 sys/kern/sysv_shm.c      |    6
 sys/kern/vfs_bio.c       |  105
 sys/kern/vfs_cluster.c   |   10
 sys/kern/vfs_default.c   |   16
 sys/kern/vfs_extattr.c   |    7
 sys/kern/vfs_subr.c      |   15
 sys/kern/vfs_syscalls.c  |    7

 21 files changed, 259 insertions, 35 deletions
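Every hunk below applies the same basic discipline: code paths that touch vm_map/vm_object state take the new global vm_mtx first and release it on every exit path, including early error returns (as the imgact hunks below do). A minimal user-space sketch of that bracketing discipline, using POSIX threads in place of the kernel's mtx(9) API; vm_op() and the error value are made up for illustration:

    #include <errno.h>
    #include <pthread.h>

    static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;
    static int vm_state;

    static int
    vm_op(int arg)
    {
            /* Stands in for vm_map_find() and friends. */
            vm_state += arg;
            return (arg < 0 ? EINVAL : 0);
    }

    static int
    do_vm_work(int arg)
    {
            int error;

            pthread_mutex_lock(&vm_mtx);            /* cf. mtx_lock(&vm_mtx) */
            error = vm_op(arg);
            if (error) {
                    /* Error paths must unlock too. */
                    pthread_mutex_unlock(&vm_mtx);
                    return (error);
            }
            pthread_mutex_unlock(&vm_mtx);          /* cf. mtx_unlock(&vm_mtx) */
            return (0);
    }

    int
    main(void)
    {
            return (do_vm_work(1) || do_vm_work(-1) != EINVAL);
    }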
diff --git a/sys/kern/imgact_aout.c b/sys/kern/imgact_aout.c
index 9478eb3..8becda3 100644
--- a/sys/kern/imgact_aout.c
+++ b/sys/kern/imgact_aout.c
@@ -171,6 +171,7 @@ exec_aout_imgact(imgp)
 	if (error)
 		return (error);
 
+	mtx_lock(&vm_mtx);
 	/*
 	 * Destroy old process VM and create a new one (with a new stack)
 	 */
@@ -184,7 +185,9 @@ exec_aout_imgact(imgp)
 	vp = imgp->vp;
 	map = &vmspace->vm_map;
 	vm_map_lock(map);
+	mtx_unlock(&vm_mtx);
 	VOP_GETVOBJECT(vp, &object);
+	mtx_lock(&vm_mtx);
 	vm_object_reference(object);
 
 	text_end = virtual_offset + a_out->a_text;
@@ -195,6 +198,7 @@ exec_aout_imgact(imgp)
 	    MAP_COPY_ON_WRITE | MAP_PREFAULT);
 	if (error) {
 		vm_map_unlock(map);
+		mtx_unlock(&vm_mtx);
 		return (error);
 	}
 	data_end = text_end + a_out->a_data;
@@ -207,6 +211,7 @@ exec_aout_imgact(imgp)
 		    MAP_COPY_ON_WRITE | MAP_PREFAULT);
 		if (error) {
 			vm_map_unlock(map);
+			mtx_unlock(&vm_mtx);
 			return (error);
 		}
 	}
@@ -217,6 +222,7 @@ exec_aout_imgact(imgp)
 		    VM_PROT_ALL, VM_PROT_ALL, 0);
 		if (error) {
 			vm_map_unlock(map);
+			mtx_unlock(&vm_mtx);
 			return (error);
 		}
 	}
@@ -229,6 +235,8 @@ exec_aout_imgact(imgp)
 	vmspace->vm_daddr = (caddr_t) (uintptr_t)
 	    (virtual_offset + a_out->a_text);
+	mtx_unlock(&vm_mtx);
+
 
 	/* Fill in image_params */
 	imgp->interpreted = 0;
 	imgp->entry_addr = a_out->a_entry;
diff --git a/sys/kern/imgact_elf.c b/sys/kern/imgact_elf.c
index da7b9cb..2a15e9c 100644
--- a/sys/kern/imgact_elf.c
+++ b/sys/kern/imgact_elf.c
@@ -230,6 +230,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
 	else
 		map_len = round_page(offset+filsz) - file_addr;
 
+	mtx_lock(&vm_mtx);
 	if (map_len != 0) {
 		vm_object_reference(object);
 		vm_map_lock(&vmspace->vm_map);
@@ -244,12 +245,15 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
 		vm_map_unlock(&vmspace->vm_map);
 		if (rv != KERN_SUCCESS) {
 			vm_object_deallocate(object);
+			mtx_unlock(&vm_mtx);
 			return EINVAL;
 		}
 
 		/* we can stop now if we've covered it all */
-		if (memsz == filsz)
+		if (memsz == filsz) {
+			mtx_unlock(&vm_mtx);
 			return 0;
+		}
 	}
 
@@ -270,8 +274,10 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
 		    map_addr, map_addr + map_len,
 		    VM_PROT_ALL, VM_PROT_ALL, 0);
 		vm_map_unlock(&vmspace->vm_map);
-		if (rv != KERN_SUCCESS)
+		if (rv != KERN_SUCCESS) {
+			mtx_unlock(&vm_mtx);
 			return EINVAL;
+		}
 	}
 
 	if (copy_len != 0) {
@@ -287,14 +293,19 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
 		    MAP_COPY_ON_WRITE | MAP_PREFAULT_PARTIAL);
 		if (rv != KERN_SUCCESS) {
 			vm_object_deallocate(object);
+			mtx_unlock(&vm_mtx);
 			return EINVAL;
 		}
 
 		/* send the page fragment to user space */
+		mtx_unlock(&vm_mtx);
 		error = copyout((caddr_t)data_buf, (caddr_t)map_addr, copy_len);
+		mtx_lock(&vm_mtx);
 		vm_map_remove(exec_map, data_buf, data_buf + PAGE_SIZE);
-		if (error)
+		if (error) {
+			mtx_unlock(&vm_mtx);
 			return (error);
+		}
 	}
 
 	/*
@@ -303,6 +314,7 @@ elf_load_section(struct proc *p, struct vmspace *vmspace, struct vnode *vp, vm_o
 	vm_map_protect(&vmspace->vm_map, map_addr, map_addr + map_len,
 	    prot, FALSE);
+	mtx_unlock(&vm_mtx);
 
 	return error;
 }
@@ -498,9 +510,11 @@ exec_elf_imgact(struct image_params *imgp)
 	if ((error = exec_extract_strings(imgp)) != 0)
 		goto fail;
 
+	mtx_lock(&vm_mtx);
 	exec_new_vmspace(imgp);
 
 	vmspace = imgp->proc->p_vmspace;
+	mtx_unlock(&vm_mtx);
 
 	for (i = 0; i < hdr->e_phnum; i++) {
 		switch(phdr[i].p_type) {
@@ -557,6 +571,7 @@ exec_elf_imgact(struct image_params *imgp)
 		}
 	}
 
+	/* XXX: lock the vm_mtx when twiddling vmspace? */
 	vmspace->vm_tsize = text_size >> PAGE_SHIFT;
 	vmspace->vm_taddr = (caddr_t)(uintptr_t)text_addr;
 	vmspace->vm_dsize = data_size >> PAGE_SHIFT;
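A recurring wrinkle in the imgact hunks above: operations that can sleep or take other locks, such as VOP_GETVOBJECT() and copyout(), are not allowed under vm_mtx, so the lock is dropped around them and retaken afterwards. A rough pthreads sketch of that drop/reacquire pattern; blocking_copy() is a made-up stand-in for copyout(), and the kernel code differs in detail:

    #include <pthread.h>
    #include <string.h>
    #include <unistd.h>

    static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;
    static char shared_buf[64];     /* stands in for the mapped fragment */

    /* Stand-in for copyout(): may block, so it must not hold vm_mtx. */
    static int
    blocking_copy(char *dst, size_t len)
    {
            memcpy(dst, shared_buf, len < 9 ? len : 9);
            usleep(1000);           /* pretend we took a page fault */
            return (0);
    }

    static int
    send_fragment(char *dst, size_t len)
    {
            int error;

            pthread_mutex_lock(&vm_mtx);
            strcpy(shared_buf, "fragment");   /* set up state under the lock */
            pthread_mutex_unlock(&vm_mtx);    /* drop across the blocking call */
            error = blocking_copy(dst, len);
            pthread_mutex_lock(&vm_mtx);
            /*
             * Re-locked: anything derived from shared state before the drop
             * may be stale and must be revalidated or torn down, the way
             * elf_load_section() still does its vm_map_remove() here.
             */
            pthread_mutex_unlock(&vm_mtx);
            return (error);
    }

    int
    main(void)
    {
            char dst[16];

            return (send_fragment(dst, sizeof(dst)));
    }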
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c
index f1a6a0b..6f5c653 100644
--- a/sys/kern/init_main.c
+++ b/sys/kern/init_main.c
@@ -485,11 +485,15 @@ start_init(void *dummy)
 	 * Need just enough stack to hold the faked-up "execve()" arguments.
 	 */
 	addr = trunc_page(USRSTACK - PAGE_SIZE);
+	mtx_unlock(&Giant);
+	mtx_lock(&vm_mtx);
 	if (vm_map_find(&p->p_vmspace->vm_map, NULL, 0, &addr, PAGE_SIZE,
 			FALSE, VM_PROT_ALL, VM_PROT_ALL, 0) != 0)
 		panic("init: couldn't allocate argument space");
 	p->p_vmspace->vm_maxsaddr = (caddr_t)addr;
 	p->p_vmspace->vm_ssize = 1;
+	mtx_unlock(&vm_mtx);
+	mtx_lock(&Giant);
 
 	if ((var = getenv("init_path")) != NULL) {
 		strncpy(init_path, var, sizeof init_path);
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 0b1b29e..8f49538 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -412,6 +412,7 @@ exec_map_first_page(imgp)
 	VOP_GETVOBJECT(imgp->vp, &object);
 	s = splvm();
 
+	mtx_lock(&vm_mtx);
 	ma[0] = vm_page_grab(object, 0,
 	    VM_ALLOC_NORMAL | VM_ALLOC_RETRY);
 
@@ -443,6 +444,7 @@ exec_map_first_page(imgp)
 			vm_page_free(ma[0]);
 		}
 		splx(s);
+		mtx_unlock(&vm_mtx);
 		return EIO;
 	}
 }
@@ -454,6 +456,7 @@ exec_map_first_page(imgp)
 	pmap_kenter((vm_offset_t) imgp->image_header,
 	    VM_PAGE_TO_PHYS(ma[0]));
 	imgp->firstpage = ma[0];
+	mtx_unlock(&vm_mtx);
 
 	return 0;
 }
@@ -461,9 +464,12 @@ void
 exec_unmap_first_page(imgp)
 	struct image_params *imgp;
 {
+
 	if (imgp->firstpage) {
+		mtx_lock(&vm_mtx);
 		pmap_kremove((vm_offset_t) imgp->image_header);
 		vm_page_unwire(imgp->firstpage, 1);
+		mtx_unlock(&vm_mtx);
 		imgp->firstpage = NULL;
 	}
 }
@@ -482,6 +488,7 @@ exec_new_vmspace(imgp)
 	caddr_t stack_addr = (caddr_t) (USRSTACK - MAXSSIZ);
 	vm_map_t map = &vmspace->vm_map;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	imgp->vmspace_destroyed = 1;
 
 	/*
diff --git a/sys/kern/kern_exit.c b/sys/kern/kern_exit.c
index d5dccab..1af27d2 100644
--- a/sys/kern/kern_exit.c
+++ b/sys/kern/kern_exit.c
@@ -222,6 +222,7 @@ exit1(p, rv)
 	 * Can't free the entire vmspace as the kernel stack
 	 * may be mapped within that space also.
 	 */
+	mtx_lock(&vm_mtx);
 	if (vm->vm_refcnt == 1) {
 		if (vm->vm_shm)
 			shmexit(p);
@@ -230,6 +231,7 @@ exit1(p, rv)
 		(void) vm_map_remove(&vm->vm_map, VM_MIN_ADDRESS,
 		    VM_MAXUSER_ADDRESS);
 	}
+	mtx_unlock(&vm_mtx);
 
 	PROC_LOCK(p);
 	if (SESS_LEADER(p)) {
diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c
index d3b991d..62dcc06 100644
--- a/sys/kern/kern_fork.c
+++ b/sys/kern/kern_fork.c
@@ -220,6 +220,7 @@ fork1(p1, flags, procp)
 	if ((flags & RFPROC) == 0) {
 		vm_fork(p1, 0, flags);
+		mtx_assert(&vm_mtx, MA_NOTOWNED);
 
 		/*
 		 * Close all file descriptors.
@@ -567,6 +568,7 @@ again:
 	 * execution path later.  (ie: directly into user mode)
 	 */
 	vm_fork(p1, p2, flags);
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 
 	if (flags == (RFFDG | RFPROC)) {
 		cnt.v_forks++;
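Note how start_init() above swaps Giant for vm_mtx and then swaps back, rather than nesting the two: the big locks are never held at once, which sidesteps any ordering question between them. A user-space sketch of that hand-off, assuming the caller holds "giant" on entry and exit; the names are illustrative, not the kernel API:

    #include <pthread.h>

    static pthread_mutex_t giant = PTHREAD_MUTEX_INITIALIZER;
    static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;

    static void
    vm_section(void)
    {
            /* ... vm_map_find() and friends would run here ... */
    }

    /* Called with giant held; returns with giant held. */
    static void
    with_vm_lock(void)
    {
            pthread_mutex_unlock(&giant);   /* never hold both at once */
            pthread_mutex_lock(&vm_mtx);
            vm_section();
            pthread_mutex_unlock(&vm_mtx);
            pthread_mutex_lock(&giant);
    }

    int
    main(void)
    {
            pthread_mutex_lock(&giant);
            with_vm_lock();
            pthread_mutex_unlock(&giant);
            return (0);
    }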
diff --git a/sys/kern/kern_resource.c b/sys/kern/kern_resource.c
index 27431ab..f46313c 100644
--- a/sys/kern/kern_resource.c
+++ b/sys/kern/kern_resource.c
@@ -498,8 +498,10 @@ dosetrlimit(p, which, limp)
 		}
 		addr = trunc_page(addr);
 		size = round_page(size);
+		mtx_lock(&vm_mtx);
 		(void) vm_map_protect(&p->p_vmspace->vm_map, addr,
 		    addr+size, prot, FALSE);
+		mtx_unlock(&vm_mtx);
 		}
 		break;
diff --git a/sys/kern/kern_synch.c b/sys/kern/kern_synch.c
index 7d793de..e09a377 100644
--- a/sys/kern/kern_synch.c
+++ b/sys/kern/kern_synch.c
@@ -378,6 +378,13 @@ msleep(ident, mtx, priority, wmesg, timo)
 	int rval = 0;
 	WITNESS_SAVE_DECL(mtx);
 
+	KASSERT(ident == &proc0 ||	/* XXX: swapper */
+	    timo != 0 ||		/* XXX: we might still miss a wakeup */
+	    mtx_owned(&Giant) || mtx != NULL,
+	    ("indefinite sleep without mutex, wmesg: \"%s\" ident: %p",
+	    wmesg, ident));
+	if (mtx_owned(&vm_mtx) && mtx != &vm_mtx)
+		panic("sleeping with vm_mtx held.");
 #ifdef KTRACE
 	if (p && KTRPOINT(p, KTR_CSW))
 		ktrcsw(p->p_tracep, 1, 0);
diff --git a/sys/kern/link_elf.c b/sys/kern/link_elf.c
index 344f163..613d1e4 100644
--- a/sys/kern/link_elf.c
+++ b/sys/kern/link_elf.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
     ef = (elf_file_t) lf;
 
 #ifdef SPARSE_MAPPING
+    mtx_lock(&vm_mtx);
     ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
     if (ef->object == NULL) {
+	mtx_unlock(&vm_mtx);
 	free(ef, M_LINKER);
 	error = ENOMEM;
 	goto out;
     }
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
 			VM_PROT_ALL, VM_PROT_ALL, 0);
     if (error) {
 	vm_object_deallocate(ef->object);
+	mtx_unlock(&vm_mtx);
 	ef->object = 0;
 	goto out;
     }
+    mtx_unlock(&vm_mtx);
 #else
     ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
     if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
 	    /*
 	     * Wire down the pages
 	     */
+	    mtx_lock(&vm_mtx);
 	    vm_map_pageable(kernel_map,
 			    (vm_offset_t) segbase,
 			    (vm_offset_t) segbase + segs[i]->p_memsz,
 			    FALSE);
+	    mtx_unlock(&vm_mtx);
 #endif
 	}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
     }
 #ifdef SPARSE_MAPPING
     if (ef->object) {
+	mtx_lock(&vm_mtx);
 	vm_map_remove(kernel_map, (vm_offset_t) ef->address,
 		      (vm_offset_t) ef->address
 		      + (ef->object->size << PAGE_SHIFT));
 	vm_object_deallocate(ef->object);
+	mtx_unlock(&vm_mtx);
     }
 #else
     if (ef->address)
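The msleep() hunk above turns "don't sleep while holding vm_mtx" into a runtime check (unless vm_mtx itself is passed as the sleep interlock). pthreads has no mtx_owned(), so a user-space sketch of the same check needs explicit owner tracking; this is an analogue of the idea, not the kernel implementation:

    #include <pthread.h>
    #include <stdio.h>
    #include <stdlib.h>

    /* pthreads has no mtx_owned(), so track the owner by hand. */
    struct owned_mtx {
            pthread_mutex_t mtx;
            pthread_t       owner;
            int             held;
    };

    static struct owned_mtx vm_mtx = { PTHREAD_MUTEX_INITIALIZER };

    static void
    omtx_lock(struct owned_mtx *m)
    {
            pthread_mutex_lock(&m->mtx);
            m->owner = pthread_self();
            m->held = 1;
    }

    static void
    omtx_unlock(struct owned_mtx *m)
    {
            m->held = 0;
            pthread_mutex_unlock(&m->mtx);
    }

    static int
    omtx_owned(struct owned_mtx *m)
    {
            return (m->held && pthread_equal(m->owner, pthread_self()));
    }

    /* Analogue of the msleep() check: refuse to sleep with vm_mtx held. */
    static void
    sleep_on(void *ident, struct owned_mtx *interlock)
    {
            if (omtx_owned(&vm_mtx) && interlock != &vm_mtx) {
                    fprintf(stderr, "sleeping with vm_mtx held.\n");
                    abort();
            }
            /* ... block here ... */
    }

    int
    main(void)
    {
            sleep_on(&vm_mtx, NULL);        /* fine: lock not held */
            omtx_lock(&vm_mtx);
            sleep_on(&vm_mtx, &vm_mtx);     /* fine: vm_mtx is the interlock */
            omtx_unlock(&vm_mtx);
            return (0);
    }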
diff --git a/sys/kern/link_elf_obj.c b/sys/kern/link_elf_obj.c
index 344f163..613d1e4 100644
--- a/sys/kern/link_elf_obj.c
+++ b/sys/kern/link_elf_obj.c
@@ -653,8 +653,10 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
     ef = (elf_file_t) lf;
 
 #ifdef SPARSE_MAPPING
+    mtx_lock(&vm_mtx);
     ef->object = vm_object_allocate(OBJT_DEFAULT, mapsize >> PAGE_SHIFT);
     if (ef->object == NULL) {
+	mtx_unlock(&vm_mtx);
 	free(ef, M_LINKER);
 	error = ENOMEM;
 	goto out;
     }
@@ -667,9 +669,11 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
 			VM_PROT_ALL, VM_PROT_ALL, 0);
     if (error) {
 	vm_object_deallocate(ef->object);
+	mtx_unlock(&vm_mtx);
 	ef->object = 0;
 	goto out;
     }
+    mtx_unlock(&vm_mtx);
 #else
     ef->address = malloc(mapsize, M_LINKER, M_WAITOK);
     if (!ef->address) {
@@ -697,10 +701,12 @@ link_elf_load_file(linker_class_t cls, const char* filename, linker_file_t* resu
 	    /*
 	     * Wire down the pages
 	     */
+	    mtx_lock(&vm_mtx);
 	    vm_map_pageable(kernel_map,
 			    (vm_offset_t) segbase,
 			    (vm_offset_t) segbase + segs[i]->p_memsz,
 			    FALSE);
+	    mtx_unlock(&vm_mtx);
 #endif
 	}
@@ -824,10 +830,12 @@ link_elf_unload_file(linker_file_t file)
     }
 #ifdef SPARSE_MAPPING
     if (ef->object) {
+	mtx_lock(&vm_mtx);
 	vm_map_remove(kernel_map, (vm_offset_t) ef->address,
 		      (vm_offset_t) ef->address
 		      + (ef->object->size << PAGE_SHIFT));
 	vm_object_deallocate(ef->object);
+	mtx_unlock(&vm_mtx);
     }
 #else
     if (ef->address)
diff --git a/sys/kern/subr_blist.c b/sys/kern/subr_blist.c
index 9ac4338..061d151 100644
--- a/sys/kern/subr_blist.c
+++ b/sys/kern/subr_blist.c
@@ -71,6 +71,7 @@
 #include <sys/kernel.h>
 #include <sys/blist.h>
 #include <sys/malloc.h>
+#include <sys/mutex.h>
 #include <vm/vm.h>
 #include <vm/vm_object.h>
 #include <vm/vm_kern.h>
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index af7bfc1..8924fa2 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -330,9 +330,7 @@ restart:
 			 */
 			eva = rcr2();
 			enable_intr();
-			mtx_lock(&Giant);
 			i = trap_pfault(&frame, TRUE, eva);
-			mtx_unlock(&Giant);
 #if defined(I586_CPU) && !defined(NO_F00F_HACK)
 			if (i == -2) {
 				/*
@@ -443,9 +441,7 @@ restart:
 			 */
 			eva = rcr2();
 			enable_intr();
-			mtx_lock(&Giant);
 			(void) trap_pfault(&frame, FALSE, eva);
-			mtx_unlock(&Giant);
 			goto out;
 
 		case T_DNA:
@@ -887,7 +883,9 @@ nogo:
 		frame->tf_eip = (int)PCPU_GET(curpcb)->pcb_onfault;
 		return (0);
 	}
+	mtx_lock(&Giant);
 	trap_fatal(frame, eva);
+	mtx_unlock(&Giant);
 	return (-1);
 }
@@ -1147,14 +1145,17 @@ syscall(frame)
 
 	/*
 	 * Try to run the syscall without the MP lock if the syscall
-	 * is MP safe.  We have to obtain the MP lock no matter what if
-	 * we are ktracing
+	 * is MP safe.
 	 */
 	if ((callp->sy_narg & SYF_MPSAFE) == 0) {
 		mtx_lock(&Giant);
 	}
 
 #ifdef KTRACE
+	/*
+	 * We have to obtain the MP lock no matter what if
+	 * we are ktracing
+	 */
 	if (KTRPOINT(p, KTR_SYSCALL)) {
 		if (!mtx_owned(&Giant))
 			mtx_lock(&Giant);
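The syscall() change above, together with the MPSAFE annotations added to syscalls.master below, makes Giant acquisition data-driven: the lock is taken only for table entries that lack the flag. A simplified, runnable sketch of that dispatch shape; the real sysent packs the flag into sy_narg and the handlers take proc and argument pointers, so this reduced struct is an invention for illustration:

    #include <pthread.h>
    #include <stdio.h>

    #define SYF_MPSAFE      0x1     /* entry is safe without the global lock */

    static pthread_mutex_t giant = PTHREAD_MUTEX_INITIALIZER;

    struct sysent {
            int     sy_flags;               /* kernel packs this into sy_narg */
            int     (*sy_call)(void);
    };

    static int vm_call(void) { return (0); }       /* e.g. munmap(): MPSAFE */
    static int legacy_call(void) { return (0); }   /* still needs Giant */

    static struct sysent table[] = {
            { SYF_MPSAFE,   vm_call },
            { 0,            legacy_call },
    };

    static int
    dispatch(int code)
    {
            struct sysent *callp = &table[code];
            int error, locked = 0;

            if ((callp->sy_flags & SYF_MPSAFE) == 0) {
                    pthread_mutex_lock(&giant);
                    locked = 1;
            }
            error = callp->sy_call();
            if (locked)
                    pthread_mutex_unlock(&giant);
            return (error);
    }

    int
    main(void)
    {
            printf("%d %d\n", dispatch(0), dispatch(1));
            return (0);
    }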
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 0c32402..a788448 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -56,6 +56,7 @@
 #include <sys/filedesc.h>
 #include <sys/filio.h>
 #include <sys/lock.h>
+#include <sys/mutex.h>
 #include <sys/ttycom.h>
 #include <sys/stat.h>
 #include <sys/poll.h>
@@ -253,6 +254,7 @@ pipespace(cpipe, size)
 	 * kernel_object.
 	 * XXX -- minor change needed here for NetBSD/OpenBSD VM systems.
 	 */
+	mtx_lock(&vm_mtx);
 	object = vm_object_allocate(OBJT_DEFAULT, npages);
 	buffer = (caddr_t) vm_map_min(kernel_map);
 
@@ -264,6 +266,7 @@ pipespace(cpipe, size)
 	error = vm_map_find(kernel_map, object, 0,
 	    (vm_offset_t *) &buffer, size, 1,
 	    VM_PROT_ALL, VM_PROT_ALL, 0);
+	mtx_unlock(&vm_mtx);
 
 	if (error != KERN_SUCCESS) {
 		vm_object_deallocate(object);
@@ -551,6 +554,7 @@ pipe_build_write_buffer(wpipe, uio)
 		size = wpipe->pipe_buffer.size;
 	endaddr = round_page((vm_offset_t)uio->uio_iov->iov_base + size);
 
+	mtx_lock(&vm_mtx);
 	addr = trunc_page((vm_offset_t)uio->uio_iov->iov_base);
 	for (i = 0; addr < endaddr; addr += PAGE_SIZE, i++) {
 		vm_page_t m;
@@ -561,6 +565,7 @@ pipe_build_write_buffer(wpipe, uio)
 			for (j = 0; j < i; j++)
 				vm_page_unwire(wpipe->pipe_map.ms[j], 1);
+			mtx_unlock(&vm_mtx);
 			return (EFAULT);
 		}
 
@@ -592,6 +597,7 @@ pipe_build_write_buffer(wpipe, uio)
 	pmap_qenter(wpipe->pipe_map.kva, wpipe->pipe_map.ms,
 		wpipe->pipe_map.npages);
+	mtx_unlock(&vm_mtx);
 
 	/*
 	 * and update the uio data
 	 */
@@ -625,8 +631,10 @@ pipe_destroy_write_buffer(wpipe)
 			amountpipekva -= wpipe->pipe_buffer.size + PAGE_SIZE;
 		}
 	}
+	mtx_lock(&vm_mtx);
 	for (i = 0; i < wpipe->pipe_map.npages; i++)
 		vm_page_unwire(wpipe->pipe_map.ms[i], 1);
+	mtx_unlock(&vm_mtx);
 }
@@ -1199,12 +1207,13 @@ pipeclose(cpipe)
 			wakeup(ppipe);
 			ppipe->pipe_peer = NULL;
 		}
-
 		/*
 		 * free resources
 		 */
+		mtx_lock(&vm_mtx);
 		pipe_free_kmem(cpipe);
 		zfree(pipe_zone, cpipe);
+		mtx_unlock(&vm_mtx);
 	}
 }
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index 32255bc..269814c 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -61,7 +61,7 @@
 14	STD	POSIX	{ int mknod(char *path, int mode, int dev); }
 15	STD	POSIX	{ int chmod(char *path, int mode); }
 16	STD	POSIX	{ int chown(char *path, int uid, int gid); }
-17	STD	BSD	{ int obreak(char *nsize); } break obreak_args int
+17	MPSAFE	STD	BSD	{ int obreak(char *nsize); } break obreak_args int
 18	STD	BSD	{ int getfsstat(struct statfs *buf, long bufsize, \
 			    int flags); }
 19	COMPAT	POSIX	{ long lseek(int fd, long offset, int whence); }
@@ -121,23 +121,23 @@
 62	COMPAT	POSIX	{ int fstat(int fd, struct ostat *sb); }
 63	COMPAT	BSD	{ int getkerninfo(int op, char *where, size_t *size, \
 			    int arg); } getkerninfo getkerninfo_args int
-64	COMPAT	BSD	{ int getpagesize(void); } \
+64	MPSAFE	COMPAT	BSD	{ int getpagesize(void); } \
 			    getpagesize getpagesize_args int
 65	STD	BSD	{ int msync(void *addr, size_t len, int flags); }
 66	STD	BSD	{ int vfork(void); }
 67	OBSOL	NOHIDE	vread
 68	OBSOL	NOHIDE	vwrite
-69	STD	BSD	{ int sbrk(int incr); }
-70	STD	BSD	{ int sstk(int incr); }
-71	COMPAT	BSD	{ int mmap(void *addr, int len, int prot, \
+69	MPSAFE	STD	BSD	{ int sbrk(int incr); }
+70	MPSAFE	STD	BSD	{ int sstk(int incr); }
+71	MPSAFE	COMPAT	BSD	{ int mmap(void *addr, int len, int prot, \
 			    int flags, int fd, long pos); }
-72	STD	BSD	{ int ovadvise(int anom); } vadvise ovadvise_args int
-73	STD	BSD	{ int munmap(void *addr, size_t len); }
-74	STD	BSD	{ int mprotect(const void *addr, size_t len, int prot); }
-75	STD	BSD	{ int madvise(void *addr, size_t len, int behav); }
+72	MPSAFE	STD	BSD	{ int ovadvise(int anom); } vadvise ovadvise_args int
+73	MPSAFE	STD	BSD	{ int munmap(void *addr, size_t len); }
+74	MPSAFE	STD	BSD	{ int mprotect(const void *addr, size_t len, int prot); }
+75	MPSAFE	STD	BSD	{ int madvise(void *addr, size_t len, int behav); }
 76	OBSOL	NOHIDE	vhangup
 77	OBSOL	NOHIDE	vlimit
-78	STD	BSD	{ int mincore(const void *addr, size_t len, \
+78	MPSAFE	STD	BSD	{ int mincore(const void *addr, size_t len, \
 			    char *vec); }
 79	STD	POSIX	{ int getgroups(u_int gidsetsize, gid_t *gidset); }
 80	STD	POSIX	{ int setgroups(u_int gidsetsize, gid_t *gidset); }
@@ -306,7 +306,7 @@ setrlimit __setrlimit_args int
 196	STD	BSD	{ int getdirentries(int fd, char *buf, u_int count, \
 			    long *basep); }
-197	STD	BSD	{ caddr_t mmap(caddr_t addr, size_t len, int prot, \
+197	MPSAFE	STD	BSD	{ caddr_t mmap(caddr_t addr, size_t len, int prot, \
 			    int flags, int fd, int pad, off_t pos); }
 198	STD	NOHIDE	{ int nosys(void); } __syscall __syscall_args int
 199	STD	POSIX	{ off_t lseek(int fd, int pad, off_t offset, \
 			    int whence); }
@@ -318,8 +318,8 @@ __sysctl sysctl_args int
 ; properly, __sysctl should be a NOHIDE, but making an exception
 ; here allows to avoid one in libc/sys/Makefile.inc.
-203	STD	BSD	{ int mlock(const void *addr, size_t len); }
-204	STD	BSD	{ int munlock(const void *addr, size_t len); }
+203	MPSAFE	STD	BSD	{ int mlock(const void *addr, size_t len); }
+204	MPSAFE	STD	BSD	{ int munlock(const void *addr, size_t len); }
 205	STD	BSD	{ int undelete(char *path); }
 206	STD	BSD	{ int futimes(int fd, struct timeval *tptr); }
 207	STD	BSD	{ int getpgid(pid_t pid); }
@@ -386,7 +386,7 @@
 248	UNIMPL	NOHIDE	nosys
 249	UNIMPL	NOHIDE	nosys
 ; syscall numbers initially used in OpenBSD
-250	STD	BSD	{ int minherit(void *addr, size_t len, int inherit); }
+250	MPSAFE	STD	BSD	{ int minherit(void *addr, size_t len, int inherit); }
 251	STD	BSD	{ int rfork(int flags); }
 252	STD	BSD	{ int openbsd_poll(struct pollfd *fds, u_int nfds, \
 			    int timeout); }
@@ -414,7 +414,7 @@
 274	STD	BSD	{ int lchmod(char *path, mode_t mode); }
 275	NOPROTO	BSD	{ int lchown(char *path, uid_t uid, gid_t gid); } netbsd_lchown lchown_args int
 276	STD	BSD	{ int lutimes(char *path, struct timeval *tptr); }
-277	NOPROTO	BSD	{ int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
+277	MPSAFE	NOPROTO	BSD	{ int msync(void *addr, size_t len, int flags); } netbsd_msync msync_args int
 278	STD	BSD	{ int nstat(char *path, struct nstat *ub); }
 279	STD	BSD	{ int nfstat(int fd, struct nstat *sb); }
 280	STD	BSD	{ int nlstat(char *path, struct nstat *ub); }
@@ -463,8 +463,8 @@
 321	STD	BSD	{ int yield(void); }
 322	OBSOL	NOHIDE	thr_sleep
 323	OBSOL	NOHIDE	thr_wakeup
-324	STD	BSD	{ int mlockall(int how); }
-325	STD	BSD	{ int munlockall(void); }
+324	MPSAFE	STD	BSD	{ int mlockall(int how); }
+325	MPSAFE	STD	BSD	{ int munlockall(void); }
 326	STD	BSD	{ int __getcwd(u_char *buf, u_int buflen); }
 327	STD	POSIX	{ int sched_setparam (pid_t pid, \
 			    const struct sched_param *param); }
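pipe_build_write_buffer() above wires the user's pages under vm_mtx and, if a page cannot be wired, unwires everything wired so far before unlocking and returning EFAULT: either the whole run is pinned or none of it is. A small sketch of that all-or-nothing rollback shape; wire_page() and unwire_page() are invented stand-ins for the vm_page calls:

    #include <pthread.h>

    static pthread_mutex_t vm_mtx = PTHREAD_MUTEX_INITIALIZER;

    /* Stand-ins for vm_page wiring; fail on the third "page". */
    static int
    wire_page(int i)
    {
            return (i < 2 ? 0 : -1);
    }

    static void
    unwire_page(int i)
    {
            (void)i;
    }

    /* Pin a run of pages, or pin none at all. */
    static int
    wire_range(int npages)
    {
            int i, j;

            pthread_mutex_lock(&vm_mtx);
            for (i = 0; i < npages; i++) {
                    if (wire_page(i) != 0) {
                            for (j = 0; j < i; j++)  /* roll back partial work */
                                    unwire_page(j);
                            pthread_mutex_unlock(&vm_mtx);
                            return (-1);             /* EFAULT in the kernel */
                    }
            }
            pthread_mutex_unlock(&vm_mtx);
            return (0);
    }

    int
    main(void)
    {
            return (wire_range(2) == 0 && wire_range(4) != 0 ? 0 : 1);
    }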
diff --git a/sys/kern/sysv_shm.c b/sys/kern/sysv_shm.c
index fab53a8..0a9abda 100644
--- a/sys/kern/sysv_shm.c
+++ b/sys/kern/sysv_shm.c
@@ -43,6 +43,7 @@
 #include <sys/shm.h>
 #include <sys/proc.h>
 #include <sys/malloc.h>
+#include <sys/mutex.h>
 #include <sys/mman.h>
 #include <sys/stat.h>
 #include <sys/syscall.h>
@@ -314,14 +315,17 @@ shmat(p, uap)
 	}
 
 	shm_handle = shmseg->shm_internal;
+	mtx_lock(&vm_mtx);
 	vm_object_reference(shm_handle->shm_object);
 	rv = vm_map_find(&p->p_vmspace->vm_map, shm_handle->shm_object,
 	    0, &attach_va, size, (flags & MAP_FIXED)?0:1, prot, prot, 0);
 	if (rv != KERN_SUCCESS) {
+		mtx_unlock(&vm_mtx);
 		return ENOMEM;
 	}
 	vm_map_inherit(&p->p_vmspace->vm_map,
 	    attach_va, attach_va + size, VM_INHERIT_SHARE);
+	mtx_unlock(&vm_mtx);
 
 	shmmap_s->va = attach_va;
 	shmmap_s->shmid = uap->shmid;
@@ -549,6 +553,7 @@ shmget_allocate_segment(p, uap, mode)
 	 * We make sure that we have allocated a pager before we need
 	 * to.
 	 */
+	mtx_lock(&vm_mtx);
 	if (shm_use_phys) {
 		shm_handle->shm_object =
 		    vm_pager_allocate(OBJT_PHYS, 0, size, VM_PROT_DEFAULT, 0);
@@ -558,6 +563,7 @@ shmget_allocate_segment(p, uap, mode)
 	}
 	vm_object_clear_flag(shm_handle->shm_object, OBJ_ONEMAPPING);
 	vm_object_set_flag(shm_handle->shm_object, OBJ_NOSPLIT);
+	mtx_unlock(&vm_mtx);
 	shmseg->shm_internal = shm_handle;
 	shmseg->shm_perm.cuid = shmseg->shm_perm.uid = cred->cr_uid;
diff --git a/sys/kern/vfs_bio.c b/sys/kern/vfs_bio.c
index c1b53d8..a980330 100644
--- a/sys/kern/vfs_bio.c
+++ b/sys/kern/vfs_bio.c
@@ -281,6 +281,8 @@ waitrunningbufspace(void)
  * Called when a buffer is extended.  This function clears the B_CACHE
  * bit if the newly extended portion of the buffer does not contain
  * valid data.
+ *
+ * must be called with vm_mtx held
  */
 static __inline__
 void
@@ -426,11 +428,13 @@ bufinit(void)
 	 * from buf_daemon.
 	 */
 
+	mtx_lock(&vm_mtx);
 	bogus_offset = kmem_alloc_pageable(kernel_map, PAGE_SIZE);
 	bogus_page = vm_page_alloc(kernel_object,
 			((bogus_offset - VM_MIN_KERNEL_ADDRESS) >> PAGE_SHIFT),
 			VM_ALLOC_NORMAL);
 	cnt.v_wire_count++;
+	mtx_unlock(&vm_mtx);
 }
@@ -441,17 +445,27 @@ bufinit(void)
  * buffer_map.
  *
  * Since this call frees up buffer space, we call bufspacewakeup().
+ *
+ * Can be called with or without the vm_mtx.
  */
 static void
 bfreekva(struct buf * bp)
 {
+
 	if (bp->b_kvasize) {
+		int hadvmlock;
+
 		++buffreekvacnt;
 		bufspace -= bp->b_kvasize;
+		hadvmlock = mtx_owned(&vm_mtx);
+		if (!hadvmlock)
+			mtx_lock(&vm_mtx);
 		vm_map_delete(buffer_map,
 		    (vm_offset_t) bp->b_kvabase,
 		    (vm_offset_t) bp->b_kvabase + bp->b_kvasize
 		);
+		if (!hadvmlock)
+			mtx_unlock(&vm_mtx);
 		bp->b_kvasize = 0;
 		bufspacewakeup();
 	}
@@ -807,6 +821,7 @@ bdwrite(struct buf * bp)
 		VOP_BMAP(bp->b_vp, bp->b_lblkno, NULL, &bp->b_blkno, NULL, NULL);
 	}
 
+	mtx_lock(&vm_mtx);
 	/*
 	 * Set the *dirty* buffer range based upon the VM system dirty pages.
 	 */
@@ -820,6 +835,7 @@ bdwrite(struct buf * bp)
 	 * out on the next sync, or perhaps the cluster will be completed.
 	 */
 	vfs_clean_pages(bp);
+	mtx_unlock(&vm_mtx);
 	bqrelse(bp);
 
 	/*
@@ -973,12 +989,15 @@ buf_dirty_count_severe(void)
  * Release a busy buffer and, if requested, free its resources.  The
  * buffer will be stashed in the appropriate bufqueue[] allowing it
  * to be accessed later as a cache entity or reused for other purposes.
+ *
+ * vm_mtx must be not be held.
  */
 void
 brelse(struct buf * bp)
 {
 	int s;
 
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 	KASSERT(!(bp->b_flags & (B_CLUSTER|B_PAGING)),
 	    ("brelse: inappropriate B_PAGING or B_CLUSTER bp %p", bp));
 	s = splbio();
@@ -1088,6 +1107,7 @@ brelse(struct buf * bp)
 		resid = bp->b_bufsize;
 		foff = bp->b_offset;
 
+		mtx_lock(&vm_mtx);
 		for (i = 0; i < bp->b_npages; i++) {
 			int had_bogus = 0;
 
@@ -1099,10 +1119,12 @@ brelse(struct buf * bp)
 			 * now.
 			 */
 			if (m == bogus_page) {
+				mtx_unlock(&vm_mtx);
 				VOP_GETVOBJECT(vp, &obj);
 				poff = OFF_TO_IDX(bp->b_offset);
 				had_bogus = 1;
+				mtx_lock(&vm_mtx);
 
 				for (j = i; j < bp->b_npages; j++) {
 					vm_page_t mtmp;
 					mtmp = bp->b_pages[j];
@@ -1136,11 +1158,15 @@ brelse(struct buf * bp)
 		if (bp->b_flags & (B_INVAL | B_RELBUF))
 			vfs_vmio_release(bp);
+		mtx_unlock(&vm_mtx);
 
 	} else if (bp->b_flags & B_VMIO) {
 
-		if (bp->b_flags & (B_INVAL | B_RELBUF))
+		if (bp->b_flags & (B_INVAL | B_RELBUF)) {
+			mtx_lock(&vm_mtx);
 			vfs_vmio_release(bp);
+			mtx_unlock(&vm_mtx);
+		}
 
 	}
@@ -1302,6 +1328,9 @@ bqrelse(struct buf * bp)
 	splx(s);
 }
 
+/*
+ * Must be called with vm_mtx held.
+ */
 static void
 vfs_vmio_release(bp)
 	struct buf *bp;
@@ -1310,6 +1339,7 @@ vfs_vmio_release(bp)
 	vm_page_t m;
 
 	s = splvm();
+	mtx_assert(&vm_mtx, MA_OWNED);
 	for (i = 0; i < bp->b_npages; i++) {
 		m = bp->b_pages[i];
 		bp->b_pages[i] = NULL;
@@ -1343,6 +1373,9 @@ vfs_vmio_release(bp)
 	}
 	splx(s);
 	pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+
+	/* could drop vm_mtx here */
+
 	if (bp->b_bufsize) {
 		bufspacewakeup();
 		bp->b_bufsize = 0;
@@ -1614,7 +1647,9 @@ restart:
 		if (qindex == QUEUE_CLEAN) {
 			if (bp->b_flags & B_VMIO) {
 				bp->b_flags &= ~B_ASYNC;
+				mtx_lock(&vm_mtx);
 				vfs_vmio_release(bp);
+				mtx_unlock(&vm_mtx);
 			}
 			if (bp->b_vp)
 				brelvp(bp);
@@ -1735,6 +1770,8 @@ restart:
 		if (maxsize != bp->b_kvasize) {
 			vm_offset_t addr = 0;
 
+			/* we'll hold the lock over some vm ops */
+			mtx_lock(&vm_mtx);
 			bfreekva(bp);
 
 			if (vm_map_findspace(buffer_map,
@@ -1743,6 +1780,7 @@ restart:
 				 * Uh oh.  Buffer map is to fragmented.  We
 				 * must defragment the map.
 				 */
+				mtx_unlock(&vm_mtx);
 				++bufdefragcnt;
 				defrag = 1;
 				bp->b_flags |= B_INVAL;
@@ -1759,6 +1797,7 @@ restart:
 				bufspace += bp->b_kvasize;
 				++bufreusecnt;
 			}
+			mtx_unlock(&vm_mtx);
 		}
 		bp->b_data = bp->b_kvabase;
 	}
@@ -1936,18 +1975,24 @@ inmem(struct vnode * vp, daddr_t blkno)
 	size = vp->v_mount->mnt_stat.f_iosize;
 	off = (vm_ooffset_t)blkno * (vm_ooffset_t)vp->v_mount->mnt_stat.f_iosize;
 
+	mtx_lock(&vm_mtx);
 	for (toff = 0; toff < vp->v_mount->mnt_stat.f_iosize; toff += tinc) {
 		m = vm_page_lookup(obj, OFF_TO_IDX(off + toff));
 		if (!m)
-			return 0;
+			goto notinmem;
 		tinc = size;
 		if (tinc > PAGE_SIZE - ((toff + off) & PAGE_MASK))
 			tinc = PAGE_SIZE - ((toff + off) & PAGE_MASK);
 		if (vm_page_is_valid(m,
 		    (vm_offset_t) ((toff + off) & PAGE_MASK), tinc) == 0)
-			return 0;
+			goto notinmem;
 	}
+	mtx_unlock(&vm_mtx);
 	return 1;
+
+notinmem:
+	mtx_unlock(&vm_mtx);
+	return (0);
 }
@@ -1960,11 +2005,14 @@ inmem(struct vnode * vp, daddr_t blkno)
  *
  * This routine is primarily used by NFS, but is generalized for the
  * B_VMIO case.
+ *
+ * Can be called with or without vm_mtx
  */
 static void
 vfs_setdirty(struct buf *bp)
 {
 	int i;
+	int hadvmlock;
 	vm_object_t object;
 
 	/*
@@ -1983,6 +2031,10 @@ vfs_setdirty(struct buf *bp)
 	if ((bp->b_flags & B_VMIO) == 0)
 		return;
 
+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
+
 	object = bp->b_pages[0]->object;
 
 	if ((object->flags & OBJ_WRITEABLE) && !(object->flags & OBJ_MIGHTBEDIRTY))
@@ -2040,6 +2092,8 @@ vfs_setdirty(struct buf *bp)
 			bp->b_dirtyend = eoffset;
 		}
 	}
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
 }
@@ -2441,6 +2495,7 @@ allocbuf(struct buf *bp, int size)
 			 * DEV_BSIZE aligned existing buffer size.  Figure out
 			 * if we have to remove any pages.
 			 */
+			mtx_lock(&vm_mtx);
 			if (desiredpages < bp->b_npages) {
 				for (i = desiredpages; i < bp->b_npages; i++) {
 					/*
@@ -2461,6 +2516,7 @@ allocbuf(struct buf *bp, int size)
 				    (desiredpages << PAGE_SHIFT), (bp->b_npages - desiredpages));
 				bp->b_npages = desiredpages;
 			}
+			mtx_unlock(&vm_mtx);
 		} else if (size > bp->b_bcount) {
 			/*
 			 * We are growing the buffer, possibly in a
@@ -2481,6 +2537,7 @@ allocbuf(struct buf *bp, int size)
 			vp = bp->b_vp;
 			VOP_GETVOBJECT(vp, &obj);
 
+			mtx_lock(&vm_mtx);
 			while (bp->b_npages < desiredpages) {
 				vm_page_t m;
 				vm_pindex_t pi;
@@ -2589,6 +2646,9 @@ allocbuf(struct buf *bp, int size)
 				bp->b_pages,
 				bp->b_npages
 			);
+
+			mtx_unlock(&vm_mtx);
+
 			bp->b_data = (caddr_t)((vm_offset_t)bp->b_data |
 			    (vm_offset_t)(bp->b_offset & PAGE_MASK));
 		}
@@ -2726,6 +2786,7 @@ bufdone(struct buf *bp)
 		if (error) {
 			panic("biodone: no object");
 		}
+		mtx_lock(&vm_mtx);
 #if defined(VFS_BIO_DEBUG)
 		if (obj->paging_in_progress < bp->b_npages) {
 			printf("biodone: paging in progress(%d) < bp->b_npages(%d)\n",
@@ -2814,6 +2875,7 @@ bufdone(struct buf *bp)
 		}
 		if (obj)
 			vm_object_pip_wakeupn(obj, 0);
+		mtx_unlock(&vm_mtx);
 	}
@@ -2837,12 +2899,15 @@ bufdone(struct buf *bp)
  * This routine is called in lieu of iodone in the case of
  * incomplete I/O.  This keeps the busy status for pages
  * consistant.
+ *
+ * vm_mtx should not be held
  */
 void
 vfs_unbusy_pages(struct buf * bp)
 {
 	int i;
 
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 	runningbufwakeup(bp);
 	if (bp->b_flags & B_VMIO) {
 		struct vnode *vp = bp->b_vp;
@@ -2850,6 +2915,7 @@ vfs_unbusy_pages(struct buf * bp)
 
 		VOP_GETVOBJECT(vp, &obj);
 
+		mtx_lock(&vm_mtx);
 		for (i = 0; i < bp->b_npages; i++) {
 			vm_page_t m = bp->b_pages[i];
 
@@ -2866,6 +2932,7 @@ vfs_unbusy_pages(struct buf * bp)
 			vm_page_io_finish(m);
 		}
 		vm_object_pip_wakeupn(obj, 0);
+		mtx_unlock(&vm_mtx);
 	}
 }
@@ -2876,12 +2943,15 @@ vfs_unbusy_pages(struct buf * bp)
  * range is restricted to the buffer's size.
 *
 * This routine is typically called after a read completes.
+ *
+ * vm_mtx should be held
 */
 static void
 vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
 {
 	vm_ooffset_t soff, eoff;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	/*
 	 * Start and end offsets in buffer.  eoff - soff may not cross a
 	 * page boundry or cross the end of the buffer.  The end of the
@@ -2917,12 +2987,15 @@ vfs_page_set_valid(struct buf *bp, vm_ooffset_t off, int pageno, vm_page_t m)
  * Since I/O has not been initiated yet, certain buffer flags
  * such as BIO_ERROR or B_INVAL may be in an inconsistant state
  * and should be ignored.
+ *
+ * vm_mtx should not be held
 */
 void
 vfs_busy_pages(struct buf * bp, int clear_modify)
 {
 	int i, bogus;
 
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 	if (bp->b_flags & B_VMIO) {
 		struct vnode *vp = bp->b_vp;
 		vm_object_t obj;
@@ -2932,6 +3005,7 @@ vfs_busy_pages(struct buf * bp, int clear_modify)
 		foff = bp->b_offset;
 		KASSERT(bp->b_offset != NOOFFSET,
 		    ("vfs_busy_pages: no buffer offset"));
+		mtx_lock(&vm_mtx);
 		vfs_setdirty(bp);
 
 retry:
@@ -2979,6 +3053,7 @@ retry:
 		}
 		if (bogus)
 			pmap_qenter(trunc_page((vm_offset_t)bp->b_data), bp->b_pages, bp->b_npages);
+		mtx_unlock(&vm_mtx);
 	}
 }
@@ -2989,12 +3064,15 @@ retry:
  *
  * Note that while we only really need to clean through to b_bcount, we
  * just go ahead and clean through to b_bufsize.
+ *
+ * should be called with vm_mtx held
 */
 static void
 vfs_clean_pages(struct buf * bp)
 {
 	int i;
 
+	mtx_assert(&vm_mtx, MA_OWNED);
 	if (bp->b_flags & B_VMIO) {
 		vm_ooffset_t foff;
@@ -3021,6 +3099,7 @@
  * Set the range within the buffer to valid and clean.  The range is
 * relative to the beginning of the buffer, b_offset.  Note that b_offset
 * itself may be offset from the beginning of the first page.
+ *
 */
 
 void
@@ -3061,13 +3140,18 @@ vfs_bio_set_validclean(struct buf *bp, int base, int size)
 *
 * Note that while we only theoretically need to clear through b_bcount,
 * we go ahead and clear through b_bufsize.
+ *
+ * We'll get vm_mtx here for safety if processing a VMIO buffer.
+ * I don't think vm_mtx is needed, but we're twiddling vm_page flags.
 */
 void
 vfs_bio_clrbuf(struct buf *bp) {
 	int i, mask = 0;
 	caddr_t sa, ea;
+
 	if ((bp->b_flags & (B_VMIO | B_MALLOC)) == B_VMIO) {
+		mtx_lock(&vm_mtx);
 		bp->b_flags &= ~B_INVAL;
 		bp->b_ioflags &= ~BIO_ERROR;
 		if( (bp->b_npages == 1) && (bp->b_bufsize < PAGE_SIZE) &&
@@ -3079,6 +3163,7 @@ vfs_bio_clrbuf(struct buf *bp) {
 		}
 		bp->b_pages[0]->valid |= mask;
 		bp->b_resid = 0;
+		mtx_unlock(&vm_mtx);
 		return;
 	}
 	ea = sa = bp->b_data;
@@ -3106,6 +3191,7 @@ vfs_bio_clrbuf(struct buf *bp) {
 			vm_page_flag_clear(bp->b_pages[i], PG_ZERO);
 		}
 		bp->b_resid = 0;
+		mtx_unlock(&vm_mtx);
 	} else {
 		clrbuf(bp);
 	}
@@ -3115,18 +3201,22 @@
  * vm_hold_load_pages and vm_hold_unload pages get pages into
 * a buffers address space.  The pages are anonymous and are
 * not associated with a file object.
+ *
+ * vm_mtx should not be held
 */
-void
+static void
 vm_hold_load_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
 {
 	vm_offset_t pg;
 	vm_page_t p;
 	int index;
 
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 	to = round_page(to);
 	from = round_page(from);
 	index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
 
+	mtx_lock(&vm_mtx);
 	for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
 
 tryagain:
@@ -3152,6 +3242,7 @@ tryagain:
 		vm_page_wakeup(p);
 	}
 	bp->b_npages = index;
+	mtx_unlock(&vm_mtx);
 }
 
 void
@@ -3160,11 +3251,15 @@
 vm_hold_free_pages(struct buf * bp, vm_offset_t from, vm_offset_t to)
 {
 	vm_offset_t pg;
 	vm_page_t p;
 	int index, newnpages;
+	int hadvmlock;
 
 	from = round_page(from);
 	to = round_page(to);
 	newnpages = index = (from - trunc_page((vm_offset_t)bp->b_data)) >> PAGE_SHIFT;
+	hadvmlock = mtx_owned(&vm_mtx);
+	if (!hadvmlock)
+		mtx_lock(&vm_mtx);
 	for (pg = from; pg < to; pg += PAGE_SIZE, index++) {
 		p = bp->b_pages[index];
 		if (p && (index < bp->b_npages)) {
@@ -3180,6 +3275,8 @@
 		}
 	}
 	bp->b_npages = newnpages;
+	if (!hadvmlock)
+		mtx_unlock(&vm_mtx);
 }
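Several vfs_bio.c routines above (bfreekva(), vfs_setdirty(), vm_hold_free_pages()) use a conditional-lock idiom so they can be called with or without vm_mtx: query ownership, lock only if not already held, and undo only what this function did. A pthreads analogue with hand-rolled owner tracking, since the kernel's mtx_owned() does this natively; the names are illustrative:

    #include <pthread.h>

    struct owned_mtx {
            pthread_mutex_t mtx;
            pthread_t       owner;
            int             held;
    };

    static struct owned_mtx vm_mtx = { PTHREAD_MUTEX_INITIALIZER };

    static void
    omtx_lock(struct owned_mtx *m)
    {
            pthread_mutex_lock(&m->mtx);
            m->owner = pthread_self();
            m->held = 1;
    }

    static void
    omtx_unlock(struct owned_mtx *m)
    {
            m->held = 0;
            pthread_mutex_unlock(&m->mtx);
    }

    static int
    omtx_owned(struct owned_mtx *m)
    {
            return (m->held && pthread_equal(m->owner, pthread_self()));
    }

    /* Callable with or without vm_mtx held, like bfreekva(). */
    static void
    free_kva(void)
    {
            int hadvmlock;

            hadvmlock = omtx_owned(&vm_mtx);
            if (!hadvmlock)
                    omtx_lock(&vm_mtx);
            /* ... vm_map_delete() would run here ... */
            if (!hadvmlock)
                    omtx_unlock(&vm_mtx);
    }

    int
    main(void)
    {
            free_kva();             /* acquires and drops the lock itself */
            omtx_lock(&vm_mtx);
            free_kva();             /* caller already holds it */
            omtx_unlock(&vm_mtx);
            return (0);
    }

The benefit of the idiom is that a leaf routine composes with callers that already hold the lock, at the cost of a slightly murkier locking contract; the asserts added elsewhere in this commit are the stricter alternative.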
diff --git a/sys/kern/vfs_cluster.c b/sys/kern/vfs_cluster.c
index 8a6e045..0eb47bd 100644
--- a/sys/kern/vfs_cluster.c
+++ b/sys/kern/vfs_cluster.c
@@ -433,6 +433,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
 			BUF_KERNPROC(tbp);
 			TAILQ_INSERT_TAIL(&bp->b_cluster.cluster_head,
 				tbp, b_cluster.cluster_entry);
+			mtx_lock(&vm_mtx);
 			for (j = 0; j < tbp->b_npages; j += 1) {
 				vm_page_t m;
 				m = tbp->b_pages[j];
@@ -446,10 +447,12 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
 				if ((m->valid & VM_PAGE_BITS_ALL) == VM_PAGE_BITS_ALL)
 					tbp->b_pages[j] = bogus_page;
 			}
+			mtx_unlock(&vm_mtx);
 			bp->b_bcount += tbp->b_bcount;
 			bp->b_bufsize += tbp->b_bufsize;
 		}
+	mtx_lock(&vm_mtx);
 	for(j=0;j<bp->b_npages;j++) {
 		if ((bp->b_pages[j]->valid & VM_PAGE_BITS_ALL) ==
 		    VM_PAGE_BITS_ALL)
@@ -462,6 +465,7 @@ cluster_rbuild(vp, filesize, lbn, blkno, size, run, fbp)
 	pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
 		(vm_page_t *)bp->b_pages, bp->b_npages);
+	mtx_unlock(&vm_mtx);
 
 	return (bp);
 }
@@ -484,7 +488,9 @@ cluster_callback(bp)
 	if (bp->b_ioflags & BIO_ERROR)
 		error = bp->b_error;
 
+	mtx_lock(&vm_mtx);
 	pmap_qremove(trunc_page((vm_offset_t) bp->b_data), bp->b_npages);
+	mtx_unlock(&vm_mtx);
 	/*
 	 * Move memory from the large cluster buffer into the component
 	 * buffers and mark IO as done on these.
@@ -851,6 +857,7 @@ cluster_wbuild(vp, size, start_lbn, len)
 				}
 			}
 
+			mtx_lock(&vm_mtx);
 			for (j = 0; j < tbp->b_npages; j += 1) {
 				m = tbp->b_pages[j];
 				vm_page_io_start(m);
@@ -861,6 +868,7 @@ cluster_wbuild(vp, size, start_lbn, len)
 					bp->b_npages++;
 				}
 			}
+			mtx_unlock(&vm_mtx);
 		}
 		bp->b_bcount += size;
 		bp->b_bufsize += size;
@@ -879,8 +887,10 @@ cluster_wbuild(vp, size, start_lbn, len)
 				tbp, b_cluster.cluster_entry);
 		}
 	finishcluster:
+		mtx_lock(&vm_mtx);
 		pmap_qenter(trunc_page((vm_offset_t) bp->b_data),
 			(vm_page_t *) bp->b_pages, bp->b_npages);
+		mtx_unlock(&vm_mtx);
 		if (bp->b_bufsize > bp->b_kvasize)
 			panic(
 			    "cluster_wbuild: b_bufsize(%ld) > b_kvasize(%d)\n",
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 328a9b1..d17e934 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -535,14 +535,18 @@ retry:
 	if (vp->v_type == VREG || vp->v_type == VDIR) {
 		if ((error = VOP_GETATTR(vp, &vat, cred, p)) != 0)
 			goto retn;
+		mtx_lock(&vm_mtx);
 		object = vnode_pager_alloc(vp, vat.va_size, 0, 0);
+		mtx_unlock(&vm_mtx);
 	} else if (devsw(vp->v_rdev) != NULL) {
 		/*
 		 * This simply allocates the biggest object possible
 		 * for a disk vnode.  This should be fixed, but doesn't
 		 * cause any problems (yet).
 		 */
+		mtx_lock(&vm_mtx);
 		object = vnode_pager_alloc(vp, IDX_TO_OFF(INT_MAX), 0, 0);
+		mtx_unlock(&vm_mtx);
 	} else {
 		goto retn;
 	}
@@ -550,15 +554,23 @@ retry:
 		 * Dereference the reference we just created.  This assumes
 		 * that the object is associated with the vp.
 		 */
+		mtx_lock(&vm_mtx);
 		object->ref_count--;
+		mtx_unlock(&vm_mtx);
 		vp->v_usecount--;
 	} else {
+		/*
+		 * XXX: safe to hold vm mutex through VOP_UNLOCK?
+		 */
+		mtx_lock(&vm_mtx);
 		if (object->flags & OBJ_DEAD) {
 			VOP_UNLOCK(vp, 0, p);
-			tsleep(object, PVM, "vodead", 0);
+			msleep(object, VM_OBJECT_MTX(object), PVM, "vodead", 0);
+			mtx_unlock(&vm_mtx);
 			vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
 			goto retry;
 		}
+		mtx_unlock(&vm_mtx);
 	}
 
 	KASSERT(vp->v_object != NULL, ("vfs_object_create: NULL object"));
@@ -580,6 +592,7 @@ vop_stddestroyvobject(ap)
 	if (vp->v_object == NULL)
 		return (0);
 
+	mtx_lock(&vm_mtx);
 	if (obj->ref_count == 0) {
 		/*
 		 * vclean() may be called twice.  The first time
@@ -594,6 +607,7 @@ vop_stddestroyvobject(ap)
 		 */
 		vm_pager_deallocate(obj);
 	}
+	mtx_unlock(&vm_mtx);
 
 	return (0);
 }
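The vfs_default.c hunk above replaces tsleep() with msleep(object, VM_OBJECT_MTX(object), ...): the sleep now atomically releases the interlock as the thread blocks, closing the window in which a wakeup could slip by between dropping the lock and going to sleep. The user-space counterpart of that guarantee is pthread_cond_wait(), which releases and reacquires the mutex atomically; a minimal sketch with invented names:

    #include <pthread.h>

    static pthread_mutex_t obj_mtx = PTHREAD_MUTEX_INITIALIZER;
    static pthread_cond_t obj_cv = PTHREAD_COND_INITIALIZER;
    static int obj_dead = 1;

    /* Waiter: the mutex is released and reacquired atomically by the wait. */
    static void *
    waiter(void *arg)
    {
            pthread_mutex_lock(&obj_mtx);
            while (obj_dead)
                    pthread_cond_wait(&obj_cv, &obj_mtx);
            pthread_mutex_unlock(&obj_mtx);
            return (arg);
    }

    int
    main(void)
    {
            pthread_t t;

            pthread_create(&t, NULL, waiter, NULL);
            pthread_mutex_lock(&obj_mtx);
            obj_dead = 0;                   /* state change under the lock */
            pthread_cond_broadcast(&obj_cv);        /* cf. wakeup(object) */
            pthread_mutex_unlock(&obj_mtx);
            pthread_join(t, NULL);
            return (0);
    }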
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c
index 6b73258..3f97551 100644
--- a/sys/kern/vfs_extattr.c
+++ b/sys/kern/vfs_extattr.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-	if (VOP_GETVOBJECT(vp, &obj) == 0)
+	if (VOP_GETVOBJECT(vp, &obj) == 0) {
+		mtx_unlock(&Giant);
+		mtx_lock(&vm_mtx);
 		vm_object_page_clean(obj, 0, 0, 0);
+		mtx_unlock(&vm_mtx);
+		mtx_lock(&Giant);
+	}
 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
 #ifdef SOFTUPDATES
 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c
index 2f4dc8d..6c050ba 100644
--- a/sys/kern/vfs_subr.c
+++ b/sys/kern/vfs_subr.c
@@ -711,6 +711,8 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
 	int s, error;
 	vm_object_t object;
 
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
+
 	if (flags & V_SAVE) {
 		s = splbio();
 		while (vp->v_numoutput) {
@@ -797,8 +799,10 @@ vinvalbuf(vp, flags, cred, p, slpflag, slptimeo)
 	 */
 	mtx_lock(&vp->v_interlock);
 	if (VOP_GETVOBJECT(vp, &object) == 0) {
+		mtx_lock(&vm_mtx);
 		vm_object_page_remove(object, 0, 0,
 		    (flags & V_SAVE) ? TRUE : FALSE);
+		mtx_unlock(&vm_mtx);
 	}
 	mtx_unlock(&vp->v_interlock);
 
@@ -1132,6 +1136,8 @@ speedup_syncer()
  * Also sets B_PAGING flag to indicate that vnode is not fully associated
 * with the buffer.  i.e. the bp has not been linked into the vnode or
 * ref-counted.
+ *
+ * Doesn't block, only vnode seems to need a lock.
 */
 void
 pbgetvp(vp, bp)
@@ -1554,6 +1560,7 @@ vput(vp)
 {
 	struct proc *p = curproc;	/* XXX */
 
+	mtx_assert(&Giant, MA_OWNED);
 	KASSERT(vp != NULL, ("vput: null vp"));
 	mtx_lock(&vp->v_interlock);
 	/* Skip this v_writecount check if we're going to panic below. */
@@ -2382,7 +2389,11 @@ loop:
 		if (!vget(vp,
 		    LK_INTERLOCK | LK_EXCLUSIVE | LK_RETRY | LK_NOOBJ, curproc)) {
 			if (VOP_GETVOBJECT(vp, &obj) == 0) {
-				vm_object_page_clean(obj, 0, 0, flags == MNT_WAIT ? OBJPC_SYNC : OBJPC_NOSYNC);
+				mtx_lock(&vm_mtx);
+				vm_object_page_clean(obj, 0, 0,
+				    flags == MNT_WAIT ?
+				    OBJPC_SYNC : OBJPC_NOSYNC);
+				mtx_unlock(&vm_mtx);
 				anyio = 1;
 			}
 			vput(vp);
@@ -2409,6 +2420,8 @@ vfs_object_create(vp, p, cred)
 	struct proc *p;
 	struct ucred *cred;
 {
+
+	mtx_assert(&vm_mtx, MA_NOTOWNED);
 	return (VOP_CREATEVOBJECT(vp, cred, p));
 }
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 6b73258..3f97551 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -2770,8 +2770,13 @@ fsync(p, uap)
 	if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0)
 		return (error);
 	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p);
-	if (VOP_GETVOBJECT(vp, &obj) == 0)
+	if (VOP_GETVOBJECT(vp, &obj) == 0) {
+		mtx_unlock(&Giant);
+		mtx_lock(&vm_mtx);
 		vm_object_page_clean(obj, 0, 0, 0);
+		mtx_unlock(&vm_mtx);
+		mtx_lock(&Giant);
+	}
 	error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, p);
 #ifdef SOFTUPDATES
 	if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP))
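A final pattern worth noting: many hunks in this commit add mtx_assert(&vm_mtx, MA_OWNED) or MA_NOTOWNED at function entry (exec_new_vmspace(), vinvalbuf(), vfs_object_create(), the vfs_bio helpers), turning each routine's locking contract into a checked invariant rather than a comment. A user-space sketch of the same idea using assert() and tracked ownership; the helper names are assumptions for illustration, not a real API:

    #include <assert.h>
    #include <pthread.h>

    struct owned_mtx {
            pthread_mutex_t mtx;
            pthread_t       owner;
            int             held;
    };

    static struct owned_mtx vm_mtx = { PTHREAD_MUTEX_INITIALIZER };

    static void
    omtx_lock(struct owned_mtx *m)
    {
            pthread_mutex_lock(&m->mtx);
            m->owner = pthread_self();
            m->held = 1;
    }

    static void
    omtx_unlock(struct owned_mtx *m)
    {
            m->held = 0;
            pthread_mutex_unlock(&m->mtx);
    }

    static int
    omtx_owned(struct owned_mtx *m)
    {
            return (m->held && pthread_equal(m->owner, pthread_self()));
    }

    /* Entry contract: vm_mtx must not be held (cf. vfs_object_create()). */
    static void
    create_object(void)
    {
            assert(!omtx_owned(&vm_mtx));
    }

    /* Entry contract: vm_mtx must be held (cf. vfs_clean_pages()). */
    static void
    clean_pages(void)
    {
            assert(omtx_owned(&vm_mtx));
    }

    int
    main(void)
    {
            create_object();
            omtx_lock(&vm_mtx);
            clean_pages();
            omtx_unlock(&vm_mtx);
            return (0);
    }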