diff options
author | alfred <alfred@FreeBSD.org> | 2002-01-13 11:58:06 +0000 |
---|---|---|
committer | alfred <alfred@FreeBSD.org> | 2002-01-13 11:58:06 +0000 |
commit | 844237b3960bfbf49070d6371a84f67f9e3366f6 (patch) | |
tree | 598e20df363e602313c7ad93de8f8c4b4240d61d /sys/kern | |
parent | 8cd61193307ff459ae72eb7aa6a734eb5e3b427e (diff) | |
download | FreeBSD-src-844237b3960bfbf49070d6371a84f67f9e3366f6.zip FreeBSD-src-844237b3960bfbf49070d6371a84f67f9e3366f6.tar.gz |
SMP Lock struct file, filedesc and the global file list.
Seigo Tanimura (tanimura) posted the initial delta.
I've polished it quite a bit reducing the need for locking and
adapting it for KSE.
Locks:
1 mutex in each filedesc
protects all the fields.
protects "struct file" initialization: while a struct file
is being changed from &badfileops -> &pipeops or something,
the filedesc should be locked.
1 mutex in each struct file
protects the refcount fields.
doesn't protect anything else.
the flags used for garbage collection have been moved to
f_gcflag, which was the FILLER short; this doesn't need
locking because the garbage collection is a single threaded
container.
could likely be made to use a pool mutex.
1 sx lock for the global filelist.
struct file * fhold(struct file *fp);
/* increments reference count on a file */
struct file * fhold_locked(struct file *fp);
/* like fhold but expects file to be locked */
struct file * ffind_hold(struct thread *, int fd);
/* finds the struct file in thread, adds one reference and
returns it unlocked */
struct file * ffind_lock(struct thread *, int fd);
/* ffind_hold, but returns file locked */
I still have to smp-safe the fget cruft, I'll get to that asap.
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/init_main.c | 3 | ||||
-rw-r--r-- | sys/kern/kern_acl.c | 4 | ||||
-rw-r--r-- | sys/kern/kern_descrip.c | 341 | ||||
-rw-r--r-- | sys/kern/kern_event.c | 92 | ||||
-rw-r--r-- | sys/kern/kern_exec.c | 5 | ||||
-rw-r--r-- | sys/kern/kern_fork.c | 12 | ||||
-rw-r--r-- | sys/kern/subr_acl_posix1e.c | 4 | ||||
-rw-r--r-- | sys/kern/sys_generic.c | 133 | ||||
-rw-r--r-- | sys/kern/sys_pipe.c | 24 | ||||
-rw-r--r-- | sys/kern/sys_socket.c | 7 | ||||
-rw-r--r-- | sys/kern/uipc_syscalls.c | 36 | ||||
-rw-r--r-- | sys/kern/uipc_usrreq.c | 70 | ||||
-rw-r--r-- | sys/kern/vfs_acl.c | 4 | ||||
-rw-r--r-- | sys/kern/vfs_cache.c | 17 | ||||
-rw-r--r-- | sys/kern/vfs_extattr.c | 189 | ||||
-rw-r--r-- | sys/kern/vfs_lookup.c | 2 | ||||
-rw-r--r-- | sys/kern/vfs_syscalls.c | 189 | ||||
-rw-r--r-- | sys/kern/vfs_vnops.c | 2 |
18 files changed, 917 insertions, 217 deletions
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 4acc213..e984aec 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -358,6 +358,7 @@ proc0_init(void *dummy __unused) /* Create the file descriptor table. */ fdp = &filedesc0; p->p_fd = &fdp->fd_fd; + mtx_init(&fdp->fd_fd.fd_mtx, "struct filedesc", MTX_DEF); fdp->fd_fd.fd_refcnt = 1; fdp->fd_fd.fd_cmask = cmask; fdp->fd_fd.fd_ofiles = fdp->fd_dfiles; @@ -487,10 +488,12 @@ start_init(void *dummy) /* Get the vnode for '/'. Set p->p_fd->fd_cdir to reference it. */ if (VFS_ROOT(TAILQ_FIRST(&mountlist), &rootvnode)) panic("cannot find root vnode"); + FILEDESC_LOCK(p->p_fd); p->p_fd->fd_cdir = rootvnode; VREF(p->p_fd->fd_cdir); p->p_fd->fd_rdir = rootvnode; VREF(p->p_fd->fd_rdir); + FILEDESC_UNLOCK(p->p_fd); VOP_UNLOCK(rootvnode, 0, td); if (devfs_present) { diff --git a/sys/kern/kern_acl.c b/sys/kern/kern_acl.c index b50c896..63be63d 100644 --- a/sys/kern/kern_acl.c +++ b/sys/kern/kern_acl.c @@ -703,6 +703,7 @@ __acl_get_fd(struct thread *td, struct __acl_get_fd_args *uap) if (error == 0) { error = vacl_get_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -724,6 +725,7 @@ __acl_set_fd(struct thread *td, struct __acl_set_fd_args *uap) if (error == 0) { error = vacl_set_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -767,6 +769,7 @@ __acl_delete_fd(struct thread *td, struct __acl_delete_fd_args *uap) if (error == 0) { error = vacl_delete(td, (struct vnode *)fp->f_data, SCARG(uap, type)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -811,6 +814,7 @@ __acl_aclcheck_fd(struct thread *td, struct __acl_aclcheck_fd_args *uap) if (error == 0) { error = vacl_aclcheck(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); diff --git a/sys/kern/kern_descrip.c 
b/sys/kern/kern_descrip.c index 55bab63..1538698 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -109,6 +109,7 @@ static int badfo_close __P((struct file *fp, struct thread *td)); struct filelist filehead; /* head of list of open files */ int nfiles; /* actual number of open files */ extern int cmask; +struct sx filelist_lock; /* sx to protect filelist */ /* * System calls on descriptors. @@ -163,22 +164,27 @@ dup2(td, uap) int i, error; mtx_lock(&Giant); + FILEDESC_LOCK(fdp); retry: if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || new >= maxfilesperproc) { + FILEDESC_UNLOCK(fdp); error = EBADF; goto done2; } if (old == new) { td->td_retval[0] = new; + FILEDESC_UNLOCK(fdp); error = 0; goto done2; } if (new >= fdp->fd_nfiles) { - if ((error = fdalloc(td, new, &i))) + if ((error = fdalloc(td, new, &i))) { + FILEDESC_UNLOCK(fdp); goto done2; + } if (new != i) panic("dup2: fdalloc"); /* @@ -216,12 +222,16 @@ dup(td, uap) mtx_lock(&Giant); old = uap->fd; fdp = td->td_proc->p_fd; + FILEDESC_LOCK(fdp); if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL) { + FILEDESC_UNLOCK(fdp); error = EBADF; goto done2; } - if ((error = fdalloc(td, 0, &new))) + if ((error = fdalloc(td, 0, &new))) { + FILEDESC_UNLOCK(fdp); goto done2; + } error = do_dup(fdp, (int)old, new, td->td_retval, td); done2: mtx_unlock(&Giant); @@ -255,12 +265,15 @@ fcntl(td, uap) int i, tmp, error = 0, flg = F_POSIX; struct flock fl; u_int newmin; + struct proc *leaderp; mtx_lock(&Giant); fdp = p->p_fd; + FILEDESC_LOCK(fdp); if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) { + FILEDESC_UNLOCK(fdp); error = EBADF; goto done2; } @@ -271,28 +284,37 @@ fcntl(td, uap) newmin = uap->arg; if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || newmin >= maxfilesperproc) { + FILEDESC_UNLOCK(fdp); error = EINVAL; break; } - if ((error = fdalloc(td, newmin, &i))) + if ((error = fdalloc(td, newmin, &i))) { + 
FILEDESC_UNLOCK(fdp); break; + } error = do_dup(fdp, uap->fd, i, td->td_retval, td); break; case F_GETFD: td->td_retval[0] = *pop & 1; + FILEDESC_UNLOCK(fdp); break; case F_SETFD: *pop = (*pop &~ 1) | (uap->arg & 1); + FILEDESC_UNLOCK(fdp); break; case F_GETFL: + FILE_LOCK(fp); + FILEDESC_UNLOCK(fdp); td->td_retval[0] = OFLAGS(fp->f_flag); + FILE_UNLOCK(fp); break; case F_SETFL: fhold(fp); + FILEDESC_UNLOCK(fdp); fp->f_flag &= ~FCNTLFLAGS; fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS; tmp = fp->f_flag & FNONBLOCK; @@ -315,12 +337,14 @@ fcntl(td, uap) case F_GETOWN: fhold(fp); + FILEDESC_UNLOCK(fdp); error = fo_ioctl(fp, FIOGETOWN, (caddr_t)td->td_retval, td); fdrop(fp, td); break; case F_SETOWN: fhold(fp); + FILEDESC_UNLOCK(fdp); error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, td); fdrop(fp, td); break; @@ -331,15 +355,18 @@ fcntl(td, uap) case F_SETLK: if (fp->f_type != DTYPE_VNODE) { + FILEDESC_UNLOCK(fdp); error = EBADF; break; } vp = (struct vnode *)fp->f_data; - /* * copyin/lockop may block */ fhold(fp); + FILEDESC_UNLOCK(fdp); + vp = (struct vnode *)fp->f_data; + /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); @@ -364,8 +391,11 @@ fcntl(td, uap) error = EBADF; break; } + PROC_LOCK(p); p->p_flag |= P_ADVLOCK; - error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, + leaderp = p->p_leader; + PROC_UNLOCK(p); + error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK, &fl, flg); break; case F_WRLCK: @@ -373,12 +403,18 @@ fcntl(td, uap) error = EBADF; break; } + PROC_LOCK(p); p->p_flag |= P_ADVLOCK; - error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, + leaderp = p->p_leader; + PROC_UNLOCK(p); + error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_SETLK, &fl, flg); break; case F_UNLCK: - error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, + PROC_LOCK(p); + leaderp = p->p_leader; + PROC_UNLOCK(p); + error = VOP_ADVLOCK(vp, (caddr_t)leaderp, F_UNLCK, &fl, F_POSIX); break; default: @@ -390,6 +426,7 @@ 
fcntl(td, uap) case F_GETLK: if (fp->f_type != DTYPE_VNODE) { + FILEDESC_UNLOCK(fdp); error = EBADF; break; } @@ -398,6 +435,9 @@ fcntl(td, uap) * copyin/lockop may block */ fhold(fp); + FILEDESC_UNLOCK(fdp); + vp = (struct vnode *)fp->f_data; + /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); @@ -431,6 +471,7 @@ fcntl(td, uap) } break; default: + FILEDESC_UNLOCK(fdp); error = EINVAL; break; } @@ -441,6 +482,7 @@ done2: /* * Common code for dup, dup2, and fcntl(F_DUPFD). + * filedesc must be locked, but will be unlocked as a side effect. */ static int do_dup(fdp, old, new, retval, td) @@ -452,6 +494,8 @@ do_dup(fdp, old, new, retval, td) struct file *fp; struct file *delfp; + FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + /* * Save info on the descriptor being overwritten. We have * to do the unmap now, but we cannot close it without @@ -474,6 +518,8 @@ do_dup(fdp, old, new, retval, td) fdp->fd_lastfile = new; *retval = new; + FILEDESC_UNLOCK(fdp); + /* * If we dup'd over a valid file, we now own the reference to it * and must dispose of it using closef() semantics (as if a @@ -632,8 +678,10 @@ close(td, uap) mtx_lock(&Giant); fdp = td->td_proc->p_fd; + FILEDESC_LOCK(fdp); if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) { + FILEDESC_UNLOCK(fdp); error = EBADF; goto done2; } @@ -652,8 +700,12 @@ close(td, uap) fdp->fd_lastfile--; if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; - if (fd < fdp->fd_knlistsize) + if (fd < fdp->fd_knlistsize) { + FILEDESC_UNLOCK(fdp); knote_fdclose(td, fd); + } else + FILEDESC_UNLOCK(fdp); + error = closef(fp, td); done2: mtx_unlock(&Giant); @@ -756,7 +808,6 @@ nfstat(td, uap) struct nstat nub; int error; - mtx_lock(&Giant); if ((error = fget(td, uap->fd, &fp)) != 0) goto done2; error = fo_stat(fp, &ub, td); @@ -792,6 +843,9 @@ fpathconf(td, uap) struct vnode *vp; int error; + fp = ffind_hold(td, uap->fd); + if (fp == NULL) + return (EBADF); mtx_lock(&Giant); if 
((error = fget(td, uap->fd, &fp)) != 0) goto done2; @@ -800,6 +854,7 @@ fpathconf(td, uap) case DTYPE_PIPE: case DTYPE_SOCKET: if (uap->name != _PC_PIPE_BUF) { + fdrop(fp, td); error = EINVAL; goto done2; } @@ -837,9 +892,11 @@ fdalloc(td, want, result) register struct filedesc *fdp = td->td_proc->p_fd; register int i; int lim, last, nfiles; - struct file **newofile; + struct file **newofile, **oldofile; char *newofileflags; + FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + /* * Search for a free descriptor starting at the higher * of want or fd_freefile. If that fails, consider @@ -871,15 +928,19 @@ fdalloc(td, want, result) nfiles = NDEXTENT; else nfiles = 2 * fdp->fd_nfiles; + FILEDESC_UNLOCK(fdp); MALLOC(newofile, struct file **, nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); + FILEDESC_LOCK(fdp); /* * deal with file-table extend race that might have occured * when malloc was blocked. */ if (fdp->fd_nfiles >= nfiles) { + FILEDESC_UNLOCK(fdp); FREE(newofile, M_FILEDESC); + FILEDESC_LOCK(fdp); continue; } newofileflags = (char *) &newofile[nfiles]; @@ -894,11 +955,15 @@ fdalloc(td, want, result) (i = sizeof(char) * fdp->fd_nfiles)); bzero(newofileflags + i, nfiles * sizeof(char) - i); if (fdp->fd_nfiles > NDFILE) - FREE(fdp->fd_ofiles, M_FILEDESC); + oldofile = fdp->fd_ofiles; + else + oldofile = NULL; fdp->fd_ofiles = newofile; fdp->fd_ofileflags = newofileflags; fdp->fd_nfiles = nfiles; fdexpand++; + if (oldofile != NULL) + FREE(oldofile, M_FILEDESC); } return (0); } @@ -917,6 +982,8 @@ fdavail(td, n) register struct file **fpp; register int i, lim, last; + FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + lim = min((int)p->p_rlimit[RLIMIT_NOFILE].rlim_cur, maxfilesperproc); if ((i = lim - fdp->fd_nfiles) > 0 && (n -= i) <= 0) return (1); @@ -944,17 +1011,20 @@ falloc(td, resultfp, resultfd) register struct file *fp, *fq; int error, i; + sx_xlock(&filelist_lock); if (nfiles >= maxfiles) { + sx_xunlock(&filelist_lock); tablefull("file"); return (ENFILE); } + nfiles++; + 
sx_xunlock(&filelist_lock); /* * Allocate a new file descriptor. * If the process has file descriptor zero open, add to the list * of open files at that point, otherwise put it at the front of * the list of open files. */ - nfiles++; MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK | M_ZERO); /* @@ -962,21 +1032,32 @@ falloc(td, resultfp, resultfd) * allocating the slot, else a race might have shrunk it if we had * allocated it before the malloc. */ + FILEDESC_LOCK(p->p_fd); if ((error = fdalloc(td, 0, &i))) { + FILEDESC_UNLOCK(p->p_fd); + sx_xlock(&filelist_lock); nfiles--; + sx_xunlock(&filelist_lock); FREE(fp, M_FILE); return (error); } + mtx_init(&fp->f_mtx, "file structure", MTX_DEF); + fp->f_gcflag = 0; fp->f_count = 1; fp->f_cred = crhold(p->p_ucred); fp->f_ops = &badfileops; fp->f_seqcount = 1; + FILEDESC_UNLOCK(p->p_fd); + sx_xlock(&filelist_lock); + FILEDESC_LOCK(p->p_fd); if ((fq = p->p_fd->fd_ofiles[0])) { LIST_INSERT_AFTER(fq, fp, f_list); } else { LIST_INSERT_HEAD(&filehead, fp, f_list); } p->p_fd->fd_ofiles[i] = fp; + FILEDESC_UNLOCK(p->p_fd); + sx_xunlock(&filelist_lock); if (resultfp) *resultfp = fp; if (resultfd) @@ -991,10 +1072,14 @@ void ffree(fp) register struct file *fp; { + KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!")); + sx_xlock(&filelist_lock); LIST_REMOVE(fp, f_list); - crfree(fp->f_cred); nfiles--; + sx_xunlock(&filelist_lock); + crfree(fp->f_cred); + mtx_destroy(&fp->f_mtx); FREE(fp, M_FILE); } @@ -1010,6 +1095,8 @@ fdinit(td) MALLOC(newfdp, struct filedesc0 *, sizeof(struct filedesc0), M_FILEDESC, M_WAITOK | M_ZERO); + mtx_init(&newfdp->fd_fd.fd_mtx, "filedesc structure", MTX_DEF); + FILEDESC_LOCK(&newfdp->fd_fd); newfdp->fd_fd.fd_cdir = fdp->fd_cdir; if (newfdp->fd_fd.fd_cdir) VREF(newfdp->fd_fd.fd_cdir); @@ -1027,6 +1114,7 @@ fdinit(td) newfdp->fd_fd.fd_ofileflags = newfdp->fd_dfileflags; newfdp->fd_fd.fd_nfiles = NDFILE; newfdp->fd_fd.fd_knlistsize = -1; + FILEDESC_UNLOCK(&newfdp->fd_fd); return 
(&newfdp->fd_fd); } @@ -1038,7 +1126,9 @@ struct filedesc * fdshare(p) struct proc *p; { + FILEDESC_LOCK(p->p_fd); p->p_fd->fd_refcnt++; + FILEDESC_UNLOCK(p->p_fd); return (p->p_fd); } @@ -1051,15 +1141,22 @@ fdcopy(td) { register struct filedesc *newfdp, *fdp = td->td_proc->p_fd; register struct file **fpp; - register int i; + register int i, j; /* Certain daemons might not have file descriptors. */ if (fdp == NULL) return (NULL); + FILEDESC_LOCK_ASSERT(fdp, MA_OWNED); + + FILEDESC_UNLOCK(fdp); MALLOC(newfdp, struct filedesc *, sizeof(struct filedesc0), M_FILEDESC, M_WAITOK); + FILEDESC_LOCK(fdp); bcopy(fdp, newfdp, sizeof(struct filedesc)); + FILEDESC_UNLOCK(fdp); + bzero(&newfdp->fd_mtx, sizeof(newfdp->fd_mtx)); + mtx_init(&newfdp->fd_mtx, "filedesc structure", MTX_DEF); if (newfdp->fd_cdir) VREF(newfdp->fd_cdir); if (newfdp->fd_rdir) @@ -1074,6 +1171,9 @@ fdcopy(td) * additional memory for the number of descriptors currently * in use. */ + FILEDESC_LOCK(fdp); + newfdp->fd_lastfile = fdp->fd_lastfile; + newfdp->fd_nfiles = fdp->fd_nfiles; if (newfdp->fd_lastfile < NDFILE) { newfdp->fd_ofiles = ((struct filedesc0 *) newfdp)->fd_dfiles; newfdp->fd_ofileflags = @@ -1085,11 +1185,31 @@ fdcopy(td) * for the file descriptors currently in use, * allowing the table to shrink. */ +retry: i = newfdp->fd_nfiles; while (i > 2 * NDEXTENT && i > newfdp->fd_lastfile * 2) i /= 2; + FILEDESC_UNLOCK(fdp); MALLOC(newfdp->fd_ofiles, struct file **, i * OFILESIZE, M_FILEDESC, M_WAITOK); + FILEDESC_LOCK(fdp); + newfdp->fd_lastfile = fdp->fd_lastfile; + newfdp->fd_nfiles = fdp->fd_nfiles; + j = newfdp->fd_nfiles; + while (j > 2 * NDEXTENT && j > newfdp->fd_lastfile * 2) + j /= 2; + if (i != j) { + /* + * The size of the original table has changed. + * Go over once again. 
+ */ + FILEDESC_UNLOCK(fdp); + FREE(newfdp->fd_ofiles, M_FILEDESC); + FILEDESC_LOCK(fdp); + newfdp->fd_lastfile = fdp->fd_lastfile; + newfdp->fd_nfiles = fdp->fd_nfiles; + goto retry; + } newfdp->fd_ofileflags = (char *) &newfdp->fd_ofiles[i]; } newfdp->fd_nfiles = i; @@ -1118,8 +1238,9 @@ fdcopy(td) fpp = newfdp->fd_ofiles; for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) { - if (*fpp != NULL) + if (*fpp != NULL) { fhold(*fpp); + } } return (newfdp); } @@ -1139,12 +1260,16 @@ fdfree(td) if (fdp == NULL) return; - if (--fdp->fd_refcnt > 0) + FILEDESC_LOCK(fdp); + if (--fdp->fd_refcnt > 0) { + FILEDESC_UNLOCK(fdp); return; + } /* * we are the last reference to the structure, we can * safely assume it will not change out from under us. */ + FILEDESC_UNLOCK(fdp); fpp = fdp->fd_ofiles; for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { if (*fpp) @@ -1162,6 +1287,7 @@ fdfree(td) FREE(fdp->fd_knlist, M_KQUEUE); if (fdp->fd_knhash) FREE(fdp->fd_knhash, M_KQUEUE); + mtx_destroy(&fdp->fd_mtx); FREE(fdp, M_FILEDESC); } @@ -1204,6 +1330,7 @@ setugidsafety(td) * note: fdp->fd_ofiles may be reallocated out from under us while * we are blocked in a close. Be careful! */ + FILEDESC_LOCK(fdp); for (i = 0; i <= fdp->fd_lastfile; i++) { if (i > 2) break; @@ -1214,8 +1341,11 @@ setugidsafety(td) if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0) (void) munmapfd(td, i); #endif - if (i < fdp->fd_knlistsize) + if (i < fdp->fd_knlistsize) { + FILEDESC_UNLOCK(fdp); knote_fdclose(td, i); + FILEDESC_LOCK(fdp); + } /* * NULL-out descriptor prior to close to avoid * a race while close blocks. 
@@ -1225,11 +1355,14 @@ setugidsafety(td) fdp->fd_ofileflags[i] = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; + FILEDESC_UNLOCK(fdp); (void) closef(fp, td); + FILEDESC_LOCK(fdp); } } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; + FILEDESC_UNLOCK(fdp); } /* @@ -1246,6 +1379,8 @@ fdcloseexec(td) if (fdp == NULL) return; + FILEDESC_LOCK(fdp); + /* * We cannot cache fd_ofiles or fd_ofileflags since operations * may block and rip them out from under us. @@ -1259,8 +1394,11 @@ fdcloseexec(td) if (fdp->fd_ofileflags[i] & UF_MAPPED) (void) munmapfd(td, i); #endif - if (i < fdp->fd_knlistsize) + if (i < fdp->fd_knlistsize) { + FILEDESC_UNLOCK(fdp); knote_fdclose(td, i); + FILEDESC_LOCK(fdp); + } /* * NULL-out descriptor prior to close to avoid * a race while close blocks. @@ -1270,11 +1408,14 @@ fdcloseexec(td) fdp->fd_ofileflags[i] = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; + FILEDESC_UNLOCK(fdp); (void) closef(fp, td); + FILEDESC_LOCK(fdp); } } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; + FILEDESC_UNLOCK(fdp); } /* @@ -1315,6 +1456,68 @@ closef(fp, td) } /* + * Find the struct file 'fd' in process 'p' and bump it's refcount + * struct file is not locked on return. + */ +struct file * +ffind_hold(td, fd) + struct thread *td; + int fd; +{ + struct filedesc *fdp; + struct file *fp; + + if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) + return (NULL); + FILEDESC_LOCK(fdp); + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + fp->f_ops == &badfileops) + fp = NULL; + else + fhold(fp); + FILEDESC_UNLOCK(fdp); + return (fp); +} + +/* + * Find the struct file 'fd' in process 'p' and bump it's refcount, + * struct file is locked on return. 
+ */ +struct file * +ffind_lock(td, fd) + struct thread *td; + int fd; +{ + struct filedesc *fdp; + struct file *fp; + + if (td == NULL || (fdp = td->td_proc->p_fd) == NULL) + return (NULL); + FILEDESC_LOCK(fdp); + if (fd < 0 || fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL || + fp->f_ops == &badfileops) { + fp = NULL; + } else { + FILE_LOCK(fp); + fhold_locked(fp); + } + FILEDESC_UNLOCK(fdp); + return (fp); +} + +int +fdrop(fp, td) + struct file *fp; + struct thread *td; +{ + + FILE_LOCK(fp); + return (fdrop_locked(fp, td)); +} + +/* * Extract the file pointer associated with the specified descriptor for * the current user process. If no error occured 0 is returned, *fpp * will be set to the file pointer, and the file pointer's ref count @@ -1478,7 +1681,7 @@ fputsock(struct socket *so) } int -fdrop(fp, td) +fdrop_locked(fp, td) struct file *fp; struct thread *td; { @@ -1486,8 +1689,12 @@ fdrop(fp, td) struct vnode *vp; int error; - if (--fp->f_count > 0) + FILE_LOCK_ASSERT(fp, MA_OWNED); + + if (--fp->f_count > 0) { + FILE_UNLOCK(fp); return (0); + } if (fp->f_count < 0) panic("fdrop: count < 0"); if ((fp->f_flag & FHASLOCK) && fp->f_type == DTYPE_VNODE) { @@ -1496,8 +1703,10 @@ fdrop(fp, td) lf.l_len = 0; lf.l_type = F_UNLCK; vp = (struct vnode *)fp->f_data; + FILE_UNLOCK(fp); (void) VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); - } + } else + FILE_UNLOCK(fp); if (fp->f_ops != &badfileops) error = fo_close(fp, td); else @@ -1527,30 +1736,29 @@ flock(td, uap) struct thread *td; register struct flock_args *uap; { - register struct filedesc *fdp = td->td_proc->p_fd; register struct file *fp; struct vnode *vp; struct flock lf; int error; - mtx_lock(&Giant); - - if ((unsigned)uap->fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[uap->fd]) == NULL) { - error = EBADF; - goto done2; - } + fp = ffind_hold(td, uap->fd); + if (fp == NULL) + return (EBADF); if (fp->f_type != DTYPE_VNODE) { - error = EOPNOTSUPP; - goto done2; + fdrop(fp, td); + return 
(EOPNOTSUPP); } + + mtx_lock(&Giant); vp = (struct vnode *)fp->f_data; lf.l_whence = SEEK_SET; lf.l_start = 0; lf.l_len = 0; if (uap->how & LOCK_UN) { lf.l_type = F_UNLCK; + FILE_LOCK(fp); fp->f_flag &= ~FHASLOCK; + FILE_UNLOCK(fp); error = VOP_ADVLOCK(vp, (caddr_t)fp, F_UNLCK, &lf, F_FLOCK); goto done2; } @@ -1562,12 +1770,13 @@ flock(td, uap) error = EBADF; goto done2; } + FILE_LOCK(fp); fp->f_flag |= FHASLOCK; - if (uap->how & LOCK_NB) - error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK); - else - error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, F_FLOCK|F_WAIT); + FILE_UNLOCK(fp); + error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, + (uap->how & LOCK_NB) ? F_FLOCK : F_FLOCK | F_WAIT); done2: + fdrop(fp, td); mtx_unlock(&Giant); return (error); } @@ -1619,8 +1828,10 @@ dupfdopen(td, fdp, indx, dfd, mode, error) * of file descriptors, or the fd to be dup'd has already been * closed, then reject. */ + FILEDESC_LOCK(fdp); if ((u_int)dfd >= fdp->fd_nfiles || (wfp = fdp->fd_ofiles[dfd]) == NULL) { + FILEDESC_UNLOCK(fdp); return (EBADF); } @@ -1642,8 +1853,12 @@ dupfdopen(td, fdp, indx, dfd, mode, error) * Check that the mode the file is being opened for is a * subset of the mode of the existing descriptor. */ - if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) + FILE_LOCK(wfp); + if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) { + FILE_UNLOCK(wfp); + FILEDESC_UNLOCK(fdp); return (EACCES); + } fp = fdp->fd_ofiles[indx]; #if 0 if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED) @@ -1651,15 +1866,19 @@ dupfdopen(td, fdp, indx, dfd, mode, error) #endif fdp->fd_ofiles[indx] = wfp; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; - fhold(wfp); + fhold_locked(wfp); + FILE_UNLOCK(wfp); if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; + if (fp != NULL) + FILE_LOCK(fp); + FILEDESC_UNLOCK(fdp); /* * we now own the reference to fp that the ofiles[] array * used to own. Release it. 
*/ - if (fp) - fdrop(fp, td); + if (fp != NULL) + fdrop_locked(fp, td); return (0); case ENXIO: @@ -1677,12 +1896,6 @@ dupfdopen(td, fdp, indx, dfd, mode, error) fdp->fd_ofileflags[dfd] = 0; /* - * we now own the reference to fp that the ofiles[] array - * used to own. Release it. - */ - if (fp) - fdrop(fp, td); - /* * Complete the clean up of the filedesc structure by * recomputing the various hints. */ @@ -1696,9 +1909,20 @@ dupfdopen(td, fdp, indx, dfd, mode, error) if (dfd < fdp->fd_freefile) fdp->fd_freefile = dfd; } + if (fp != NULL) + FILE_LOCK(fp); + FILEDESC_UNLOCK(fdp); + + /* + * we now own the reference to fp that the ofiles[] array + * used to own. Release it. + */ + if (fp != NULL) + fdrop_locked(fp, td); return (0); default: + FILEDESC_UNLOCK(fdp); return (error); } /* NOTREACHED */ @@ -1713,26 +1937,34 @@ sysctl_kern_file(SYSCTL_HANDLER_ARGS) int error; struct file *fp; + sx_slock(&filelist_lock); if (!req->oldptr) { /* * overestimate by 10 files */ - return (SYSCTL_OUT(req, 0, sizeof(filehead) + - (nfiles + 10) * sizeof(struct file))); + error = SYSCTL_OUT(req, 0, sizeof(filehead) + + (nfiles + 10) * sizeof(struct file)); + sx_sunlock(&filelist_lock); + return (error); } error = SYSCTL_OUT(req, (caddr_t)&filehead, sizeof(filehead)); - if (error) + if (error) { + sx_sunlock(&filelist_lock); return (error); + } /* * followed by an array of file structures */ LIST_FOREACH(fp, &filehead, f_list) { error = SYSCTL_OUT(req, (caddr_t)fp, sizeof (struct file)); - if (error) + if (error) { + sx_sunlock(&filelist_lock); return (error); + } } + sx_sunlock(&filelist_lock); return (0); } @@ -1842,3 +2074,14 @@ badfo_close(fp, td) SYSINIT(fildescdev,SI_SUB_DRIVERS,SI_ORDER_MIDDLE+CDEV_MAJOR, fildesc_drvinit,NULL) + +static void filelistinit __P((void *)); +SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, filelistinit, NULL) + +/* ARGSUSED*/ +static void +filelistinit(dummy) + void *dummy; +{ + sx_init(&filelist_lock, "filelist lock"); +} diff --git 
a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 6bec056..038b233 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -372,15 +372,20 @@ kqueue(struct thread *td, struct kqueue_args *uap) error = falloc(td, &fp, &fd); if (error) goto done2; + kq = malloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO); + TAILQ_INIT(&kq->kq_head); + FILE_LOCK(fp); fp->f_flag = FREAD | FWRITE; fp->f_type = DTYPE_KQUEUE; fp->f_ops = &kqueueops; - kq = malloc(sizeof(struct kqueue), M_KQUEUE, M_WAITOK | M_ZERO); TAILQ_INIT(&kq->kq_head); fp->f_data = (caddr_t)kq; + FILE_UNLOCK(fp); + FILEDESC_LOCK(fdp); td->td_retval[0] = fd; if (fdp->fd_knlistsize < 0) fdp->fd_knlistsize = 0; /* this process has a kq */ + FILEDESC_UNLOCK(fdp); kq->kq_fdp = fdp; done2: mtx_unlock(&Giant); @@ -409,19 +414,19 @@ kevent(struct thread *td, struct kevent_args *uap) struct timespec ts; int i, n, nerrors, error; - mtx_lock(&Giant); - if ((error = fget(td, uap->fd, &fp)) != 0) - goto done; - if (fp->f_type != DTYPE_KQUEUE) { - error = EBADF; - goto done; + fp = ffind_hold(td, uap->fd); + if (fp == NULL || fp->f_type != DTYPE_KQUEUE) { + if (fp != NULL) + fdrop(fp, td); + return (EBADF); } if (uap->timeout != NULL) { error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) - goto done; + goto done_nogiant; uap->timeout = &ts; } + mtx_lock(&Giant); kq = (struct kqueue *)fp->f_data; nerrors = 0; @@ -462,9 +467,10 @@ kevent(struct thread *td, struct kevent_args *uap) error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, td); done: + mtx_unlock(&Giant); +done_nogiant: if (fp != NULL) fdrop(fp, td); - mtx_unlock(&Giant); return (error); } @@ -521,11 +527,14 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td) return (EINVAL); } + FILEDESC_LOCK(fdp); if (fops->f_isfd) { /* validate descriptor */ if ((u_int)kev->ident >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[kev->ident]) == NULL) + (fp = fdp->fd_ofiles[kev->ident]) == NULL) { + FILEDESC_UNLOCK(fdp); 
return (EBADF); + } fhold(fp); if (kev->ident < fdp->fd_knlistsize) { @@ -547,6 +556,7 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct thread *td) break; } } + FILEDESC_UNLOCK(fdp); if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { error = ENOENT; @@ -633,12 +643,15 @@ static int kqueue_scan(struct file *fp, int maxevents, struct kevent *ulistp, const struct timespec *tsp, struct thread *td) { - struct kqueue *kq = (struct kqueue *)fp->f_data; + struct kqueue *kq; struct kevent *kevp; struct timeval atv, rtv, ttv; struct knote *kn, marker; int s, count, timeout, nkev = 0, error = 0; + FILE_LOCK_ASSERT(fp, MA_NOTOWNED); + + kq = (struct kqueue *)fp->f_data; count = maxevents; if (count == 0) goto done; @@ -788,10 +801,11 @@ kqueue_ioctl(struct file *fp, u_long com, caddr_t data, struct thread *td) static int kqueue_poll(struct file *fp, int events, struct ucred *cred, struct thread *td) { - struct kqueue *kq = (struct kqueue *)fp->f_data; + struct kqueue *kq; int revents = 0; int s = splnet(); + kq = (struct kqueue *)fp->f_data; if (events & (POLLIN | POLLRDNORM)) { if (kq->kq_count) { revents |= events & (POLLIN | POLLRDNORM); @@ -808,8 +822,9 @@ kqueue_poll(struct file *fp, int events, struct ucred *cred, struct thread *td) static int kqueue_stat(struct file *fp, struct stat *st, struct thread *td) { - struct kqueue *kq = (struct kqueue *)fp->f_data; + struct kqueue *kq; + kq = (struct kqueue *)fp->f_data; bzero((void *)st, sizeof(*st)); st->st_size = kq->kq_count; st->st_blksize = sizeof(struct kevent); @@ -826,6 +841,7 @@ kqueue_close(struct file *fp, struct thread *td) struct knote **knp, *kn, *kn0; int i; + FILEDESC_LOCK(fdp); for (i = 0; i < fdp->fd_knlistsize; i++) { knp = &SLIST_FIRST(&fdp->fd_knlist[i]); kn = *knp; @@ -833,9 +849,12 @@ kqueue_close(struct file *fp, struct thread *td) kn0 = SLIST_NEXT(kn, kn_link); if (kq == kn->kn_kq) { kn->kn_fop->f_detach(kn); - fdrop(kn->kn_fp, td); - knote_free(kn); *knp = kn0; + FILE_LOCK(kn->kn_fp); 
+ FILEDESC_UNLOCK(fdp); + fdrop_locked(kn->kn_fp, td); + knote_free(kn); + FILEDESC_LOCK(fdp); } else { knp = &SLIST_NEXT(kn, kn_link); } @@ -850,9 +869,11 @@ kqueue_close(struct file *fp, struct thread *td) kn0 = SLIST_NEXT(kn, kn_link); if (kq == kn->kn_kq) { kn->kn_fop->f_detach(kn); + *knp = kn0; /* XXX non-fd release of kn->kn_ptr */ + FILEDESC_UNLOCK(fdp); knote_free(kn); - *knp = kn0; + FILEDESC_LOCK(fdp); } else { knp = &SLIST_NEXT(kn, kn_link); } @@ -860,6 +881,7 @@ kqueue_close(struct file *fp, struct thread *td) } } } + FILEDESC_UNLOCK(fdp); free(kq, M_KQUEUE); fp->f_data = NULL; @@ -915,16 +937,21 @@ void knote_fdclose(struct thread *td, int fd) { struct filedesc *fdp = td->td_proc->p_fd; - struct klist *list = &fdp->fd_knlist[fd]; + struct klist *list; + FILEDESC_LOCK(fdp); + list = &fdp->fd_knlist[fd]; + FILEDESC_UNLOCK(fdp); knote_remove(td, list); } static void knote_attach(struct knote *kn, struct filedesc *fdp) { - struct klist *list; - int size; + struct klist *list, *oldlist; + int size, newsize; + + FILEDESC_LOCK(fdp); if (! 
kn->kn_fop->f_isfd) { if (fdp->fd_knhashmask == 0) @@ -935,23 +962,42 @@ knote_attach(struct knote *kn, struct filedesc *fdp) } if (fdp->fd_knlistsize <= kn->kn_id) { +retry: size = fdp->fd_knlistsize; while (size <= kn->kn_id) size += KQEXTENT; + FILEDESC_UNLOCK(fdp); MALLOC(list, struct klist *, size * sizeof(struct klist *), M_KQUEUE, M_WAITOK); + FILEDESC_LOCK(fdp); + newsize = fdp->fd_knlistsize; + while (newsize <= kn->kn_id) + newsize += KQEXTENT; + if (newsize != size) { + FILEDESC_UNLOCK(fdp); + free(list, M_TEMP); + FILEDESC_LOCK(fdp); + goto retry; + } bcopy((caddr_t)fdp->fd_knlist, (caddr_t)list, fdp->fd_knlistsize * sizeof(struct klist *)); bzero((caddr_t)list + fdp->fd_knlistsize * sizeof(struct klist *), (size - fdp->fd_knlistsize) * sizeof(struct klist *)); if (fdp->fd_knlist != NULL) - FREE(fdp->fd_knlist, M_KQUEUE); + oldlist = fdp->fd_knlist; + else + oldlist = NULL; fdp->fd_knlistsize = size; fdp->fd_knlist = list; + FILEDESC_UNLOCK(fdp); + if (oldlist != NULL) + FREE(oldlist, M_KQUEUE); + FILEDESC_LOCK(fdp); } list = &fdp->fd_knlist[kn->kn_id]; done: + FILEDESC_UNLOCK(fdp); SLIST_INSERT_HEAD(list, kn, kn_link); kn->kn_status = 0; } @@ -966,16 +1012,20 @@ knote_drop(struct knote *kn, struct thread *td) struct filedesc *fdp = td->td_proc->p_fd; struct klist *list; + FILEDESC_LOCK(fdp); if (kn->kn_fop->f_isfd) list = &fdp->fd_knlist[kn->kn_id]; else list = &fdp->fd_knhash[KN_HASH(kn->kn_id, fdp->fd_knhashmask)]; + if (kn->kn_fop->f_isfd) + FILE_LOCK(kn->kn_fp); + FILEDESC_UNLOCK(fdp); SLIST_REMOVE(list, kn, knote, kn_link); if (kn->kn_status & KN_QUEUED) knote_dequeue(kn); if (kn->kn_fop->f_isfd) - fdrop(kn->kn_fp, td); + fdrop_locked(kn->kn_fp, td); knote_free(kn); } diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c index 670d5dd..3fe2ab3 100644 --- a/sys/kern/kern_exec.c +++ b/sys/kern/kern_exec.c @@ -281,13 +281,16 @@ interpret: * For security and other reasons, the file descriptor table cannot * be shared after an exec. 
*/ + FILEDESC_LOCK(p->p_fd); if (p->p_fd->fd_refcnt > 1) { struct filedesc *tmp; tmp = fdcopy(td); + FILEDESC_UNLOCK(p->p_fd); fdfree(td); p->p_fd = tmp; - } + } else + FILEDESC_UNLOCK(p->p_fd); /* * For security and other reasons, signal handlers cannot diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index a0b8688..da46522 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -269,14 +269,18 @@ fork1(td, flags, procp) * Unshare file descriptors (from parent.) */ if (flags & RFFDG) { + FILEDESC_LOCK(p1->p_fd); if (p1->p_fd->fd_refcnt > 1) { struct filedesc *newfd; + newfd = fdcopy(td); + FILEDESC_UNLOCK(p1->p_fd); PROC_LOCK(p1); fdfree(td); p1->p_fd = newfd; PROC_UNLOCK(p1); - } + } else + FILEDESC_UNLOCK(p1->p_fd); } *procp = NULL; return (0); @@ -519,9 +523,11 @@ again: if (flags & RFCFDG) fd = fdinit(td); - else if (flags & RFFDG) + else if (flags & RFFDG) { + FILEDESC_LOCK(p1->p_fd); fd = fdcopy(td); - else + FILEDESC_UNLOCK(p1->p_fd); + } else fd = fdshare(p1); PROC_LOCK(p2); p2->p_fd = fd; diff --git a/sys/kern/subr_acl_posix1e.c b/sys/kern/subr_acl_posix1e.c index b50c896..63be63d 100644 --- a/sys/kern/subr_acl_posix1e.c +++ b/sys/kern/subr_acl_posix1e.c @@ -703,6 +703,7 @@ __acl_get_fd(struct thread *td, struct __acl_get_fd_args *uap) if (error == 0) { error = vacl_get_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -724,6 +725,7 @@ __acl_set_fd(struct thread *td, struct __acl_set_fd_args *uap) if (error == 0) { error = vacl_set_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -767,6 +769,7 @@ __acl_delete_fd(struct thread *td, struct __acl_delete_fd_args *uap) if (error == 0) { error = vacl_delete(td, (struct vnode *)fp->f_data, SCARG(uap, type)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -811,6 +814,7 @@ __acl_aclcheck_fd(struct thread *td, struct 
__acl_aclcheck_fd_args *uap) if (error == 0) { error = vacl_aclcheck(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 352e325..df85cf2 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -83,6 +83,30 @@ static int dofileread __P((struct thread *, struct file *, int, void *, static int dofilewrite __P((struct thread *, struct file *, int, const void *, size_t, off_t, int)); +struct file* +holdfp(fdp, fd, flag) + struct filedesc* fdp; + int fd, flag; +{ + struct file* fp; + + FILEDESC_LOCK(fdp); + if (((u_int)fd) >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL) { + FILEDESC_UNLOCK(fdp); + return (NULL); + } + FILE_LOCK(fp); + FILEDESC_UNLOCK(fdp); + if ((fp->f_flag & flag) == 0) { + FILE_UNLOCK(fp); + return (NULL); + } + fp->f_count++; + FILE_UNLOCK(fp); + return (fp); +} + /* * Read system call. */ @@ -137,17 +161,18 @@ pread(td, uap) struct file *fp; int error; - mtx_lock(&Giant); - if ((error = fget_read(td, uap->fd, &fp)) == 0) { - if (fp->f_type == DTYPE_VNODE) { - error = dofileread(td, fp, uap->fd, uap->buf, - uap->nbyte, uap->offset, FOF_OFFSET); - } else { - error = ESPIPE; - } - fdrop(fp, td); + fp = holdfp(td->td_proc->p_fd, uap->fd, FREAD); + if (fp == NULL) + return (EBADF); + if (fp->f_type != DTYPE_VNODE) { + error = ESPIPE; + } else { + mtx_lock(&Giant); + error = dofileread(td, fp, uap->fd, uap->buf, uap->nbyte, + uap->offset, FOF_OFFSET); + mtx_unlock(&Giant); } - mtx_unlock(&Giant); + fdrop(fp, td); return(error); } @@ -381,7 +406,6 @@ pwrite(td, uap) } else { error = EBADF; /* this can't be right */ } - mtx_unlock(&Giant); return(error); } @@ -592,26 +616,27 @@ ioctl(td, uap) long align; } ubuf; - mtx_lock(&Giant); - fdp = td->td_proc->p_fd; - if ((u_int)uap->fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[uap->fd]) == NULL) { - error = EBADF; - goto done2; - } - + fp = 
ffind_hold(td, uap->fd); + if (fp == NULL) + return (EBADF); if ((fp->f_flag & (FREAD | FWRITE)) == 0) { - error = EBADF; - goto done2; + fdrop(fp, td); + return (EBADF); } - + fdp = td->td_proc->p_fd; switch (com = uap->com) { case FIONCLEX: + FILEDESC_LOCK(fdp); fdp->fd_ofileflags[uap->fd] &= ~UF_EXCLOSE; - goto done2; + FILEDESC_UNLOCK(fdp); + fdrop(fp, td); + return (0); case FIOCLEX: + FILEDESC_LOCK(fdp); fdp->fd_ofileflags[uap->fd] |= UF_EXCLOSE; - goto done2; + FILEDESC_UNLOCK(fdp); + fdrop(fp, td); + return (0); } /* @@ -620,12 +645,11 @@ ioctl(td, uap) */ size = IOCPARM_LEN(com); if (size > IOCPARM_MAX) { - error = ENOTTY; - goto done2; + fdrop(fp, td); + return (ENOTTY); } - fhold(fp); - + mtx_lock(&Giant); memp = NULL; if (size > sizeof (ubuf.stkbuf)) { memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); @@ -640,7 +664,7 @@ ioctl(td, uap) if (memp) free(memp, M_IOCTLOPS); fdrop(fp, td); - goto done2; + goto done; } } else { *(caddr_t *)data = uap->data; @@ -658,18 +682,22 @@ ioctl(td, uap) switch (com) { case FIONBIO: + FILE_LOCK(fp); if ((tmp = *(int *)data)) fp->f_flag |= FNONBLOCK; else fp->f_flag &= ~FNONBLOCK; + FILE_UNLOCK(fp); error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, td); break; case FIOASYNC: + FILE_LOCK(fp); if ((tmp = *(int *)data)) fp->f_flag |= FASYNC; else fp->f_flag &= ~FASYNC; + FILE_UNLOCK(fp); error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, td); break; @@ -686,7 +714,7 @@ ioctl(td, uap) if (memp) free(memp, M_IOCTLOPS); fdrop(fp, td); -done2: +done: mtx_unlock(&Giant); return (error); } @@ -713,6 +741,7 @@ select(td, uap) register struct thread *td; register struct select_args *uap; { + struct filedesc *fdp; /* * The magic 2048 here is chosen to be just enough for FD_SETSIZE * infds with the new FD_SETSIZE of 1024, and more than enough for @@ -728,11 +757,13 @@ select(td, uap) if (uap->nd < 0) return (EINVAL); - + fdp = td->td_proc->p_fd; mtx_lock(&Giant); + FILEDESC_LOCK(fdp); if (uap->nd > td->td_proc->p_fd->fd_nfiles) 
uap->nd = td->td_proc->p_fd->fd_nfiles; /* forgiving; slightly wrong */ + FILEDESC_UNLOCK(fdp); /* * Allocate just enough bits for the non-null fd_sets. Use the @@ -887,6 +918,11 @@ done_noproclock: return (error); } +/* + * Used to hold then release a group of fds for select(2). + * Hold (hold == 1) or release (hold == 0) a group of filedescriptors. + * if holding then use ibits setting the bits in obits, otherwise use obits. + */ static int selholddrop(td, ibits, obits, nfd, hold) struct thread *td; @@ -898,6 +934,7 @@ selholddrop(td, ibits, obits, nfd, hold) fd_mask bits; struct file *fp; + FILEDESC_LOCK(fdp); for (i = 0; i < nfd; i += NFDBITS) { if (hold) bits = ibits[i/NFDBITS]; @@ -908,16 +945,28 @@ selholddrop(td, ibits, obits, nfd, hold) if (!(bits & 1)) continue; fp = fdp->fd_ofiles[fd]; - if (fp == NULL) + if (fp == NULL) { + FILEDESC_UNLOCK(fdp); return (EBADF); + } if (hold) { fhold(fp); obits[(fd)/NFDBITS] |= ((fd_mask)1 << ((fd) % NFDBITS)); - } else + } else { + /* XXX: optimize by making a special + * version of fdrop that only unlocks + * the filedesc if needed? This would + * redcuce the number of lock/unlock + * pairs by quite a bit. 
+ */ + FILEDESC_UNLOCK(fdp); fdrop(fp, td); + FILEDESC_LOCK(fdp); + } } } + FILEDESC_UNLOCK(fdp); return (0); } @@ -927,7 +976,6 @@ selscan(td, ibits, obits, nfd) fd_mask **ibits, **obits; int nfd; { - struct filedesc *fdp = td->td_proc->p_fd; int msk, i, fd; fd_mask bits; struct file *fp; @@ -944,7 +992,7 @@ selscan(td, ibits, obits, nfd) for (fd = i; bits && fd < nfd; fd++, bits >>= 1) { if (!(bits & 1)) continue; - fp = fdp->fd_ofiles[fd]; + fp = ffind_hold(td, fd); if (fp == NULL) return (EBADF); if (fo_poll(fp, flag[msk], fp->f_cred, td)) { @@ -952,6 +1000,7 @@ selscan(td, ibits, obits, nfd) ((fd_mask)1 << ((fd) % NFDBITS)); n++; } + fdrop(fp, td); } } } @@ -1116,6 +1165,7 @@ pollholddrop(td, fds, nfd, hold) int i; struct file *fp; + FILEDESC_LOCK(fdp); for (i = 0; i < nfd; i++, fds++) { if (0 <= fds->fd && fds->fd < fdp->fd_nfiles) { fp = fdp->fd_ofiles[fds->fd]; @@ -1125,10 +1175,15 @@ pollholddrop(td, fds, nfd, hold) fds->revents = 1; } else fds->revents = 0; - } else if(fp != NULL && fds->revents) - fdrop(fp, td); + } else if(fp != NULL && fds->revents) { + FILE_LOCK(fp); + FILEDESC_UNLOCK(fdp); + fdrop_locked(fp, td); + FILEDESC_LOCK(fdp); + } } } + FILEDESC_UNLOCK(fdp); return (0); } @@ -1144,13 +1199,17 @@ pollscan(td, fds, nfd) int n = 0; for (i = 0; i < nfd; i++, fds++) { + FILEDESC_LOCK(fdp); if (fds->fd >= fdp->fd_nfiles) { fds->revents = POLLNVAL; n++; + FILEDESC_UNLOCK(fdp); } else if (fds->fd < 0) { fds->revents = 0; + FILEDESC_UNLOCK(fdp); } else { fp = fdp->fd_ofiles[fds->fd]; + FILEDESC_UNLOCK(fdp); if (fp == NULL) { fds->revents = POLLNVAL; n++; diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index fd16065..49f1959 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -205,27 +205,33 @@ pipe(td, uap) * to avoid races against processes which manage to dup() the read * side while we are blocked trying to allocate the write side. 
*/ + FILE_LOCK(rf); rf->f_flag = FREAD | FWRITE; rf->f_type = DTYPE_PIPE; rf->f_data = (caddr_t)rpipe; rf->f_ops = &pipeops; + FILE_UNLOCK(rf); error = falloc(td, &wf, &fd); if (error) { + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[td->td_retval[0]] == rf) { fdp->fd_ofiles[td->td_retval[0]] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(rf, td); - } + } else + FILEDESC_UNLOCK(fdp); fdrop(rf, td); /* rpipe has been closed by fdrop(). */ pipeclose(wpipe); return (error); } + FILE_LOCK(wf); wf->f_flag = FREAD | FWRITE; wf->f_type = DTYPE_PIPE; wf->f_data = (caddr_t)wpipe; wf->f_ops = &pipeops; + FILE_UNLOCK(wf); td->td_retval[1] = fd; - rpipe->pipe_peer = wpipe; wpipe->pipe_peer = rpipe; fdrop(rf, td); @@ -495,9 +501,12 @@ pipe_read(fp, uio, cred, flags, td) * Handle non-blocking mode operation or * wait for more data. */ + FILE_LOCK(fp); if (fp->f_flag & FNONBLOCK) { + FILE_UNLOCK(fp); error = EAGAIN; } else { + FILE_UNLOCK(fp); rpipe->pipe_state |= PIPE_WANTR; if ((error = tsleep(rpipe, PRIBIO | PCATCH, "piperd", 0)) == 0) @@ -825,15 +834,18 @@ pipe_write(fp, uio, cred, flags, td) * The direct write mechanism will detect the reader going * away on us. 
*/ + FILE_LOCK(fp); if ((uio->uio_iov->iov_len >= PIPE_MINDIRECT) && (fp->f_flag & FNONBLOCK) == 0 && (wpipe->pipe_map.kva || (amountpipekva < LIMITPIPEKVA)) && (uio->uio_iov->iov_len >= PIPE_MINDIRECT)) { + FILE_UNLOCK(fp); error = pipe_direct_write( wpipe, uio); if (error) break; continue; - } + } else + FILE_UNLOCK(fp); #endif /* @@ -961,10 +973,13 @@ pipe_write(fp, uio, cred, flags, td) /* * don't block on non-blocking I/O */ + FILE_LOCK(fp); if (fp->f_flag & FNONBLOCK) { + FILE_UNLOCK(fp); error = EAGAIN; break; } + FILE_UNLOCK(fp); /* * We have no more space and have something to offer, @@ -1236,8 +1251,9 @@ pipeclose(cpipe) static int pipe_kqfilter(struct file *fp, struct knote *kn) { - struct pipe *cpipe = (struct pipe *)kn->kn_fp->f_data; + struct pipe *cpipe; + cpipe = (struct pipe *)kn->kn_fp->f_data; switch (kn->kn_filter) { case EVFILT_READ: kn->kn_fop = &pipe_rfiltops; diff --git a/sys/kern/sys_socket.c b/sys/kern/sys_socket.c index 04a419a..13f4000 100644 --- a/sys/kern/sys_socket.c +++ b/sys/kern/sys_socket.c @@ -197,10 +197,11 @@ soo_close(fp, td) int error = 0; struct socket *so; + so = (struct socket *)fp->f_data; fp->f_ops = &badfileops; - if ((so = (struct socket *)fp->f_data) != NULL) { - fp->f_data = NULL; + fp->f_data = 0; + + if (so) error = soclose(so); - } return (error); } diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 6860d76..19d62fd 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -134,16 +134,20 @@ socket(td, uap) fhold(fp); error = socreate(uap->domain, &so, uap->type, uap->protocol, td->td_proc->p_ucred, td); + FILEDESC_LOCK(fdp); if (error) { if (fdp->fd_ofiles[fd] == fp) { fdp->fd_ofiles[fd] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } + } else + FILEDESC_UNLOCK(fdp); } else { fp->f_data = (caddr_t)so; /* already has ref count */ fp->f_flag = FREAD|FWRITE; fp->f_ops = &socketops; fp->f_type = DTYPE_SOCKET; + FILEDESC_UNLOCK(fdp); td->td_retval[0] = fd; } fdrop(fp, td); @@ 
-306,11 +310,13 @@ accept1(td, uap, compat) if (head->so_sigio != NULL) fsetown(fgetown(head->so_sigio), &so->so_sigio); + FILE_LOCK(nfp); soref(so); /* file descriptor reference */ nfp->f_data = (caddr_t)so; /* nfp has ref count from falloc */ nfp->f_flag = fflag; nfp->f_ops = &socketops; nfp->f_type = DTYPE_SOCKET; + FILE_UNLOCK(nfp); sa = 0; error = soaccept(so, &sa); if (error) { @@ -357,9 +363,13 @@ noconnection: * out from under us. */ if (error) { + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[fd] == nfp) { fdp->fd_ofiles[fd] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(nfp, td); + } else { + FILEDESC_UNLOCK(fdp); } } splx(s); @@ -510,24 +520,37 @@ socketpair(td, uap) if (error) goto free4; } - fp1->f_flag = fp2->f_flag = FREAD|FWRITE; - fp1->f_ops = fp2->f_ops = &socketops; - fp1->f_type = fp2->f_type = DTYPE_SOCKET; + FILE_LOCK(fp1); + fp1->f_flag = FREAD|FWRITE; + fp1->f_ops = &socketops; + fp1->f_type = DTYPE_SOCKET; + FILE_UNLOCK(fp1); + FILE_LOCK(fp2); + fp2->f_flag = FREAD|FWRITE; + fp2->f_ops = &socketops; + fp2->f_type = DTYPE_SOCKET; + FILE_UNLOCK(fp2); error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); fdrop(fp1, td); fdrop(fp2, td); goto done2; free4: + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[sv[1]] == fp2) { fdp->fd_ofiles[sv[1]] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp2, td); - } + } else + FILEDESC_UNLOCK(fdp); fdrop(fp2, td); free3: + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[sv[0]] == fp1) { fdp->fd_ofiles[sv[0]] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp1, td); - } + } else + FILEDESC_UNLOCK(fdp); fdrop(fp1, td); free2: (void)soclose(so2); @@ -1932,4 +1955,3 @@ done: mtx_unlock(&Giant); return (error); } - diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c index a7ffcff..546124d 100644 --- a/sys/kern/uipc_usrreq.c +++ b/sys/kern/uipc_usrreq.c @@ -57,6 +57,7 @@ #include <sys/unpcb.h> #include <sys/vnode.h> #include <sys/jail.h> +#include <sys/sx.h> #include <vm/vm_zone.h> @@ -535,7 +536,9 @@ unp_attach(so) unp_count++; 
LIST_INIT(&unp->unp_refs); unp->unp_socket = so; + FILEDESC_LOCK(curproc->p_fd); unp->unp_rvnode = curthread->td_proc->p_fd->fd_rdir; + FILEDESC_UNLOCK(curproc->p_fd); LIST_INSERT_HEAD(so->so_type == SOCK_DGRAM ? &unp_dhead : &unp_shead, unp, unp_link); so->so_pcb = (caddr_t)unp; @@ -628,7 +631,9 @@ restart: } VATTR_NULL(&vattr); vattr.va_type = VSOCK; + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (ACCESSPERMS & ~td->td_proc->p_fd->fd_cmask); + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_CREATE(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -1006,8 +1011,10 @@ unp_externalize(control, controlp) unp_freerights(rp, newfds); goto next; } + FILEDESC_LOCK(td->td_proc->p_fd); /* if the new FD's will not fit free them. */ if (!fdavail(td, newfds)) { + FILEDESC_UNLOCK(td->td_proc->p_fd); error = EMSGSIZE; unp_freerights(rp, newfds); goto next; @@ -1022,6 +1029,7 @@ unp_externalize(control, controlp) *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { + FILEDESC_UNLOCK(td->td_proc->p_fd); error = E2BIG; unp_freerights(rp, newfds); goto next; @@ -1034,10 +1042,13 @@ unp_externalize(control, controlp) panic("unp_externalize fdalloc failed"); fp = *rp++; td->td_proc->p_fd->fd_ofiles[f] = fp; + FILE_LOCK(fp); fp->f_msgcount--; + FILE_UNLOCK(fp); unp_rights--; *fdp++ = f; } + FILEDESC_UNLOCK(td->td_proc->p_fd); } else { /* We can just copy anything else across */ if (error || controlp == NULL) goto next; @@ -1064,6 +1075,7 @@ next: cm = NULL; } } + FILEDESC_UNLOCK(td->td_proc->p_fd); m_freem(control); @@ -1148,10 +1160,12 @@ unp_internalize(controlp, td) * If not, reject the entire operation. 
*/ fdp = data; + FILEDESC_LOCK(fdescp); for (i = 0; i < oldfds; i++) { fd = *fdp++; if ((unsigned)fd >= fdescp->fd_nfiles || fdescp->fd_ofiles[fd] == NULL) { + FILEDESC_UNLOCK(fdescp); error = EBADF; goto out; } @@ -1164,6 +1178,7 @@ unp_internalize(controlp, td) *controlp = sbcreatecontrol(NULL, newlen, SCM_RIGHTS, SOL_SOCKET); if (*controlp == NULL) { + FILEDESC_UNLOCK(fdescp); error = E2BIG; goto out; } @@ -1174,10 +1189,13 @@ unp_internalize(controlp, td) for (i = 0; i < oldfds; i++) { fp = fdescp->fd_ofiles[*fdp++]; *rp++ = fp; + FILE_LOCK(fp); fp->f_count++; fp->f_msgcount++; + FILE_UNLOCK(fp); unp_rights++; } + FILEDESC_UNLOCK(fdescp); break; case SCM_TIMESTAMP: @@ -1233,42 +1251,50 @@ unp_gc() * before going through all this, set all FDs to * be NOT defered and NOT externally accessible */ + sx_slock(&filelist_lock); LIST_FOREACH(fp, &filehead, f_list) - fp->f_flag &= ~(FMARK|FDEFER); + fp->f_gcflag &= ~(FMARK|FDEFER); do { LIST_FOREACH(fp, &filehead, f_list) { + FILE_LOCK(fp); /* * If the file is not open, skip it */ - if (fp->f_count == 0) + if (fp->f_count == 0) { + FILE_UNLOCK(fp); continue; + } /* * If we already marked it as 'defer' in a * previous pass, then try process it this time * and un-mark it */ - if (fp->f_flag & FDEFER) { - fp->f_flag &= ~FDEFER; + if (fp->f_gcflag & FDEFER) { + fp->f_gcflag &= ~FDEFER; unp_defer--; } else { /* * if it's not defered, then check if it's * already marked.. if so skip it */ - if (fp->f_flag & FMARK) + if (fp->f_gcflag & FMARK) { + FILE_UNLOCK(fp); continue; + } /* * If all references are from messages * in transit, then skip it. it's not * externally accessible. */ - if (fp->f_count == fp->f_msgcount) + if (fp->f_count == fp->f_msgcount) { + FILE_UNLOCK(fp); continue; + } /* * If it got this far then it must be * externally accessible. 
*/ - fp->f_flag |= FMARK; + fp->f_gcflag |= FMARK; } /* * either it was defered, or it is externally @@ -1276,8 +1302,11 @@ unp_gc() * Now check if it is possibly one of OUR sockets. */ if (fp->f_type != DTYPE_SOCKET || - (so = (struct socket *)fp->f_data) == 0) + (so = (struct socket *)fp->f_data) == 0) { + FILE_UNLOCK(fp); continue; + } + FILE_UNLOCK(fp); if (so->so_proto->pr_domain != &localdomain || (so->so_proto->pr_flags&PR_RIGHTS) == 0) continue; @@ -1307,6 +1336,7 @@ unp_gc() unp_scan(so->so_rcv.sb_mb, unp_mark); } } while (unp_defer); + sx_sunlock(&filelist_lock); /* * We grab an extra reference to each of the file table entries * that are not otherwise accessible and then free the rights @@ -1347,33 +1377,43 @@ unp_gc() * 91/09/19, bsy@cs.cmu.edu */ extra_ref = malloc(nfiles * sizeof(struct file *), M_FILE, M_WAITOK); + sx_slock(&filelist_lock); for (nunref = 0, fp = LIST_FIRST(&filehead), fpp = extra_ref; fp != 0; fp = nextfp) { nextfp = LIST_NEXT(fp, f_list); + FILE_LOCK(fp); /* * If it's not open, skip it */ - if (fp->f_count == 0) + if (fp->f_count == 0) { + FILE_UNLOCK(fp); continue; + } /* * If all refs are from msgs, and it's not marked accessible * then it must be referenced from some unreachable cycle * of (shut-down) FDs, so include it in our * list of FDs to remove */ - if (fp->f_count == fp->f_msgcount && !(fp->f_flag & FMARK)) { + if (fp->f_count == fp->f_msgcount && !(fp->f_gcflag & FMARK)) { *fpp++ = fp; nunref++; fp->f_count++; } + FILE_UNLOCK(fp); } + sx_sunlock(&filelist_lock); /* * for each FD on our hit list, do the following two things */ for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) { struct file *tfp = *fpp; - if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) + FILE_LOCK(tfp); + if (tfp->f_type == DTYPE_SOCKET && tfp->f_data != NULL) { + FILE_UNLOCK(tfp); sorflush((struct socket *)(tfp->f_data)); + } else + FILE_UNLOCK(tfp); } for (i = nunref, fpp = extra_ref; --i >= 0; ++fpp) closef(*fpp, (struct thread *) NULL); @@ 
-1460,19 +1500,19 @@ static void unp_mark(fp) struct file *fp; { - - if (fp->f_flag & FMARK) + if (fp->f_gcflag & FMARK) return; unp_defer++; - fp->f_flag |= (FMARK|FDEFER); + fp->f_gcflag |= (FMARK|FDEFER); } static void unp_discard(fp) struct file *fp; { - + FILE_LOCK(fp); fp->f_msgcount--; unp_rights--; + FILE_UNLOCK(fp); (void) closef(fp, (struct thread *)NULL); } diff --git a/sys/kern/vfs_acl.c b/sys/kern/vfs_acl.c index b50c896..63be63d 100644 --- a/sys/kern/vfs_acl.c +++ b/sys/kern/vfs_acl.c @@ -703,6 +703,7 @@ __acl_get_fd(struct thread *td, struct __acl_get_fd_args *uap) if (error == 0) { error = vacl_get_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -724,6 +725,7 @@ __acl_set_fd(struct thread *td, struct __acl_set_fd_args *uap) if (error == 0) { error = vacl_set_acl(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -767,6 +769,7 @@ __acl_delete_fd(struct thread *td, struct __acl_delete_fd_args *uap) if (error == 0) { error = vacl_delete(td, (struct vnode *)fp->f_data, SCARG(uap, type)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); @@ -811,6 +814,7 @@ __acl_aclcheck_fd(struct thread *td, struct __acl_aclcheck_fd_args *uap) if (error == 0) { error = vacl_aclcheck(td, (struct vnode *)fp->f_data, SCARG(uap, type), SCARG(uap, aclp)); + fdrop(fp, td); } mtx_unlock(&Giant); return (error); diff --git a/sys/kern/vfs_cache.c b/sys/kern/vfs_cache.c index 85bb632..d0b412d 100644 --- a/sys/kern/vfs_cache.c +++ b/sys/kern/vfs_cache.c @@ -716,9 +716,11 @@ __getcwd(td, uap) *bp = '\0'; fdp = td->td_proc->p_fd; slash_prefixed = 0; + FILEDESC_LOCK(fdp); for (vp = fdp->fd_cdir; vp != fdp->fd_rdir && vp != rootvnode;) { if (vp->v_flag & VROOT) { if (vp->v_mount == NULL) { /* forced unmount */ + FILEDESC_UNLOCK(fdp); free(buf, M_TEMP); return (EBADF); } @@ -726,23 +728,27 @@ __getcwd(td, uap) continue; } 
if (vp->v_dd->v_id != vp->v_ddid) { + FILEDESC_UNLOCK(fdp); numcwdfail1++; free(buf, M_TEMP); return (ENOTDIR); } ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { + FILEDESC_UNLOCK(fdp); numcwdfail2++; free(buf, M_TEMP); return (ENOENT); } if (ncp->nc_dvp != vp->v_dd) { + FILEDESC_UNLOCK(fdp); numcwdfail3++; free(buf, M_TEMP); return (EBADF); } for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { + FILEDESC_UNLOCK(fdp); numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); @@ -750,6 +756,7 @@ __getcwd(td, uap) *--bp = ncp->nc_name[i]; } if (bp == buf) { + FILEDESC_UNLOCK(fdp); numcwdfail4++; free(buf, M_TEMP); return (ENOMEM); @@ -758,6 +765,7 @@ __getcwd(td, uap) slash_prefixed = 1; vp = vp->v_dd; } + FILEDESC_UNLOCK(fdp); if (!slash_prefixed) { if (bp == buf) { numcwdfail4++; @@ -811,9 +819,11 @@ vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) *bp = '\0'; fdp = td->td_proc->p_fd; slash_prefixed = 0; + FILEDESC_LOCK(fdp); for (vp = vn; vp != fdp->fd_rdir && vp != rootvnode;) { if (vp->v_flag & VROOT) { if (vp->v_mount == NULL) { /* forced unmount */ + FILEDESC_UNLOCK(fdp); free(buf, M_TEMP); return (EBADF); } @@ -821,23 +831,27 @@ vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) continue; } if (vp != vn && vp->v_dd->v_id != vp->v_ddid) { + FILEDESC_UNLOCK(fdp); numfullpathfail1++; free(buf, M_TEMP); return (ENOTDIR); } ncp = TAILQ_FIRST(&vp->v_cache_dst); if (!ncp) { + FILEDESC_UNLOCK(fdp); numfullpathfail2++; free(buf, M_TEMP); return (ENOENT); } if (vp != vn && ncp->nc_dvp != vp->v_dd) { + FILEDESC_UNLOCK(fdp); numfullpathfail3++; free(buf, M_TEMP); return (EBADF); } for (i = ncp->nc_nlen - 1; i >= 0; i--) { if (bp == buf) { + FILEDESC_UNLOCK(fdp); numfullpathfail4++; free(buf, M_TEMP); return (ENOMEM); @@ -845,6 +859,7 @@ vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) *--bp = ncp->nc_name[i]; } if (bp == buf) { + FILEDESC_UNLOCK(fdp); numfullpathfail4++; free(buf, 
M_TEMP); return (ENOMEM); @@ -855,12 +870,14 @@ vn_fullpath(struct thread *td, struct vnode *vn, char **retbuf, char **freebuf) } if (!slash_prefixed) { if (bp == buf) { + FILEDESC_UNLOCK(fdp); numfullpathfail4++; free(buf, M_TEMP); return (ENOMEM); } *--bp = '/'; } + FILEDESC_UNLOCK(fdp); numfullpathfound++; *retbuf = bp; *freebuf = buf; diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index fced0d7..75ac3d0 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -454,16 +454,21 @@ checkdirs(olddp, newdp) fdp = p->p_fd; if (fdp == NULL) continue; + FILEDESC_LOCK(fdp); if (fdp->fd_cdir == olddp) { - vrele(fdp->fd_cdir); VREF(newdp); fdp->fd_cdir = newdp; + FILEDESC_UNLOCK(fdp); + vrele(olddp); + FILEDESC_LOCK(fdp); } if (fdp->fd_rdir == olddp) { - vrele(fdp->fd_rdir); VREF(newdp); fdp->fd_rdir = newdp; - } + FILEDESC_UNLOCK(fdp); + vrele(olddp); + } else + FILEDESC_UNLOCK(fdp); } sx_sunlock(&allproc_lock); if (rootvnode == olddp) { @@ -802,6 +807,7 @@ fstatfs(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; + fdrop(fp, td); if (mp == NULL) return (EBADF); sp = &mp->mnt_stat; @@ -903,7 +909,7 @@ fchdir(td, uap) } */ *uap; { register struct filedesc *fdp = td->td_proc->p_fd; - struct vnode *vp, *tdp; + struct vnode *vp, *tdp, *vpold; struct mount *mp; struct file *fp; int error; @@ -912,6 +918,7 @@ fchdir(td, uap) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); + fdrop(fp, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type != VDIR) error = ENOTDIR; @@ -932,8 +939,11 @@ fchdir(td, uap) return (error); } VOP_UNLOCK(vp, 0, td); - vrele(fdp->fd_cdir); + FILEDESC_LOCK(fdp); + vpold = fdp->fd_cdir; fdp->fd_cdir = vp; + FILEDESC_UNLOCK(fdp); + vrele(vpold); return (0); } @@ -956,14 +966,18 @@ chdir(td, uap) register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; + struct vnode *vp; NDINIT(&nd, LOOKUP, FOLLOW | 
LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), td); if ((error = change_dir(&nd, td)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); - vrele(fdp->fd_cdir); + FILEDESC_LOCK(fdp); + vp = fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; + FILEDESC_UNLOCK(fdp); + vrele(vp); return (0); } @@ -977,18 +991,23 @@ chroot_refuse_vdir_fds(fdp) { struct vnode *vp; struct file *fp; + struct thread *td = curthread; int error; int fd; + FILEDESC_LOCK(fdp); for (fd = 0; fd < fdp->fd_nfiles ; fd++) { error = getvnode(fdp, fd, &fp); if (error) continue; vp = (struct vnode *)fp->f_data; + fdrop(fp, td); if (vp->v_type != VDIR) continue; + FILEDESC_UNLOCK(fdp); return(EPERM); } + FILEDESC_UNLOCK(fdp); return (0); } @@ -1024,13 +1043,18 @@ chroot(td, uap) register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; + struct vnode *vp; error = suser_xxx(0, td->td_proc, PRISON_ROOT); if (error) return (error); + FILEDESC_LOCK(fdp); if (chroot_allow_open_directories == 0 || - (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) + (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { + FILEDESC_UNLOCK(fdp); error = chroot_refuse_vdir_fds(fdp); + } else + FILEDESC_UNLOCK(fdp); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, @@ -1038,12 +1062,15 @@ chroot(td, uap) if ((error = change_dir(&nd, td)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); - vrele(fdp->fd_rdir); + FILEDESC_LOCK(fdp); + vp = fdp->fd_rdir; fdp->fd_rdir = nd.ni_vp; if (!fdp->fd_jdir) { fdp->fd_jdir = nd.ni_vp; VREF(fdp->fd_jdir); } + FILEDESC_UNLOCK(fdp); + vrele(vp); return (0); } @@ -1113,7 +1140,9 @@ open(td, uap) if (error) return (error); fp = nfp; + FILEDESC_LOCK(fdp); cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; + FILEDESC_UNLOCK(fdp); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); td->td_dupfd = -indx - 1; /* XXX check for fdopen */ /* @@ -1144,10 +1173,13 @@ open(td, uap) * Clean up the 
descriptor, but only if another thread hadn't * replaced or closed it. */ + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } + } else + FILEDESC_UNLOCK(fdp); if (error == ERESTART) error = EINTR; @@ -1165,9 +1197,13 @@ open(td, uap) * descriptor) while we were blocked. The end result should look * like opening the file succeeded but it was immediately closed. */ + FILEDESC_LOCK(fdp); + FILE_LOCK(fp); if (fp->f_count == 1) { KASSERT(fdp->fd_ofiles[indx] != fp, ("Open file descriptor lost all refs")); + FILEDESC_UNLOCK(fdp); + FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); vn_close(vp, flags & FMASK, fp->f_cred, td); fdrop(fp, td); @@ -1179,6 +1215,8 @@ open(td, uap) fp->f_flag = flags & FMASK; fp->f_ops = &vnops; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); + FILEDESC_UNLOCK(fdp); + FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; @@ -1219,11 +1257,13 @@ open(td, uap) td->td_retval[0] = indx; return (0); bad: + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } - fdrop(fp, td); + } else + FILEDESC_UNLOCK(fdp); return (error); } @@ -1307,7 +1347,9 @@ restart: error = EEXIST; } else { VATTR_NULL(&vattr); + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); vattr.va_rdev = SCARG(uap, dev); whiteout = 0; @@ -1398,7 +1440,9 @@ restart: } VATTR_NULL(&vattr); vattr.va_type = VFIFO; + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) @@ -1513,7 +1557,9 @@ restart: goto restart; } VATTR_NULL(&vattr); + 
FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -1658,18 +1704,19 @@ lseek(td, uap) } */ *uap; { struct ucred *cred = td->td_proc->p_ucred; - register struct filedesc *fdp = td->td_proc->p_fd; register struct file *fp; - struct vattr vattr; struct vnode *vp; + struct vattr vattr; off_t offset; int error, noneg; - if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) + fp = ffind_hold(td, uap->fd); + if (fp == NULL) return (EBADF); - if (fp->f_type != DTYPE_VNODE) + if (fp->f_type != DTYPE_VNODE) { + fdrop(fp, td); return (ESPIPE); + } vp = (struct vnode *)fp->f_data; noneg = (vp->v_type != VCHR); offset = SCARG(uap, offset); @@ -1694,12 +1741,14 @@ lseek(td, uap) case L_SET: break; default: + fdrop(fp, td); return (EINVAL); } if (noneg && offset < 0) return (EINVAL); fp->f_offset = offset; *(off_t *)(td->td_retval) = fp->f_offset; + fdrop(fp, td); return (0); } @@ -2307,7 +2356,9 @@ fchflags(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags)); + error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags)); + fdrop(fp, td); + return (error); } /* @@ -2414,11 +2465,15 @@ fchmod(td, uap) } */ *uap; { struct file *fp; + struct vnode *vp; int error; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode)); + vp = (struct vnode *)fp->f_data; + error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode)); + fdrop(fp, td); + return (error); } /* @@ -2533,12 +2588,16 @@ fchown(td, uap) } */ *uap; { struct file *fp; + struct vnode *vp; int error; if ((error = 
getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setfown(td, (struct vnode *)fp->f_data, + vp = (struct vnode *)fp->f_data; + error = setfown(td, (struct vnode *)fp->f_data, SCARG(uap, uid), SCARG(uap, gid)); + fdrop(fp, td); + return (error); } /* @@ -2692,7 +2751,9 @@ futimes(td, uap) return (error); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL); + error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL); + fdrop(fp, td); + return (error); } /* @@ -2777,11 +2838,15 @@ ftruncate(td, uap) return(EINVAL); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FWRITE) == 0) + if ((fp->f_flag & FWRITE) == 0) { + fdrop(fp, td); return (EINVAL); + } vp = (struct vnode *)fp->f_data; - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + fdrop(fp, td); return (error); + } VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) @@ -2793,6 +2858,7 @@ ftruncate(td, uap) } VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); + fdrop(fp, td); return (error); } @@ -2883,8 +2949,10 @@ fsync(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + fdrop(fp, td); return (error); + } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (VOP_GETVOBJECT(vp, &obj) == 0) { vm_object_page_clean(obj, 0, 0, 0); @@ -2897,6 +2965,7 @@ fsync(td, uap) VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); + fdrop(fp, td); return (error); } @@ -3068,7 +3137,9 @@ restart: } VATTR_NULL(&vattr); vattr.va_type = VDIR; + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (mode & 
ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -3190,12 +3261,16 @@ ogetdirentries(td, uap) return (EINVAL); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *)fp->f_data; unionread: - if (vp->v_type != VDIR) + if (vp->v_type != VDIR) { + fdrop(fp, td); return (EINVAL); + } aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; @@ -3258,15 +3333,19 @@ unionread: FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, td); - if (error) + if (error) { + fdrop(fp, td); return (error); + } if (SCARG(uap, count) == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; - if (error) + if (error) { + fdrop(fp, td); return (error); + } } if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { @@ -3281,6 +3360,7 @@ unionread: } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); + fdrop(fp, td); td->td_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } @@ -3316,12 +3396,16 @@ getdirentries(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *)fp->f_data; unionread: - if (vp->v_type != VDIR) + if (vp->v_type != VDIR) { + fdrop(fp, td); return (EINVAL); + } aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; @@ -3336,15 +3420,19 @@ unionread: error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, td); - if (error) + if (error) { + fdrop(fp, td); 
return (error); + } if (SCARG(uap, count) == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; - if (error) + if (error) { + fdrop(fp, td); return (error); + } } if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { @@ -3362,6 +3450,7 @@ unionread: sizeof(long)); } td->td_retval[0] = SCARG(uap, count) - auio.uio_resid; + fdrop(fp, td); return (error); } #ifndef _SYS_SYSPROTO_H_ @@ -3407,9 +3496,11 @@ umask(td, uap) { register struct filedesc *fdp; + FILEDESC_LOCK(td->td_proc->p_fd); fdp = td->td_proc->p_fd; td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; + FILEDESC_UNLOCK(td->td_proc->p_fd); return (0); } @@ -3465,6 +3556,7 @@ out: /* * Convert a user file descriptor to a kernel file entry. + * The file entry is locked upon returning. */ int getvnode(fdp, fd, fpp) @@ -3472,15 +3564,28 @@ getvnode(fdp, fd, fpp) int fd; struct file **fpp; { + int error; struct file *fp; - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL) - return (EBADF); - if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) - return (EINVAL); + fp = NULL; + if (fdp == NULL) + error = EBADF; + else { + FILEDESC_LOCK(fdp); + if ((u_int)fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL) + error = EBADF; + else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) { + fp = NULL; + error = EINVAL; + } else { + fhold(fp); + error = 0; + } + FILEDESC_UNLOCK(fdp); + } *fpp = fp; - return (0); + return (error); } /* * Get (NFS) file handle @@ -3681,10 +3786,13 @@ fhopen(td, uap) * descriptor but handle the case where someone might * have dup()d or close()d it when we weren't looking. 
*/ + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } + } else + FILEDESC_UNLOCK(fdp); /* * release our private reference */ @@ -3995,6 +4103,7 @@ extattr_set_fd(td, uap) error = extattr_set_vp((struct vnode *)fp->f_data, SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp), SCARG(uap, iovcnt), td); + fdrop(fp, td); return (error); } @@ -4108,6 +4217,7 @@ extattr_get_fd(td, uap) SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp), SCARG(uap, iovcnt), td); + fdrop(fp, td); return (error); } @@ -4173,6 +4283,7 @@ extattr_delete_fd(td, uap) struct extattr_delete_fd_args *uap; { struct file *fp; + struct vnode *vp; char attrname[EXTATTR_MAXNAMELEN]; int error; @@ -4183,9 +4294,11 @@ extattr_delete_fd(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); + vp = (struct vnode *)fp->f_data; error = extattr_delete_vp((struct vnode *)fp->f_data, SCARG(uap, attrnamespace), attrname, td); + fdrop(fp, td); return (error); } diff --git a/sys/kern/vfs_lookup.c b/sys/kern/vfs_lookup.c index 470abba..66d27af 100644 --- a/sys/kern/vfs_lookup.c +++ b/sys/kern/vfs_lookup.c @@ -144,11 +144,13 @@ namei(ndp) /* * Get starting point for the translation. */ + FILEDESC_LOCK(fdp); ndp->ni_rootdir = fdp->fd_rdir; ndp->ni_topdir = fdp->fd_jdir; dp = fdp->fd_cdir; VREF(dp); + FILEDESC_UNLOCK(fdp); for (;;) { /* * Check if root directory should replace current directory. 
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index fced0d7..75ac3d0 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -454,16 +454,21 @@ checkdirs(olddp, newdp) fdp = p->p_fd; if (fdp == NULL) continue; + FILEDESC_LOCK(fdp); if (fdp->fd_cdir == olddp) { - vrele(fdp->fd_cdir); VREF(newdp); fdp->fd_cdir = newdp; + FILEDESC_UNLOCK(fdp); + vrele(olddp); + FILEDESC_LOCK(fdp); } if (fdp->fd_rdir == olddp) { - vrele(fdp->fd_rdir); VREF(newdp); fdp->fd_rdir = newdp; - } + FILEDESC_UNLOCK(fdp); + vrele(olddp); + } else + FILEDESC_UNLOCK(fdp); } sx_sunlock(&allproc_lock); if (rootvnode == olddp) { @@ -802,6 +807,7 @@ fstatfs(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); mp = ((struct vnode *)fp->f_data)->v_mount; + fdrop(fp, td); if (mp == NULL) return (EBADF); sp = &mp->mnt_stat; @@ -903,7 +909,7 @@ fchdir(td, uap) } */ *uap; { register struct filedesc *fdp = td->td_proc->p_fd; - struct vnode *vp, *tdp; + struct vnode *vp, *tdp, *vpold; struct mount *mp; struct file *fp; int error; @@ -912,6 +918,7 @@ fchdir(td, uap) return (error); vp = (struct vnode *)fp->f_data; VREF(vp); + fdrop(fp, td); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type != VDIR) error = ENOTDIR; @@ -932,8 +939,11 @@ fchdir(td, uap) return (error); } VOP_UNLOCK(vp, 0, td); - vrele(fdp->fd_cdir); + FILEDESC_LOCK(fdp); + vpold = fdp->fd_cdir; fdp->fd_cdir = vp; + FILEDESC_UNLOCK(fdp); + vrele(vpold); return (0); } @@ -956,14 +966,18 @@ chdir(td, uap) register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; + struct vnode *vp; NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), td); if ((error = change_dir(&nd, td)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); - vrele(fdp->fd_cdir); + FILEDESC_LOCK(fdp); + vp = fdp->fd_cdir; fdp->fd_cdir = nd.ni_vp; + FILEDESC_UNLOCK(fdp); + vrele(vp); return (0); } @@ -977,18 +991,23 @@ chroot_refuse_vdir_fds(fdp) { struct vnode 
*vp; struct file *fp; + struct thread *td = curthread; int error; int fd; + FILEDESC_LOCK(fdp); for (fd = 0; fd < fdp->fd_nfiles ; fd++) { error = getvnode(fdp, fd, &fp); if (error) continue; vp = (struct vnode *)fp->f_data; + fdrop(fp, td); if (vp->v_type != VDIR) continue; + FILEDESC_UNLOCK(fdp); return(EPERM); } + FILEDESC_UNLOCK(fdp); return (0); } @@ -1024,13 +1043,18 @@ chroot(td, uap) register struct filedesc *fdp = td->td_proc->p_fd; int error; struct nameidata nd; + struct vnode *vp; error = suser_xxx(0, td->td_proc, PRISON_ROOT); if (error) return (error); + FILEDESC_LOCK(fdp); if (chroot_allow_open_directories == 0 || - (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) + (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { + FILEDESC_UNLOCK(fdp); error = chroot_refuse_vdir_fds(fdp); + } else + FILEDESC_UNLOCK(fdp); if (error) return (error); NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, @@ -1038,12 +1062,15 @@ chroot(td, uap) if ((error = change_dir(&nd, td)) != 0) return (error); NDFREE(&nd, NDF_ONLY_PNBUF); - vrele(fdp->fd_rdir); + FILEDESC_LOCK(fdp); + vp = fdp->fd_rdir; fdp->fd_rdir = nd.ni_vp; if (!fdp->fd_jdir) { fdp->fd_jdir = nd.ni_vp; VREF(fdp->fd_jdir); } + FILEDESC_UNLOCK(fdp); + vrele(vp); return (0); } @@ -1113,7 +1140,9 @@ open(td, uap) if (error) return (error); fp = nfp; + FILEDESC_LOCK(fdp); cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; + FILEDESC_UNLOCK(fdp); NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); td->td_dupfd = -indx - 1; /* XXX check for fdopen */ /* @@ -1144,10 +1173,13 @@ open(td, uap) * Clean up the descriptor, but only if another thread hadn't * replaced or closed it. */ + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } + } else + FILEDESC_UNLOCK(fdp); if (error == ERESTART) error = EINTR; @@ -1165,9 +1197,13 @@ open(td, uap) * descriptor) while we were blocked. 
The end result should look * like opening the file succeeded but it was immediately closed. */ + FILEDESC_LOCK(fdp); + FILE_LOCK(fp); if (fp->f_count == 1) { KASSERT(fdp->fd_ofiles[indx] != fp, ("Open file descriptor lost all refs")); + FILEDESC_UNLOCK(fdp); + FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); vn_close(vp, flags & FMASK, fp->f_cred, td); fdrop(fp, td); @@ -1179,6 +1215,8 @@ open(td, uap) fp->f_flag = flags & FMASK; fp->f_ops = &vnops; fp->f_type = (vp->v_type == VFIFO ? DTYPE_FIFO : DTYPE_VNODE); + FILEDESC_UNLOCK(fdp); + FILE_UNLOCK(fp); VOP_UNLOCK(vp, 0, td); if (flags & (O_EXLOCK | O_SHLOCK)) { lf.l_whence = SEEK_SET; @@ -1219,11 +1257,13 @@ open(td, uap) td->td_retval[0] = indx; return (0); bad: + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } - fdrop(fp, td); + } else + FILEDESC_UNLOCK(fdp); return (error); } @@ -1307,7 +1347,9 @@ restart: error = EEXIST; } else { VATTR_NULL(&vattr); + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); vattr.va_rdev = SCARG(uap, dev); whiteout = 0; @@ -1398,7 +1440,9 @@ restart: } VATTR_NULL(&vattr); vattr.va_type = VFIFO; + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); if (error == 0) @@ -1513,7 +1557,9 @@ restart: goto restart; } VATTR_NULL(&vattr); + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -1658,18 +1704,19 @@ lseek(td, uap) } */ *uap; { struct ucred *cred = 
td->td_proc->p_ucred; - register struct filedesc *fdp = td->td_proc->p_fd; register struct file *fp; - struct vattr vattr; struct vnode *vp; + struct vattr vattr; off_t offset; int error, noneg; - if ((u_int)SCARG(uap, fd) >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[SCARG(uap, fd)]) == NULL) + fp = ffind_hold(td, uap->fd); + if (fp == NULL) return (EBADF); - if (fp->f_type != DTYPE_VNODE) + if (fp->f_type != DTYPE_VNODE) { + fdrop(fp, td); return (ESPIPE); + } vp = (struct vnode *)fp->f_data; noneg = (vp->v_type != VCHR); offset = SCARG(uap, offset); @@ -1694,12 +1741,14 @@ lseek(td, uap) case L_SET: break; default: + fdrop(fp, td); return (EINVAL); } if (noneg && offset < 0) return (EINVAL); fp->f_offset = offset; *(off_t *)(td->td_retval) = fp->f_offset; + fdrop(fp, td); return (0); } @@ -2307,7 +2356,9 @@ fchflags(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags)); + error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags)); + fdrop(fp, td); + return (error); } /* @@ -2414,11 +2465,15 @@ fchmod(td, uap) } */ *uap; { struct file *fp; + struct vnode *vp; int error; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode)); + vp = (struct vnode *)fp->f_data; + error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode)); + fdrop(fp, td); + return (error); } /* @@ -2533,12 +2588,16 @@ fchown(td, uap) } */ *uap; { struct file *fp; + struct vnode *vp; int error; if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - return setfown(td, (struct vnode *)fp->f_data, + vp = (struct vnode *)fp->f_data; + error = setfown(td, (struct vnode *)fp->f_data, SCARG(uap, uid), SCARG(uap, gid)); + fdrop(fp, td); + return (error); } /* @@ -2692,7 +2751,9 @@ futimes(td, uap) return (error); if ((error = getvnode(td->td_proc->p_fd, 
SCARG(uap, fd), &fp)) != 0) return (error); - return setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL); + error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL); + fdrop(fp, td); + return (error); } /* @@ -2777,11 +2838,15 @@ ftruncate(td, uap) return(EINVAL); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FWRITE) == 0) + if ((fp->f_flag & FWRITE) == 0) { + fdrop(fp, td); return (EINVAL); + } vp = (struct vnode *)fp->f_data; - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + fdrop(fp, td); return (error); + } VOP_LEASE(vp, td, td->td_proc->p_ucred, LEASE_WRITE); vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (vp->v_type == VDIR) @@ -2793,6 +2858,7 @@ ftruncate(td, uap) } VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); + fdrop(fp, td); return (error); } @@ -2883,8 +2949,10 @@ fsync(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); vp = (struct vnode *)fp->f_data; - if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + fdrop(fp, td); return (error); + } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); if (VOP_GETVOBJECT(vp, &obj) == 0) { vm_object_page_clean(obj, 0, 0, 0); @@ -2897,6 +2965,7 @@ fsync(td, uap) VOP_UNLOCK(vp, 0, td); vn_finished_write(mp); + fdrop(fp, td); return (error); } @@ -3068,7 +3137,9 @@ restart: } VATTR_NULL(&vattr); vattr.va_type = VDIR; + FILEDESC_LOCK(td->td_proc->p_fd); vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); VOP_LEASE(nd.ni_dvp, td, td->td_proc->p_ucred, LEASE_WRITE); error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); NDFREE(&nd, NDF_ONLY_PNBUF); @@ -3190,12 +3261,16 @@ ogetdirentries(td, uap) return (EINVAL); if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - 
if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *)fp->f_data; unionread: - if (vp->v_type != VDIR) + if (vp->v_type != VDIR) { + fdrop(fp, td); return (EINVAL); + } aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; @@ -3258,15 +3333,19 @@ unionread: FREE(dirbuf, M_TEMP); } VOP_UNLOCK(vp, 0, td); - if (error) + if (error) { + fdrop(fp, td); return (error); + } if (SCARG(uap, count) == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; - if (error) + if (error) { + fdrop(fp, td); return (error); + } } if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { @@ -3281,6 +3360,7 @@ unionread: } error = copyout((caddr_t)&loff, (caddr_t)SCARG(uap, basep), sizeof(long)); + fdrop(fp, td); td->td_retval[0] = SCARG(uap, count) - auio.uio_resid; return (error); } @@ -3316,12 +3396,16 @@ getdirentries(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); - if ((fp->f_flag & FREAD) == 0) + if ((fp->f_flag & FREAD) == 0) { + fdrop(fp, td); return (EBADF); + } vp = (struct vnode *)fp->f_data; unionread: - if (vp->v_type != VDIR) + if (vp->v_type != VDIR) { + fdrop(fp, td); return (EINVAL); + } aiov.iov_base = SCARG(uap, buf); aiov.iov_len = SCARG(uap, count); auio.uio_iov = &aiov; @@ -3336,15 +3420,19 @@ unionread: error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL); fp->f_offset = auio.uio_offset; VOP_UNLOCK(vp, 0, td); - if (error) + if (error) { + fdrop(fp, td); return (error); + } if (SCARG(uap, count) == auio.uio_resid) { if (union_dircheckp) { error = union_dircheckp(td, &vp, fp); if (error == -1) goto unionread; - if (error) + if (error) { + fdrop(fp, td); return (error); + } } if ((vp->v_flag & VROOT) && (vp->v_mount->mnt_flag & MNT_UNION)) { @@ -3362,6 +3450,7 @@ unionread: sizeof(long)); } td->td_retval[0] = SCARG(uap, count) - 
auio.uio_resid; + fdrop(fp, td); return (error); } #ifndef _SYS_SYSPROTO_H_ @@ -3407,9 +3496,11 @@ umask(td, uap) { register struct filedesc *fdp; + FILEDESC_LOCK(td->td_proc->p_fd); fdp = td->td_proc->p_fd; td->td_retval[0] = fdp->fd_cmask; fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; + FILEDESC_UNLOCK(td->td_proc->p_fd); return (0); } @@ -3465,6 +3556,7 @@ out: /* * Convert a user file descriptor to a kernel file entry. + * The file entry is locked upon returning. */ int getvnode(fdp, fd, fpp) @@ -3472,15 +3564,28 @@ getvnode(fdp, fd, fpp) int fd; struct file **fpp; { + int error; struct file *fp; - if ((u_int)fd >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fd]) == NULL) - return (EBADF); - if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) - return (EINVAL); + fp = NULL; + if (fdp == NULL) + error = EBADF; + else { + FILEDESC_LOCK(fdp); + if ((u_int)fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL) + error = EBADF; + else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) { + fp = NULL; + error = EINVAL; + } else { + fhold(fp); + error = 0; + } + FILEDESC_UNLOCK(fdp); + } *fpp = fp; - return (0); + return (error); } /* * Get (NFS) file handle @@ -3681,10 +3786,13 @@ fhopen(td, uap) * descriptor but handle the case where someone might * have dup()d or close()d it when we weren't looking. 
*/ + FILEDESC_LOCK(fdp); if (fdp->fd_ofiles[indx] == fp) { fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); fdrop(fp, td); - } + } else + FILEDESC_UNLOCK(fdp); /* * release our private reference */ @@ -3995,6 +4103,7 @@ extattr_set_fd(td, uap) error = extattr_set_vp((struct vnode *)fp->f_data, SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp), SCARG(uap, iovcnt), td); + fdrop(fp, td); return (error); } @@ -4108,6 +4217,7 @@ extattr_get_fd(td, uap) SCARG(uap, attrnamespace), attrname, SCARG(uap, iovp), SCARG(uap, iovcnt), td); + fdrop(fp, td); return (error); } @@ -4173,6 +4283,7 @@ extattr_delete_fd(td, uap) struct extattr_delete_fd_args *uap; { struct file *fp; + struct vnode *vp; char attrname[EXTATTR_MAXNAMELEN]; int error; @@ -4183,9 +4294,11 @@ extattr_delete_fd(td, uap) if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) return (error); + vp = (struct vnode *)fp->f_data; error = extattr_delete_vp((struct vnode *)fp->f_data, SCARG(uap, attrnamespace), attrname, td); + fdrop(fp, td); return (error); } diff --git a/sys/kern/vfs_vnops.c b/sys/kern/vfs_vnops.c index 70448d6..1bbed38 100644 --- a/sys/kern/vfs_vnops.c +++ b/sys/kern/vfs_vnops.c @@ -265,6 +265,7 @@ static __inline int sequential_heuristic(struct uio *uio, struct file *fp) { + /* * Sequential heuristic - detect sequential operation */ @@ -446,7 +447,6 @@ vn_write(fp, uio, cred, flags, td) vp = (struct vnode *)fp->f_data; if (vp->v_type == VREG) bwillwrite(); - vp = (struct vnode *)fp->f_data; /* XXX needed? */ ioflag = IO_UNIT; if (vp->v_type == VREG && (fp->f_flag & O_APPEND)) ioflag |= IO_APPEND; |