diff options
Diffstat (limited to 'sys/kern')
-rw-r--r-- | sys/kern/kern_descrip.c | 327 | ||||
-rw-r--r-- | sys/kern/kern_event.c | 39 | ||||
-rw-r--r-- | sys/kern/kern_random.c | 396 | ||||
-rw-r--r-- | sys/kern/sys_generic.c | 86 | ||||
-rw-r--r-- | sys/kern/sys_pipe.c | 9 | ||||
-rw-r--r-- | sys/kern/uipc_syscalls.c | 217 | ||||
-rw-r--r-- | sys/kern/vfs_aio.c | 37 | ||||
-rw-r--r-- | sys/kern/vfs_extattr.c | 85 | ||||
-rw-r--r-- | sys/kern/vfs_syscalls.c | 85 |
9 files changed, 1062 insertions, 219 deletions
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 5f2da95..29de2b1 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -86,7 +86,7 @@ static struct cdevsw fildesc_cdevsw = { /* bmaj */ -1 }; -static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval)); +static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct proc *p)); static int badfo_readwrite __P((struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct proc *p)); static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data, @@ -125,6 +125,9 @@ getdtablesize(p, uap) /* * Duplicate a file descriptor to a particular value. + * + * note: keep in mind that a potential race condition exists when closing + * descriptors from a shared descriptor table (via rfork). */ #ifndef _SYS_SYSPROTO_H_ struct dup2_args { @@ -142,11 +145,13 @@ dup2(p, uap) register u_int old = uap->from, new = uap->to; int i, error; +retry: if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || - new >= maxfilesperproc) + new >= maxfilesperproc) { return (EBADF); + } if (old == new) { p->p_retval[0] = new; return (0); @@ -156,15 +161,12 @@ dup2(p, uap) return (error); if (new != i) panic("dup2: fdalloc"); - } else if (fdp->fd_ofiles[new]) { - if (fdp->fd_ofileflags[new] & UF_MAPPED) - (void) munmapfd(p, new); /* - * dup2() must succeed even if the close has an error. + * fdalloc() may block, retest everything. 
*/ - (void) closef(fdp->fd_ofiles[new], p); + goto retry; } - return (finishdup(fdp, (int)old, (int)new, p->p_retval)); + return (do_dup(fdp, (int)old, (int)new, p->p_retval, p)); } /* @@ -191,7 +193,7 @@ dup(p, uap) return (EBADF); if ((error = fdalloc(p, 0, &new))) return (error); - return (finishdup(fdp, (int)old, new, p->p_retval)); + return (do_dup(fdp, (int)old, new, p->p_retval, p)); } /* @@ -222,8 +224,8 @@ fcntl(p, uap) (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); pop = &fdp->fd_ofileflags[uap->fd]; - switch (uap->cmd) { + switch (uap->cmd) { case F_DUPFD: newmin = uap->arg; if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || @@ -231,7 +233,7 @@ fcntl(p, uap) return (EINVAL); if ((error = fdalloc(p, newmin, &i))) return (error); - return (finishdup(fdp, uap->fd, i, p->p_retval)); + return (do_dup(fdp, uap->fd, i, p->p_retval, p)); case F_GETFD: p->p_retval[0] = *pop & 1; @@ -246,26 +248,38 @@ fcntl(p, uap) return (0); case F_SETFL: + fhold(fp); fp->f_flag &= ~FCNTLFLAGS; fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS; tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); - if (error) + if (error) { + fdrop(fp, p); return (error); + } tmp = fp->f_flag & FASYNC; error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); - if (!error) + if (!error) { + fdrop(fp, p); return (0); + } fp->f_flag &= ~FNONBLOCK; tmp = 0; (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); + fdrop(fp, p); return (error); case F_GETOWN: - return (fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p)); + fhold(fp); + error = fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p); + fdrop(fp, p); + return(error); case F_SETOWN: - return (fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p)); + fhold(fp); + error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p); + fdrop(fp, p); + return(error); case F_SETLKW: flg |= F_WAIT; @@ -275,54 +289,81 @@ fcntl(p, uap) if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; + + /* + * copyin/lockop may 
block + */ + fhold(fp); /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); - if (error) + if (error) { + fdrop(fp, p); return (error); + } if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; - switch (fl.l_type) { + switch (fl.l_type) { case F_RDLCK: - if ((fp->f_flag & FREAD) == 0) - return (EBADF); + if ((fp->f_flag & FREAD) == 0) { + error = EBADF; + break; + } p->p_flag |= P_ADVLOCK; - return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg)); - + error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, + &fl, flg); + break; case F_WRLCK: - if ((fp->f_flag & FWRITE) == 0) - return (EBADF); + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + break; + } p->p_flag |= P_ADVLOCK; - return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg)); - + error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, + &fl, flg); + break; case F_UNLCK: - return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &fl, - F_POSIX)); - + error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, + &fl, F_POSIX); + break; default: - return (EINVAL); + error = EINVAL; + break; } + fdrop(fp, p); + return(error); case F_GETLK: if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; + /* + * copyin/lockop may block + */ + fhold(fp); /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); - if (error) + if (error) { + fdrop(fp, p); return (error); + } if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK && - fl.l_type != F_UNLCK) + fl.l_type != F_UNLCK) { + fdrop(fp, p); return (EINVAL); + } if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; - if ((error = VOP_ADVLOCK(vp,(caddr_t)p->p_leader,F_GETLK,&fl,F_POSIX))) - return (error); - return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg, - sizeof(fl))); - + error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, + &fl, F_POSIX); + fdrop(fp, p); + if (error == 0) { + error = copyout((caddr_t)&fl, + 
(caddr_t)(intptr_t)uap->arg, sizeof(fl)); + } + return(error); default: return (EINVAL); } @@ -333,13 +374,29 @@ fcntl(p, uap) * Common code for dup, dup2, and fcntl(F_DUPFD). */ static int -finishdup(fdp, old, new, retval) +do_dup(fdp, old, new, retval, p) register struct filedesc *fdp; register int old, new; register_t *retval; + struct proc *p; { - register struct file *fp; + struct file *fp; + struct file *delfp; + + /* + * Save info on the descriptor being overwritten. We have + * to do the unmap now, but we cannot close it without + * introducing an ownership race for the slot. + */ + delfp = fdp->fd_ofiles[new]; +#if 0 + if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED)) + (void) munmapfd(p, new); +#endif + /* + * Duplicate the source descriptor, update lastfile + */ fp = fdp->fd_ofiles[old]; fdp->fd_ofiles[new] = fp; fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; @@ -347,6 +404,14 @@ finishdup(fdp, old, new, retval) if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; + + /* + * If we dup'd over a valid file, we now own the reference to it + * and must dispose of it using closef() semantics (as if a + * close() were performed on it). + */ + if (delfp) + (void) closef(delfp, p); return (0); } @@ -491,20 +556,25 @@ close(p, uap) register struct filedesc *fdp = p->p_fd; register struct file *fp; register int fd = uap->fd; - register u_char *pf; if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); - pf = (u_char *)&fdp->fd_ofileflags[fd]; - if (*pf & UF_MAPPED) +#if 0 + if (fdp->fd_ofileflags[fd] & UF_MAPPED) (void) munmapfd(p, fd); +#endif fdp->fd_ofiles[fd] = NULL; + fdp->fd_ofileflags[fd] = 0; + + /* + * we now hold the fp reference that used to be owned by the descriptor + * array. 
+ */ while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; - *pf = 0; if (fd < fdp->fd_knlistsize) knote_fdclose(p, fd); return (closef(fp, p)); @@ -535,11 +605,13 @@ ofstat(p, uap) if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); + fhold(fp); error = fo_stat(fp, &ub, p); if (error == 0) { cvtstat(&ub, &oub); error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub)); } + fdrop(fp, p); return (error); } #endif /* COMPAT_43 || COMPAT_SUNOS */ @@ -567,9 +639,11 @@ fstat(p, uap) if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); + fhold(fp); error = fo_stat(fp, &ub, p); if (error == 0) error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub)); + fdrop(fp, p); return (error); } @@ -597,11 +671,13 @@ nfstat(p, uap) if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); + fhold(fp); error = fo_stat(fp, &ub, p); if (error == 0) { cvtnstat(&ub, &nub); error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub)); } + fdrop(fp, p); return (error); } @@ -623,28 +699,33 @@ fpathconf(p, uap) struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; + int error = 0; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); - switch (fp->f_type) { + fhold(fp); + + switch (fp->f_type) { case DTYPE_PIPE: case DTYPE_SOCKET: if (uap->name != _PC_PIPE_BUF) return (EINVAL); p->p_retval[0] = PIPE_BUF; - return (0); - + error = 0; + break; case DTYPE_FIFO: case DTYPE_VNODE: vp = (struct vnode *)fp->f_data; - return (VOP_PATHCONF(vp, uap->name, p->p_retval)); - + error = VOP_PATHCONF(vp, uap->name, p->p_retval); + break; default: - return (EOPNOTSUPP); + error = EOPNOTSUPP; + break; } - /*NOTREACHED*/ + fdrop(fp, p); + return(error); } /* @@ -698,6 +779,15 @@ fdalloc(p, want, result) nfiles = 2 * 
fdp->fd_nfiles; MALLOC(newofile, struct file **, nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); + + /* + * deal with file-table extend race that might have occurred + * when malloc was blocked. + */ + if (fdp->fd_nfiles >= nfiles) { + FREE(newofile, M_FILEDESC); + continue; + } newofileflags = (char *) &newofile[nfiles]; /* * Copy the existing ofile and ofileflags arrays @@ -738,9 +828,10 @@ fdavail(p, n) last = min(fdp->fd_nfiles, lim); fpp = &fdp->fd_ofiles[fdp->fd_freefile]; - for (i = last - fdp->fd_freefile; --i >= 0; fpp++) + for (i = last - fdp->fd_freefile; --i >= 0; fpp++) { if (*fpp == NULL && --n <= 0) return (1); + } return (0); } @@ -757,8 +848,6 @@ falloc(p, resultfp, resultfd) register struct file *fp, *fq; int error, i; - if ((error = fdalloc(p, 0, &i))) - return (error); if (nfiles >= maxfiles) { tablefull("file"); return (ENFILE); @@ -772,6 +861,17 @@ falloc(p, resultfp, resultfd) nfiles++; MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK); bzero(fp, sizeof(struct file)); + + /* + * wait until after malloc (which may have blocked) returns before + * allocating the slot, else a race might have shrunk it if we had + * allocated it before the malloc. 
+ */ + if ((error = fdalloc(p, 0, &i))) { + nfiles--; + FREE(fp, M_FILE); + return (error); + } fp->f_count = 1; fp->f_cred = p->p_ucred; fp->f_ops = &badfileops; @@ -797,11 +897,9 @@ void ffree(fp) register struct file *fp; { + KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!")); LIST_REMOVE(fp, f_list); crfree(fp->f_cred); -#if defined(DIAGNOSTIC) || defined(INVARIANTS) - fp->f_count = 0; -#endif nfiles--; FREE(fp, M_FILE); } @@ -910,9 +1008,10 @@ fdcopy(p) */ if (newfdp->fd_knlistsize != -1) { fpp = newfdp->fd_ofiles; - for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) + for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) { if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) *fpp = NULL; + } newfdp->fd_knlist = NULL; newfdp->fd_knlistsize = -1; newfdp->fd_knhash = NULL; @@ -920,9 +1019,10 @@ fdcopy(p) } fpp = newfdp->fd_ofiles; - for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) + for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) { if (*fpp != NULL) fhold(*fpp); + } return (newfdp); } @@ -943,10 +1043,15 @@ fdfree(p) if (--fdp->fd_refcnt > 0) return; + /* + * we are the last reference to the structure, we can + * safely assume it will not change out from under us. + */ fpp = fdp->fd_ofiles; - for (i = fdp->fd_lastfile; i-- >= 0; fpp++) + for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { if (*fpp) (void) closef(*fpp, p); + } if (fdp->fd_nfiles > NDFILE) FREE(fdp->fd_ofiles, M_FILEDESC); if (fdp->fd_cdir) @@ -991,29 +1096,38 @@ setugidsafety(p) struct proc *p; { struct filedesc *fdp = p->p_fd; - struct file **fpp; - char *fdfp; register int i; /* Certain daemons might not have file descriptors. */ if (fdp == NULL) return; - fpp = fdp->fd_ofiles; - fdfp = fdp->fd_ofileflags; - for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) { + /* + * note: fdp->fd_ofiles may be reallocated out from under us while + * we are blocked in a close. Be careful! 
+ */ + for (i = 0; i <= fdp->fd_lastfile; i++) { if (i > 2) break; - if (*fpp != NULL && is_unsafe(*fpp)) { - if ((*fdfp & UF_MAPPED) != 0) + if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { + struct file *fp; + +#if 0 + if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0) (void) munmapfd(p, i); +#endif if (i < fdp->fd_knlistsize) knote_fdclose(p, i); - (void) closef(*fpp, p); - *fpp = NULL; - *fdfp = 0; + /* + * NULL-out descriptor prior to close to avoid + * a race while close blocks. + */ + fp = fdp->fd_ofiles[i]; + fdp->fd_ofiles[i] = NULL; + fdp->fd_ofileflags[i] = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; + (void) closef(fp, p); } } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) @@ -1028,28 +1142,39 @@ fdcloseexec(p) struct proc *p; { struct filedesc *fdp = p->p_fd; - struct file **fpp; - char *fdfp; register int i; /* Certain daemons might not have file descriptors. */ if (fdp == NULL) return; - fpp = fdp->fd_ofiles; - fdfp = fdp->fd_ofileflags; - for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) - if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) { - if (*fdfp & UF_MAPPED) + /* + * We cannot cache fd_ofiles or fd_ofileflags since operations + * may block and rip them out from under us. + */ + for (i = 0; i <= fdp->fd_lastfile; i++) { + if (fdp->fd_ofiles[i] != NULL && + (fdp->fd_ofileflags[i] & UF_EXCLOSE)) { + struct file *fp; + +#if 0 + if (fdp->fd_ofileflags[i] & UF_MAPPED) (void) munmapfd(p, i); +#endif if (i < fdp->fd_knlistsize) knote_fdclose(p, i); - (void) closef(*fpp, p); - *fpp = NULL; - *fdfp = 0; + /* + * NULL-out descriptor prior to close to avoid + * a race while close blocks. 
+ */ + fp = fdp->fd_ofiles[i]; + fdp->fd_ofiles[i] = NULL; + fdp->fd_ofileflags[i] = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; + (void) closef(fp, p); } + } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; } @@ -1199,9 +1324,10 @@ fdopen(dev, mode, type, p) * Duplicate the specified descriptor to a free descriptor. */ int -dupfdopen(fdp, indx, dfd, mode, error) - register struct filedesc *fdp; - register int indx, dfd; +dupfdopen(p, fdp, indx, dfd, mode, error) + struct proc *p; + struct filedesc *fdp; + int indx, dfd; int mode; int error; { @@ -1211,14 +1337,12 @@ dupfdopen(fdp, indx, dfd, mode, error) /* * If the to-be-dup'd fd number is greater than the allowed number * of file descriptors, or the fd to be dup'd has already been - * closed, reject. Note, check for new == old is necessary as - * falloc could allocate an already closed to-be-dup'd descriptor - * as the new descriptor. + * closed, then reject. */ - fp = fdp->fd_ofiles[indx]; if ((u_int)dfd >= fdp->fd_nfiles || - (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp) + (wfp = fdp->fd_ofiles[dfd]) == NULL) { return (EBADF); + } /* * There are two cases of interest here. @@ -1240,33 +1364,58 @@ dupfdopen(fdp, indx, dfd, mode, error) */ if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) return (EACCES); + fp = fdp->fd_ofiles[indx]; +#if 0 + if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED) + (void) munmapfd(p, indx); +#endif fdp->fd_ofiles[indx] = wfp; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fhold(wfp); if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; + /* + * we now own the reference to fp that the ofiles[] array + * used to own. Release it. + */ + if (fp) + fdrop(fp, p); return (0); case ENXIO: /* * Steal away the file pointer from dfd, and stuff it into indx. 
*/ + fp = fdp->fd_ofiles[indx]; +#if 0 + if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED) + (void) munmapfd(p, indx); +#endif fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; fdp->fd_ofiles[dfd] = NULL; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fdp->fd_ofileflags[dfd] = 0; + + /* + * we now own the reference to fp that the ofiles[] array + * used to own. Release it. + */ + if (fp) + fdrop(fp, p); /* * Complete the clean up of the filedesc structure by * recomputing the various hints. */ - if (indx > fdp->fd_lastfile) + if (indx > fdp->fd_lastfile) { fdp->fd_lastfile = indx; - else + } else { while (fdp->fd_lastfile > 0 && - fdp->fd_ofiles[fdp->fd_lastfile] == NULL) + fdp->fd_ofiles[fdp->fd_lastfile] == NULL) { fdp->fd_lastfile--; + } if (dfd < fdp->fd_freefile) fdp->fd_freefile = dfd; + } return (0); default: diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 0444d7e..64d8d9f 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -333,7 +333,7 @@ kevent(struct proc *p, struct kevent_args *uap) struct filedesc* fdp = p->p_fd; struct kevent *kevp; struct kqueue *kq; - struct file *fp; + struct file *fp = NULL; struct timespec ts; int i, n, nerrors, error; @@ -342,10 +342,12 @@ kevent(struct proc *p, struct kevent_args *uap) (fp->f_type != DTYPE_KQUEUE)) return (EBADF); + fhold(fp); + if (uap->timeout != NULL) { error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) - return error; + goto done; uap->timeout = &ts; } @@ -357,7 +359,7 @@ kevent(struct proc *p, struct kevent_args *uap) error = copyin(uap->changelist, kq->kq_kev, n * sizeof(struct kevent)); if (error) - return (error); + goto done; for (i = 0; i < n; i++) { kevp = &kq->kq_kev[i]; kevp->flags &= ~EV_SYSFLAGS; @@ -373,7 +375,7 @@ kevent(struct proc *p, struct kevent_args *uap) uap->nevents--; nerrors++; } else { - return (error); + goto done; } } } @@ -382,10 +384,14 @@ kevent(struct proc *p, struct kevent_args *uap) } if (nerrors) { p->p_retval[0] = nerrors; - return (0); + 
error = 0; + goto done; } error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, p); +done: + if (fp != NULL) + fdrop(fp, p); return (error); } @@ -417,6 +423,7 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) if ((u_int)kev->ident >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[kev->ident]) == NULL) return (EBADF); + fhold(fp); if (kev->ident < fdp->fd_knlistsize) { SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link) @@ -438,8 +445,10 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) } } - if (kn == NULL && ((kev->flags & EV_ADD) == 0)) - return (ENOENT); + if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { + error = ENOENT; + goto done; + } /* * kn now contains the matching knote, or NULL if no match @@ -448,14 +457,20 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) if (kn == NULL) { kn = knote_alloc(); - if (kn == NULL) - return (ENOMEM); - if (fp != NULL) - fhold(fp); + if (kn == NULL) { + error = ENOMEM; + goto done; + } kn->kn_fp = fp; kn->kn_kq = kq; kn->kn_fop = fops; + /* + * apply reference count to knode structure, so + * do not release it at the end of this routine. + */ + fp = NULL; + kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; kev->fflags = 0; @@ -506,6 +521,8 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) } done: + if (fp != NULL) + fdrop(fp, p); return (error); } diff --git a/sys/kern/kern_random.c b/sys/kern/kern_random.c new file mode 100644 index 0000000..901715b --- /dev/null +++ b/sys/kern/kern_random.c @@ -0,0 +1,396 @@ +/* + * kern_random.c -- A strong random number generator + * + * $FreeBSD$ + * + * Version 0.95, last modified 18-Oct-95 + * + * Copyright Theodore Ts'o, 1994, 1995. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, and the entire permission notice in its entirety, + * including the disclaimer of warranties. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * ALTERNATIVELY, this product may be distributed under the terms of + * the GNU Public License, in which case the provisions of the GPL are + * required INSTEAD OF the above restrictions. (This clause is + * necessary due to a potential bad interaction between the GPL and + * the restrictions contained in a BSD-style copyright.) + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/md5.h> +#include <sys/poll.h> +#include <sys/random.h> +#include <sys/systm.h> +#include <sys/select.h> +#include <sys/timetc.h> + +#include <machine/ipl.h> +#include <machine/mutex.h> + +#ifdef __i386__ +#include <i386/isa/icu.h> +#endif +#ifdef __alpha__ +/* + XXX the below should be used. However there is too much "16" + hardcoding in kern_random.c right now. -- obrien +#include <machine/ipl.h> +#if NHWI > 0 +#define ICU_LEN (NHWI) +#else +#define ICU_LEN (NSWI) +#endif +*/ +#define ICU_LEN 16 +#endif + +#define MAX_BLKDEV 4 + +/* + * The pool is stirred with a primitive polynomial of degree 128 + * over GF(2), namely x^128 + x^99 + x^59 + x^31 + x^9 + x^7 + 1. + * For a pool of size 64, try x^64+x^62+x^38+x^10+x^6+x+1. + */ +#define POOLWORDS 128 /* Power of 2 - note that this is 32-bit words */ +#define POOLBITS (POOLWORDS*32) + +#if POOLWORDS == 128 +#define TAP1 99 /* The polynomial taps */ +#define TAP2 59 +#define TAP3 31 +#define TAP4 9 +#define TAP5 7 +#elif POOLWORDS == 64 +#define TAP1 62 /* The polynomial taps */ +#define TAP2 38 +#define TAP3 10 +#define TAP4 6 +#define TAP5 1 +#else +#error No primitive polynomial available for chosen POOLWORDS +#endif + +#define WRITEBUFFER 512 /* size in bytes */ + +/* There is actually only one of these, globally. 
*/ +struct random_bucket { + u_int add_ptr; + u_int entropy_count; + int input_rotate; + u_int32_t *pool; + struct selinfo rsel; +}; + +/* There is one of these per entropy source */ +struct timer_rand_state { + u_long last_time; + int last_delta; + int nbits; +}; + +static struct random_bucket random_state; +static u_int32_t random_pool[POOLWORDS]; +static struct timer_rand_state keyboard_timer_state; +static struct timer_rand_state extract_timer_state; +static struct timer_rand_state irq_timer_state[ICU_LEN]; +#ifdef notyet +static struct timer_rand_state blkdev_timer_state[MAX_BLKDEV]; +#endif +static struct wait_queue *random_wait; + +#ifndef MIN +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +void +rand_initialize(void) +{ + random_state.add_ptr = 0; + random_state.entropy_count = 0; + random_state.pool = random_pool; + random_wait = NULL; + random_state.rsel.si_flags = 0; + random_state.rsel.si_pid = 0; +} + +/* + * This function adds an int into the entropy "pool". It does not + * update the entropy estimate. The caller must do this if appropriate. + * + * The pool is stirred with a primitive polynomial of degree 128 + * over GF(2), namely x^128 + x^99 + x^59 + x^31 + x^9 + x^7 + 1. + * For a pool of size 64, try x^64+x^62+x^38+x^10+x^6+x+1. + * + * We rotate the input word by a changing number of bits, to help + * assure that all bits in the entropy get toggled. Otherwise, if we + * consistently feed the entropy pool small numbers (like ticks and + * scancodes, for example), the upper bits of the entropy pool don't + * get affected. 
--- TYT, 10/11/95 + */ +static __inline void +add_entropy_word(struct random_bucket *r, const u_int32_t input) +{ + u_int i; + u_int32_t w; + + w = (input << r->input_rotate) | (input >> (32 - r->input_rotate)); + i = r->add_ptr = (r->add_ptr - 1) & (POOLWORDS-1); + if (i) + r->input_rotate = (r->input_rotate + 7) & 31; + else + /* + * At the beginning of the pool, add an extra 7 bits + * rotation, so that successive passes spread the + * input bits across the pool evenly. + */ + r->input_rotate = (r->input_rotate + 14) & 31; + + /* XOR in the various taps */ + w ^= r->pool[(i+TAP1)&(POOLWORDS-1)]; + w ^= r->pool[(i+TAP2)&(POOLWORDS-1)]; + w ^= r->pool[(i+TAP3)&(POOLWORDS-1)]; + w ^= r->pool[(i+TAP4)&(POOLWORDS-1)]; + w ^= r->pool[(i+TAP5)&(POOLWORDS-1)]; + w ^= r->pool[i]; + /* Rotate w left 1 bit (stolen from SHA) and store */ + r->pool[i] = (w << 1) | (w >> 31); +} + +/* + * This function adds entropy to the entropy "pool" by using timing + * delays. It uses the timer_rand_state structure to make an estimate + * of how many bits of entropy this call has added to the pool. + * + * The number "num" is also added to the pool - it should somehow describe + * the type of event which just happened. This is currently 0-255 for + * keyboard scan codes, and 256 upwards for interrupts. + * On the i386, this is assumed to be at most 16 bits, and the high bits + * are used for a high-resolution timer. + */ +static void +add_timer_randomness(struct random_bucket *r, struct timer_rand_state *state, + u_int num) +{ + int delta, delta2; + u_int nbits; + u_int32_t time; + + num ^= timecounter->tc_get_timecount(timecounter) << 16; + r->entropy_count += 2; + + time = ticks; + + add_entropy_word(r, (u_int32_t) num); + add_entropy_word(r, time); + + /* + * Calculate number of bits of randomness we probably + * added. We take into account the first and second order + * deltas in order to make our estimate. 
+ */ + delta = time - state->last_time; + state->last_time = time; + + delta2 = delta - state->last_delta; + state->last_delta = delta; + + if (delta < 0) delta = -delta; + if (delta2 < 0) delta2 = -delta2; + delta = MIN(delta, delta2) >> 1; + for (nbits = 0; delta; nbits++) + delta >>= 1; + + r->entropy_count += nbits; + + /* Prevent overflow */ + if (r->entropy_count > POOLBITS) + r->entropy_count = POOLBITS; + + if (r->entropy_count >= 8) + selwakeup(&random_state.rsel); +} + +void +add_keyboard_randomness(u_char scancode) +{ + add_timer_randomness(&random_state, &keyboard_timer_state, scancode); +} + +void +add_interrupt_randomness(void *vsc) +{ + int intr; + struct random_softc *sc = vsc; + + (sc->sc_handler)(sc->sc_arg); + intr = sc->sc_intr; + add_timer_randomness(&random_state, &irq_timer_state[intr], intr); +} + +#ifdef notused +void +add_blkdev_randomness(int major) +{ + if (major >= MAX_BLKDEV) + return; + + add_timer_randomness(&random_state, &blkdev_timer_state[major], + 0x200+major); +} +#endif /* notused */ + +#if POOLWORDS % 16 +#error extract_entropy() assumes that POOLWORDS is a multiple of 16 words. +#endif +/* + * This function extracts randomness from the "entropy pool", and + * returns it in a buffer. This function computes how many remaining + * bits of entropy are left in the pool, but it does not restrict the + * number of bytes that are actually obtained. + */ +static __inline int +extract_entropy(struct random_bucket *r, char *buf, int nbytes) +{ + int ret, i; + u_int32_t tmp[4]; + + add_timer_randomness(r, &extract_timer_state, nbytes); + + /* Redundant, but just in case... */ + if (r->entropy_count > POOLBITS) + r->entropy_count = POOLBITS; + /* Why is this here? Left in from Ted Ts'o. Perhaps to limit time. 
*/ + if (nbytes > 32768) + nbytes = 32768; + + ret = nbytes; + if (r->entropy_count / 8 >= nbytes) + r->entropy_count -= nbytes*8; + else + r->entropy_count = 0; + + while (nbytes) { + /* Hash the pool to get the output */ + tmp[0] = 0x67452301; + tmp[1] = 0xefcdab89; + tmp[2] = 0x98badcfe; + tmp[3] = 0x10325476; + for (i = 0; i < POOLWORDS; i += 16) + MD5Transform(tmp, (char *)(r->pool+i)); + /* Modify pool so next hash will produce different results */ + add_entropy_word(r, tmp[0]); + add_entropy_word(r, tmp[1]); + add_entropy_word(r, tmp[2]); + add_entropy_word(r, tmp[3]); + /* + * Run the MD5 Transform one more time, since we want + * to add at least minimal obscuring of the inputs to + * add_entropy_word(). --- TYT + */ + MD5Transform(tmp, (char *)(r->pool)); + + /* Copy data to destination buffer */ + i = MIN(nbytes, 16); + bcopy(tmp, buf, i); + nbytes -= i; + buf += i; + } + + /* Wipe data from memory */ + bzero(tmp, sizeof(tmp)); + + return ret; +} + +#ifdef notused /* XXX NOT the exported kernel interface */ +/* + * This function is the exported kernel interface. It returns some + * number of good random numbers, suitable for seeding TCP sequence + * numbers, etc. 
+ */ +void +get_random_bytes(void *buf, u_int nbytes) +{ + extract_entropy(&random_state, (char *) buf, nbytes); +} +#endif /* notused */ + +u_int +read_random(void *buf, u_int nbytes) +{ + if ((nbytes * 8) > random_state.entropy_count) + nbytes = random_state.entropy_count / 8; + + return extract_entropy(&random_state, (char *)buf, nbytes); +} + +u_int +read_random_unlimited(void *buf, u_int nbytes) +{ + return extract_entropy(&random_state, (char *)buf, nbytes); +} + +#ifdef notused +u_int +write_random(const char *buf, u_int nbytes) +{ + u_int i; + u_int32_t word, *p; + + for (i = nbytes, p = (u_int32_t *)buf; + i >= sizeof(u_int32_t); + i-= sizeof(u_int32_t), p++) + add_entropy_word(&random_state, *p); + if (i) { + word = 0; + bcopy(p, &word, i); + add_entropy_word(&random_state, word); + } + return nbytes; +} +#endif /* notused */ + +int +random_poll(dev_t dev, int events, struct proc *p) +{ + int revents = 0; + + mtx_enter_sched_quick(); + if (events & (POLLIN | POLLRDNORM)) { + if (random_state.entropy_count >= 8) + revents |= events & (POLLIN | POLLRDNORM); + else + selrecord(p, &random_state.rsel); + } + mtx_exit_sched_quick(); + if (events & (POLLOUT | POLLWRNORM)) + revents |= events & (POLLOUT | POLLWRNORM); /* heh */ + + return (revents); +} + diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 8893063..635db71 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -57,9 +57,13 @@ #include <sys/poll.h> #include <sys/sysctl.h> #include <sys/sysent.h> +#include <sys/bio.h> +#include <sys/buf.h> #ifdef KTRACE #include <sys/ktrace.h> #endif +#include <vm/vm.h> +#include <vm/vm_page.h> #include <machine/limits.h> @@ -75,7 +79,7 @@ static int dofilewrite __P((struct proc *, struct file *, int, const void *, size_t, off_t, int)); struct file* -getfp(fdp, fd, flag) +holdfp(fdp, fd, flag) struct filedesc* fdp; int fd, flag; { @@ -83,8 +87,10 @@ getfp(fdp, fd, flag) if (((u_int)fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == 
NULL || - (fp->f_flag & flag) == 0) + (fp->f_flag & flag) == 0) { return (NULL); + } + fhold(fp); return (fp); } @@ -104,10 +110,13 @@ read(p, uap) register struct read_args *uap; { register struct file *fp; + int error; - if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) + if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) return (EBADF); - return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); + error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); + fdrop(fp, p); + return(error); } /* @@ -128,13 +137,18 @@ pread(p, uap) register struct pread_args *uap; { register struct file *fp; + int error; - if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) + if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) return (EBADF); - if (fp->f_type != DTYPE_VNODE) - return (ESPIPE); - return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, - FOF_OFFSET)); + if (fp->f_type != DTYPE_VNODE) { + error = ESPIPE; + } else { + error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, + uap->offset, FOF_OFFSET); + } + fdrop(fp, p); + return(error); } /* @@ -180,10 +194,12 @@ dofileread(p, fp, fd, buf, nbyte, offset, flags) } #endif cnt = nbyte; - if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) + + if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) { if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; + } cnt -= auio.uio_resid; #ifdef KTRACE if (didktr && error == 0) { @@ -224,7 +240,7 @@ readv(p, uap) struct uio ktruio; #endif - if ((fp = getfp(fdp, uap->fd, FREAD)) == NULL) + if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL) return (EBADF); /* note: can't use iovlen until iovcnt is validated */ iovlen = uap->iovcnt * sizeof (struct iovec); @@ -265,10 +281,11 @@ readv(p, uap) } #endif cnt = auio.uio_resid; - if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) + if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) { if (auio.uio_resid != cnt && (error == ERESTART || error == 
EINTR || error == EWOULDBLOCK)) error = 0; + } cnt -= auio.uio_resid; #ifdef KTRACE if (ktriov != NULL) { @@ -283,6 +300,7 @@ readv(p, uap) #endif p->p_retval[0] = cnt; done: + fdrop(fp, p); if (needfree) FREE(needfree, M_IOV); return (error); @@ -304,10 +322,13 @@ write(p, uap) register struct write_args *uap; { register struct file *fp; + int error; - if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) + if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) return (EBADF); - return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); + error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); + fdrop(fp, p); + return(error); } /* @@ -328,13 +349,18 @@ pwrite(p, uap) register struct pwrite_args *uap; { register struct file *fp; + int error; - if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) + if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) return (EBADF); - if (fp->f_type != DTYPE_VNODE) - return (ESPIPE); - return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, - FOF_OFFSET)); + if (fp->f_type != DTYPE_VNODE) { + error = ESPIPE; + } else { + error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, + uap->offset, FOF_OFFSET); + } + fdrop(fp, p); + return(error); } static int @@ -377,6 +403,7 @@ dofilewrite(p, fp, fd, buf, nbyte, offset, flags) } #endif cnt = nbyte; + bwillwrite(); if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) @@ -424,9 +451,8 @@ writev(p, uap) struct uio ktruio; #endif - if ((fp = getfp(fdp, uap->fd, FWRITE)) == NULL) + if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL) return (EBADF); - fhold(fp); /* note: can't use iovlen until iovcnt is validated */ iovlen = uap->iovcnt * sizeof (struct iovec); if (uap->iovcnt > UIO_SMALLIOV) { @@ -549,30 +575,37 @@ ioctl(p, uap) size = IOCPARM_LEN(com); if (size > IOCPARM_MAX) return (ENOTTY); + + fhold(fp); + memp = NULL; if (size > sizeof (ubuf.stkbuf)) { 
memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); data = memp; - } else + } else { data = ubuf.stkbuf; + } if (com&IOC_IN) { if (size) { error = copyin(uap->data, data, (u_int)size); if (error) { if (memp) free(memp, M_IOCTLOPS); + fdrop(fp, p); return (error); } - } else + } else { *(caddr_t *)data = uap->data; - } else if ((com&IOC_OUT) && size) + } + } else if ((com&IOC_OUT) && size) { /* * Zero the buffer so the user always * gets back something deterministic. */ bzero(data, size); - else if (com&IOC_VOID) + } else if (com&IOC_VOID) { *(caddr_t *)data = uap->data; + } switch (com) { @@ -604,6 +637,7 @@ ioctl(p, uap) } if (memp) free(memp, M_IOCTLOPS); + fdrop(fp, p); return (error); } @@ -900,7 +934,7 @@ pollscan(p, fds, nfd) fds->revents = 0; } else { fp = fdp->fd_ofiles[fds->fd]; - if (fp == 0) { + if (fp == NULL) { fds->revents = POLLNVAL; n++; } else { diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 7600ffd..4c505e7 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -185,6 +185,7 @@ pipe(p, uap) error = falloc(p, &rf, &fd); if (error) goto free2; + fhold(rf); p->p_retval[0] = fd; rf->f_flag = FREAD | FWRITE; rf->f_type = DTYPE_PIPE; @@ -201,11 +202,15 @@ pipe(p, uap) rpipe->pipe_peer = wpipe; wpipe->pipe_peer = rpipe; + fdrop(rf, p); return (0); free3: - fdp->fd_ofiles[p->p_retval[0]] = 0; - ffree(rf); + if (fdp->fd_ofiles[p->p_retval[0]] == rf) { + fdp->fd_ofiles[p->p_retval[0]] = NULL; + fdrop(rf, p); + } + fdrop(rf, p); free2: (void)pipeclose(wpipe); (void)pipeclose(rpipe); diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index ae7b7e9..fa6cb6a 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -123,10 +123,13 @@ socket(p, uap) error = falloc(p, &fp, &fd); if (error) return (error); + fhold(fp); error = socreate(uap->domain, &so, uap->type, uap->protocol, p); if (error) { - fdp->fd_ofiles[fd] = 0; - ffree(fp); + if (fdp->fd_ofiles[fd] == fp) { + fdp->fd_ofiles[fd] = NULL; + fdrop(fp, 
p); + } } else { fp->f_data = (caddr_t)so; fp->f_flag = FREAD|FWRITE; @@ -134,6 +137,7 @@ socket(p, uap) fp->f_type = DTYPE_SOCKET; p->p_retval[0] = fd; } + fdrop(fp, p); return (error); } @@ -151,14 +155,17 @@ bind(p, uap) struct sockaddr *sa; int error; - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); error = getsockaddr(&sa, uap->name, uap->namelen); - if (error) + if (error) { + fdrop(fp, p); return (error); + } error = sobind((struct socket *)fp->f_data, sa, p); FREE(sa, M_SONAME); + fdrop(fp, p); return (error); } @@ -174,10 +181,12 @@ listen(p, uap) struct file *fp; int error; - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); - return (solisten((struct socket *)fp->f_data, uap->backlog, p)); + error = solisten((struct socket *)fp->f_data, uap->backlog, p); + fdrop(fp, p); + return(error); } static int @@ -191,7 +200,8 @@ accept1(p, uap, compat) int compat; { struct filedesc *fdp = p->p_fd; - struct file *fp; + struct file *lfp = NULL; + struct file *nfp = NULL; struct sockaddr *sa; int namelen, error, s; struct socket *head, *so; @@ -204,18 +214,20 @@ accept1(p, uap, compat) if(error) return (error); } - error = getsock(fdp, uap->s, &fp); + error = holdsock(fdp, uap->s, &lfp); if (error) return (error); s = splnet(); - head = (struct socket *)fp->f_data; + head = (struct socket *)lfp->f_data; if ((head->so_options & SO_ACCEPTCONN) == 0) { splx(s); - return (EINVAL); + error = EINVAL; + goto done; } if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { splx(s); - return (EWOULDBLOCK); + error = EWOULDBLOCK; + goto done; } while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { if (head->so_state & SS_CANTRCVMORE) { @@ -226,14 +238,14 @@ accept1(p, uap, compat) "accept", 0); if (error) { splx(s); - return (error); + goto done; } } if (head->so_error) { error = head->so_error; head->so_error = 0; splx(s); - return (error); + 
goto done; } /* @@ -247,8 +259,8 @@ accept1(p, uap, compat) TAILQ_REMOVE(&head->so_comp, so, so_list); head->so_qlen--; - fflag = fp->f_flag; - error = falloc(p, &fp, &fd); + fflag = lfp->f_flag; + error = falloc(p, &nfp, &fd); if (error) { /* * Probably ran out of file descriptors. Put the @@ -260,9 +272,10 @@ accept1(p, uap, compat) head->so_qlen++; wakeup_one(&head->so_timeo); splx(s); - return (error); - } else - p->p_retval[0] = fd; + goto done; + } + fhold(nfp); + p->p_retval[0] = fd; /* connection has been removed from the listen queue */ KNOTE(&head->so_rcv.sb_sel.si_note, 0); @@ -272,18 +285,19 @@ accept1(p, uap, compat) if (head->so_sigio != NULL) fsetown(fgetown(head->so_sigio), &so->so_sigio); - fp->f_data = (caddr_t)so; - fp->f_flag = fflag; - fp->f_ops = &socketops; - fp->f_type = DTYPE_SOCKET; + nfp->f_data = (caddr_t)so; + nfp->f_flag = fflag; + nfp->f_ops = &socketops; + nfp->f_type = DTYPE_SOCKET; sa = 0; (void) soaccept(so, &sa); - if (sa == 0) { + if (sa == NULL) { namelen = 0; if (uap->name) goto gotnoname; splx(s); - return 0; + error = 0; + goto done; } if (uap->name) { /* check sa_len before it is destroyed */ @@ -302,11 +316,26 @@ gotnoname: } if (sa) FREE(sa, M_SONAME); + + /* + * close the new descriptor, assuming someone hasn't ripped it + * out from under us. + */ if (error) { - fdp->fd_ofiles[fd] = 0; - ffree(fp); + if (fdp->fd_ofiles[fd] == nfp) { + fdp->fd_ofiles[fd] = NULL; + fdrop(nfp, p); + } } splx(s); + + /* + * Release explicitly held references before returning. 
+ */ +done: + if (nfp != NULL) + fdrop(nfp, p); + fdrop(lfp, p); return (error); } @@ -345,21 +374,24 @@ connect(p, uap) struct sockaddr *sa; int error, s; - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); so = (struct socket *)fp->f_data; - if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) - return (EALREADY); + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + error = EALREADY; + goto done; + } error = getsockaddr(&sa, uap->name, uap->namelen); if (error) - return (error); + goto done; error = soconnect(so, sa, p); if (error) goto bad; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { FREE(sa, M_SONAME); - return (EINPROGRESS); + error = EINPROGRESS; + goto done; } s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { @@ -378,6 +410,8 @@ bad: FREE(sa, M_SONAME); if (error == ERESTART) error = EINTR; +done: + fdrop(fp, p); return (error); } @@ -405,11 +439,13 @@ socketpair(p, uap) error = falloc(p, &fp1, &fd); if (error) goto free2; + fhold(fp1); sv[0] = fd; fp1->f_data = (caddr_t)so1; error = falloc(p, &fp2, &fd); if (error) goto free3; + fhold(fp2); fp2->f_data = (caddr_t)so2; sv[1] = fd; error = soconnect2(so1, so2); @@ -427,13 +463,21 @@ socketpair(p, uap) fp1->f_ops = fp2->f_ops = &socketops; fp1->f_type = fp2->f_type = DTYPE_SOCKET; error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); + fdrop(fp1, p); + fdrop(fp2, p); return (error); free4: - fdp->fd_ofiles[sv[1]] = 0; - ffree(fp2); + if (fdp->fd_ofiles[sv[1]] == fp2) { + fdp->fd_ofiles[sv[1]] = NULL; + fdrop(fp2, p); + } + fdrop(fp2, p); free3: - fdp->fd_ofiles[sv[0]] = 0; - ffree(fp1); + if (fdp->fd_ofiles[sv[0]] == fp1) { + fdp->fd_ofiles[sv[0]] = NULL; + fdrop(fp1, p); + } + fdrop(fp1, p); free2: (void)soclose(so2); free1: @@ -461,7 +505,7 @@ sendit(p, s, mp, flags) struct uio ktruio; #endif - error = getsock(p->p_fd, s, &fp); + error = holdsock(p->p_fd, 
s, &fp); if (error) return (error); auio.uio_iov = mp->msg_iov; @@ -473,15 +517,20 @@ sendit(p, s, mp, flags) auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { - if ((auio.uio_resid += iov->iov_len) < 0) + if ((auio.uio_resid += iov->iov_len) < 0) { + fdrop(fp, p); return (EINVAL); + } } if (mp->msg_name) { error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); - if (error) + if (error) { + fdrop(fp, p); return (error); - } else + } + } else { to = 0; + } if (mp->msg_control) { if (mp->msg_controllen < sizeof(struct cmsghdr) #ifdef COMPAT_OLDSOCK @@ -511,8 +560,9 @@ sendit(p, s, mp, flags) } } #endif - } else + } else { control = 0; + } #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO)) { int iovlen = auio.uio_iovcnt * sizeof (struct iovec); @@ -546,6 +596,7 @@ sendit(p, s, mp, flags) } #endif bad: + fdrop(fp, p); if (to) FREE(to, M_SONAME); return (error); @@ -702,7 +753,7 @@ recvit(p, s, mp, namelenp) struct uio ktruio; #endif - error = getsock(p->p_fd, s, &fp); + error = holdsock(p->p_fd, s, &fp); if (error) return (error); auio.uio_iov = mp->msg_iov; @@ -714,8 +765,10 @@ recvit(p, s, mp, namelenp) auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { - if ((auio.uio_resid += iov->iov_len) < 0) + if ((auio.uio_resid += iov->iov_len) < 0) { + fdrop(fp, p); return (EINVAL); + } } #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO)) { @@ -827,6 +880,7 @@ recvit(p, s, mp, namelenp) mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; } out: + fdrop(fp, p); if (fromsa) FREE(fromsa, M_SONAME); if (control) @@ -1011,10 +1065,12 @@ shutdown(p, uap) struct file *fp; int error; - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); - return (soshutdown((struct socket *)fp->f_data, uap->how)); + error = soshutdown((struct socket *)fp->f_data, uap->how); + fdrop(fp, p); + return(error); } /* ARGSUSED */ @@ -1038,7 +1094,7 @@ setsockopt(p, uap) if (uap->valsize < 0) 
return (EINVAL); - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); @@ -1048,8 +1104,9 @@ setsockopt(p, uap) sopt.sopt_val = uap->val; sopt.sopt_valsize = uap->valsize; sopt.sopt_p = p; - - return (sosetopt((struct socket *)fp->f_data, &sopt)); + error = sosetopt((struct socket *)fp->f_data, &sopt); + fdrop(fp, p); + return(error); } /* ARGSUSED */ @@ -1068,18 +1125,23 @@ getsockopt(p, uap) struct file *fp; struct sockopt sopt; - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); if (uap->val) { error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, sizeof (valsize)); - if (error) + if (error) { + fdrop(fp, p); return (error); - if (valsize < 0) + } + if (valsize < 0) { + fdrop(fp, p); return (EINVAL); - } else + } + } else { valsize = 0; + } sopt.sopt_dir = SOPT_GET; sopt.sopt_level = uap->level; @@ -1094,6 +1156,7 @@ getsockopt(p, uap) error = copyout((caddr_t)&valsize, (caddr_t)uap->avalsize, sizeof (valsize)); } + fdrop(fp, p); return (error); } @@ -1116,12 +1179,14 @@ getsockname1(p, uap, compat) struct sockaddr *sa; int len, error; - error = getsock(p->p_fd, uap->fdes, &fp); + error = holdsock(p->p_fd, uap->fdes, &fp); if (error) return (error); error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); - if (error) + if (error) { + fdrop(fp, p); return (error); + } so = (struct socket *)fp->f_data; sa = 0; error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); @@ -1145,6 +1210,7 @@ gotnothing: bad: if (sa) FREE(sa, M_SONAME); + fdrop(fp, p); return (error); } @@ -1187,15 +1253,19 @@ getpeername1(p, uap, compat) struct sockaddr *sa; int len, error; - error = getsock(p->p_fd, uap->fdes, &fp); + error = holdsock(p->p_fd, uap->fdes, &fp); if (error) return (error); so = (struct socket *)fp->f_data; - if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { + fdrop(fp, 
p); return (ENOTCONN); + } error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); - if (error) + if (error) { + fdrop(fp, p); return (error); + } sa = 0; error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); if (error) @@ -1216,7 +1286,9 @@ getpeername1(p, uap, compat) gotnothing: error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); bad: - if (sa) FREE(sa, M_SONAME); + if (sa) + FREE(sa, M_SONAME); + fdrop(fp, p); return (error); } @@ -1307,21 +1379,31 @@ getsockaddr(namp, uaddr, len) return error; } +/* + * holdsock() - load the struct file pointer associated + * with a socket into *fpp. If an error occurs, non-zero + * will be returned and *fpp will be set to NULL. + */ int -getsock(fdp, fdes, fpp) +holdsock(fdp, fdes, fpp) struct filedesc *fdp; int fdes; struct file **fpp; { - register struct file *fp; + register struct file *fp = NULL; + int error = 0; if ((unsigned)fdes >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fdes]) == NULL) - return (EBADF); - if (fp->f_type != DTYPE_SOCKET) - return (ENOTSOCK); + (fp = fdp->fd_ofiles[fdes]) == NULL) { + error = EBADF; + } else if (fp->f_type != DTYPE_SOCKET) { + error = ENOTSOCK; + fp = NULL; + } else { + fhold(fp); + } *fpp = fp; - return (0); + return(error); } /* @@ -1433,7 +1515,7 @@ sendfile(struct proc *p, struct sendfile_args *uap) * Do argument checking. Must be a regular file in, stream * type and connected socket out, positive offset. 
*/ - fp = getfp(fdp, uap->fd, FREAD); + fp = holdfp(fdp, uap->fd, FREAD); if (fp == NULL) { error = EBADF; goto done; @@ -1448,7 +1530,8 @@ sendfile(struct proc *p, struct sendfile_args *uap) error = EINVAL; goto done; } - error = getsock(p->p_fd, uap->s, &fp); + fdrop(fp, p); + error = holdsock(p->p_fd, uap->s, &fp); if (error) goto done; so = (struct socket *)fp->f_data; @@ -1714,5 +1797,7 @@ done: } if (vp) vrele(vp); + if (fp) + fdrop(fp, p); return (error); } diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index a564886..d4a226c 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -589,6 +589,11 @@ aio_process(struct aiocblist *aiocbe) inblock_st = mycp->p_stats->p_ru.ru_inblock; oublock_st = mycp->p_stats->p_ru.ru_oublock; + /* + * Temporarily bump the ref count while reading to avoid the + * descriptor being ripped out from under us. + */ + fhold(fp); if (cb->aio_lio_opcode == LIO_READ) { auio.uio_rw = UIO_READ; error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, mycp); @@ -596,6 +601,7 @@ aio_process(struct aiocblist *aiocbe) auio.uio_rw = UIO_WRITE; error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, mycp); } + fdrop(fp, mycp); inblock_end = mycp->p_stats->p_ru.ru_inblock; oublock_end = mycp->p_stats->p_ru.ru_oublock; @@ -986,6 +992,8 @@ aio_qphysio(struct proc *p, struct aiocblist *aiocbe) if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) return (-1); + fhold(fp); + ki->kaio_buffer_count++; lj = aiocbe->lio; @@ -1074,6 +1082,7 @@ aio_qphysio(struct proc *p, struct aiocblist *aiocbe) splx(s); if (notify) KNOTE(&aiocbe->klist, 0); + fdrop(fp, p); return 0; doerror: @@ -1082,6 +1091,7 @@ doerror: lj->lioj_buffer_count--; aiocbe->bp = NULL; relpbuf(bp, NULL); + fdrop(fp, p); return error; } @@ -1291,6 +1301,8 @@ _aio_aqueue(struct proc *p, struct aiocb *job, struct aio_liojob *lj, int type) return EINVAL; } + fhold(fp); + /* * XXX * Figure out how to do this properly. 
This currently won't @@ -1326,7 +1338,7 @@ aqueue_fail: TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); if (type == 0) suword(&job->_aiocb_private.error, error); - return (error); + goto done; } no_kqueue: } @@ -1363,18 +1375,19 @@ no_kqueue: ki->kaio_queue_count++; num_queue_count++; splx(s); - return 0; + error = 0; + goto done; } splx(s); } if ((error = aio_qphysio(p, aiocbe)) == 0) - return 0; - else if (error > 0) { + goto done; + if (error > 0) { suword(&job->_aiocb_private.status, 0); aiocbe->uaiocb._aiocb_private.error = error; suword(&job->_aiocb_private.error, error); - return error; + goto done; } /* No buffer for daemon I/O. */ @@ -1418,6 +1431,8 @@ retryproc: num_aio_resv_start--; } splx(s); +done: + fdrop(fp, p); return error; } @@ -1907,7 +1922,13 @@ aio_read(struct proc *p, struct aio_read_args *uap) auio.uio_procp = p; cnt = iocb.aio_nbytes; + /* + * Temporarily bump the ref count while reading to avoid the + * descriptor being ripped out from under us. + */ + fhold(fp); error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, p); + fdrop(fp, p); if (error && (auio.uio_resid != cnt) && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; @@ -1974,7 +1995,13 @@ aio_write(struct proc *p, struct aio_write_args *uap) auio.uio_procp = p; cnt = iocb.aio_nbytes; + /* + * Temporarily bump the ref count while writing to avoid the + * descriptor being ripped out from under us. 
+ */ + fhold(fp); error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, p); + fdrop(fp, p); if (error) { if (auio.uio_resid != cnt) { if (error == ERESTART || error == EINTR || error == diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index c24d227..7cf4663 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -71,6 +71,7 @@ #include <vm/vm.h> #include <vm/vm_object.h> #include <vm/vm_zone.h> +#include <vm/vm_page.h> static int change_dir __P((struct nameidata *ndp, struct proc *p)); static void checkdirs __P((struct vnode *olddp)); @@ -996,25 +997,65 @@ open(p, uap) cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ + /* + * Bump the ref count to prevent another process from closing + * the descriptor while we are blocked in vn_open() + */ + fhold(fp); error = vn_open(&nd, &flags, cmode); if (error) { - ffree(fp); + /* + * release our own reference + */ + fdrop(fp, p); + + /* + * handle special fdopen() case. bleh. dupfdopen() is + * responsible for dropping the old contents of ofiles[indx] + * if it succeeds. + */ if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0 && /* XXX from fdopen */ (error = - dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { + dupfdopen(p, fdp, indx, p->p_dupfd, flags, error)) == 0) { p->p_retval[0] = indx; return (0); } + /* + * Clean up the descriptor, but only if another thread hadn't + * replaced or closed it. + */ + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + if (error == ERESTART) error = EINTR; - fdp->fd_ofiles[indx] = NULL; return (error); } p->p_dupfd = 0; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; + /* + * There should be 2 references on the file, one from the descriptor + * table, and one for us. + * + * Handle the case where someone closed the file (via its file + * descriptor) while we were blocked. 
The end result should look + * like opening the file succeeded but it was immediately closed. + */ + if (fp->f_count == 1) { + KASSERT(fdp->fd_ofiles[indx] != fp, + ("Open file descriptor lost all refs")); + VOP_UNLOCK(vp, 0, p); + vn_close(vp, flags & FMASK, fp->f_cred, p); + fdrop(fp, p); + p->p_retval[0] = indx; + return 0; + } + fp->f_data = (caddr_t)vp; fp->f_flag = flags & FMASK; fp->f_ops = &vnops; @@ -1051,12 +1092,19 @@ open(p, uap) /* assert that vn_open created a backing object if one is needed */ KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, ("open: vmio vnode has no backing object after vn_open")); + /* + * Release our private reference, leaving the one associated with + * the descriptor table intact. + */ + fdrop(fp, p); p->p_retval[0] = indx; return (0); bad: - (void) vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdp->fd_ofiles[indx] = NULL; + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + fdrop(fp, p); return (error); } @@ -3394,6 +3442,12 @@ fhopen(p, uap) if ((error = falloc(p, &nfp, &indx)) != 0) goto bad; fp = nfp; + + /* + * Hold an extra reference to avoid having fp ripped out + * from under us while we block in the lock op + */ + fhold(fp); nfp->f_data = (caddr_t)vp; nfp->f_flag = fmode & FMASK; nfp->f_ops = &vnops; @@ -3411,10 +3465,20 @@ fhopen(p, uap) type |= F_WAIT; VOP_UNLOCK(vp, 0, p); if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { - (void) vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdp->fd_ofiles[indx] = NULL; - return (error); + /* + * The lock request failed. Normally close the + * descriptor but handle the case where someone might + * have dup()d or close()d it when we weren't looking. 
+ */ + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + /* + * release our private reference + */ + fdrop(fp, p); + return(error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; @@ -3423,6 +3487,7 @@ fhopen(p, uap) vfs_object_create(vp, p, p->p_ucred); VOP_UNLOCK(vp, 0, p); + fdrop(fp, p); p->p_retval[0] = indx; return (0); diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index c24d227..7cf4663 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -71,6 +71,7 @@ #include <vm/vm.h> #include <vm/vm_object.h> #include <vm/vm_zone.h> +#include <vm/vm_page.h> static int change_dir __P((struct nameidata *ndp, struct proc *p)); static void checkdirs __P((struct vnode *olddp)); @@ -996,25 +997,65 @@ open(p, uap) cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ + /* + * Bump the ref count to prevent another process from closing + * the descriptor while we are blocked in vn_open() + */ + fhold(fp); error = vn_open(&nd, &flags, cmode); if (error) { - ffree(fp); + /* + * release our own reference + */ + fdrop(fp, p); + + /* + * handle special fdopen() case. bleh. dupfdopen() is + * responsible for dropping the old contents of ofiles[indx] + * if it succeeds. + */ if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0 && /* XXX from fdopen */ (error = - dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { + dupfdopen(p, fdp, indx, p->p_dupfd, flags, error)) == 0) { p->p_retval[0] = indx; return (0); } + /* + * Clean up the descriptor, but only if another thread hadn't + * replaced or closed it. 
+ */ + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + if (error == ERESTART) error = EINTR; - fdp->fd_ofiles[indx] = NULL; return (error); } p->p_dupfd = 0; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; + /* + * There should be 2 references on the file, one from the descriptor + * table, and one for us. + * + * Handle the case where someone closed the file (via its file + * descriptor) while we were blocked. The end result should look + * like opening the file succeeded but it was immediately closed. + */ + if (fp->f_count == 1) { + KASSERT(fdp->fd_ofiles[indx] != fp, + ("Open file descriptor lost all refs")); + VOP_UNLOCK(vp, 0, p); + vn_close(vp, flags & FMASK, fp->f_cred, p); + fdrop(fp, p); + p->p_retval[0] = indx; + return 0; + } + fp->f_data = (caddr_t)vp; fp->f_flag = flags & FMASK; fp->f_ops = &vnops; @@ -1051,12 +1092,19 @@ open(p, uap) /* assert that vn_open created a backing object if one is needed */ KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, ("open: vmio vnode has no backing object after vn_open")); + /* + * Release our private reference, leaving the one associated with + * the descriptor table intact. 
+ */ + fdrop(fp, p); p->p_retval[0] = indx; return (0); bad: - (void) vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdp->fd_ofiles[indx] = NULL; + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + fdrop(fp, p); return (error); } @@ -3394,6 +3442,12 @@ fhopen(p, uap) if ((error = falloc(p, &nfp, &indx)) != 0) goto bad; fp = nfp; + + /* + * Hold an extra reference to avoid having fp ripped out + * from under us while we block in the lock op + */ + fhold(fp); nfp->f_data = (caddr_t)vp; nfp->f_flag = fmode & FMASK; nfp->f_ops = &vnops; @@ -3411,10 +3465,20 @@ fhopen(p, uap) type |= F_WAIT; VOP_UNLOCK(vp, 0, p); if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { - (void) vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdp->fd_ofiles[indx] = NULL; - return (error); + /* + * The lock request failed. Normally close the + * descriptor but handle the case where someone might + * have dup()d or close()d it when we weren't looking. + */ + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + /* + * release our private reference + */ + fdrop(fp, p); + return(error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; @@ -3423,6 +3487,7 @@ fhopen(p, uap) vfs_object_create(vp, p, p->p_ucred); VOP_UNLOCK(vp, 0, p); + fdrop(fp, p); p->p_retval[0] = indx; return (0); |