diff options
author | dillon <dillon@FreeBSD.org> | 2000-11-18 21:01:04 +0000 |
---|---|---|
committer | dillon <dillon@FreeBSD.org> | 2000-11-18 21:01:04 +0000 |
commit | 15a44d16ca10bf52da55462560c345940cd19b38 (patch) | |
tree | 8d59044fc11c59a31ff7d5eb596055dcd4bfa68c /sys | |
parent | fd59970ee1df44d623fb078d21e32c352d64b79f (diff) | |
download | FreeBSD-src-15a44d16ca10bf52da55462560c345940cd19b38.zip FreeBSD-src-15a44d16ca10bf52da55462560c345940cd19b38.tar.gz |
This patchset fixes a large number of file descriptor race conditions.
Pre-rfork code assumed inherent locking of a process's file descriptor
array. However, with the advent of rfork() the file descriptor table
could be shared between processes. This patch closes over a dozen
serious race conditions related to one thread manipulating the table
(e.g. closing or dup()ing a descriptor) while another is blocked in
an open(), close(), fcntl(), read(), write(), etc...
PR: kern/11629
Discussed with: Alexander Viro <viro@math.psu.edu>
Diffstat (limited to 'sys')
-rw-r--r-- | sys/kern/kern_descrip.c | 327 | ||||
-rw-r--r-- | sys/kern/kern_event.c | 39 | ||||
-rw-r--r-- | sys/kern/kern_random.c | 396 | ||||
-rw-r--r-- | sys/kern/sys_generic.c | 86 | ||||
-rw-r--r-- | sys/kern/sys_pipe.c | 9 | ||||
-rw-r--r-- | sys/kern/uipc_syscalls.c | 217 | ||||
-rw-r--r-- | sys/kern/vfs_aio.c | 37 | ||||
-rw-r--r-- | sys/kern/vfs_extattr.c | 85 | ||||
-rw-r--r-- | sys/kern/vfs_syscalls.c | 85 | ||||
-rw-r--r-- | sys/nfs/nfs_syscalls.c | 7 | ||||
-rw-r--r-- | sys/nfsclient/nfs_nfsiod.c | 7 | ||||
-rw-r--r-- | sys/nfsserver/nfs_syscalls.c | 7 | ||||
-rw-r--r-- | sys/svr4/svr4_signal.c | 666 | ||||
-rw-r--r-- | sys/sys/filedesc.h | 6 | ||||
-rw-r--r-- | sys/sys/socketvar.h | 2 | ||||
-rw-r--r-- | sys/vm/swap_pager.c | 6 | ||||
-rw-r--r-- | sys/vm/vm_mmap.c | 52 |
17 files changed, 1789 insertions, 245 deletions
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 5f2da95..29de2b1 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -86,7 +86,7 @@ static struct cdevsw fildesc_cdevsw = { /* bmaj */ -1 }; -static int finishdup __P((struct filedesc *fdp, int old, int new, register_t *retval)); +static int do_dup __P((struct filedesc *fdp, int old, int new, register_t *retval, struct proc *p)); static int badfo_readwrite __P((struct file *fp, struct uio *uio, struct ucred *cred, int flags, struct proc *p)); static int badfo_ioctl __P((struct file *fp, u_long com, caddr_t data, @@ -125,6 +125,9 @@ getdtablesize(p, uap) /* * Duplicate a file descriptor to a particular value. + * + * note: keep in mind that a potential race condition exists when closing + * descriptors from a shared descriptor table (via rfork). */ #ifndef _SYS_SYSPROTO_H_ struct dup2_args { @@ -142,11 +145,13 @@ dup2(p, uap) register u_int old = uap->from, new = uap->to; int i, error; +retry: if (old >= fdp->fd_nfiles || fdp->fd_ofiles[old] == NULL || new >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || - new >= maxfilesperproc) + new >= maxfilesperproc) { return (EBADF); + } if (old == new) { p->p_retval[0] = new; return (0); @@ -156,15 +161,12 @@ dup2(p, uap) return (error); if (new != i) panic("dup2: fdalloc"); - } else if (fdp->fd_ofiles[new]) { - if (fdp->fd_ofileflags[new] & UF_MAPPED) - (void) munmapfd(p, new); /* - * dup2() must succeed even if the close has an error. + * fdalloc() may block, retest everything. 
*/ - (void) closef(fdp->fd_ofiles[new], p); + goto retry; } - return (finishdup(fdp, (int)old, (int)new, p->p_retval)); + return (do_dup(fdp, (int)old, (int)new, p->p_retval, p)); } /* @@ -191,7 +193,7 @@ dup(p, uap) return (EBADF); if ((error = fdalloc(p, 0, &new))) return (error); - return (finishdup(fdp, (int)old, new, p->p_retval)); + return (do_dup(fdp, (int)old, new, p->p_retval, p)); } /* @@ -222,8 +224,8 @@ fcntl(p, uap) (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); pop = &fdp->fd_ofileflags[uap->fd]; - switch (uap->cmd) { + switch (uap->cmd) { case F_DUPFD: newmin = uap->arg; if (newmin >= p->p_rlimit[RLIMIT_NOFILE].rlim_cur || @@ -231,7 +233,7 @@ fcntl(p, uap) return (EINVAL); if ((error = fdalloc(p, newmin, &i))) return (error); - return (finishdup(fdp, uap->fd, i, p->p_retval)); + return (do_dup(fdp, uap->fd, i, p->p_retval, p)); case F_GETFD: p->p_retval[0] = *pop & 1; @@ -246,26 +248,38 @@ fcntl(p, uap) return (0); case F_SETFL: + fhold(fp); fp->f_flag &= ~FCNTLFLAGS; fp->f_flag |= FFLAGS(uap->arg & ~O_ACCMODE) & FCNTLFLAGS; tmp = fp->f_flag & FNONBLOCK; error = fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); - if (error) + if (error) { + fdrop(fp, p); return (error); + } tmp = fp->f_flag & FASYNC; error = fo_ioctl(fp, FIOASYNC, (caddr_t)&tmp, p); - if (!error) + if (!error) { + fdrop(fp, p); return (0); + } fp->f_flag &= ~FNONBLOCK; tmp = 0; (void)fo_ioctl(fp, FIONBIO, (caddr_t)&tmp, p); + fdrop(fp, p); return (error); case F_GETOWN: - return (fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p)); + fhold(fp); + error = fo_ioctl(fp, FIOGETOWN, (caddr_t)p->p_retval, p); + fdrop(fp, p); + return(error); case F_SETOWN: - return (fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p)); + fhold(fp); + error = fo_ioctl(fp, FIOSETOWN, (caddr_t)&uap->arg, p); + fdrop(fp, p); + return(error); case F_SETLKW: flg |= F_WAIT; @@ -275,54 +289,81 @@ fcntl(p, uap) if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; + + /* + * copyin/lockop may 
block + */ + fhold(fp); /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); - if (error) + if (error) { + fdrop(fp, p); return (error); + } if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; - switch (fl.l_type) { + switch (fl.l_type) { case F_RDLCK: - if ((fp->f_flag & FREAD) == 0) - return (EBADF); + if ((fp->f_flag & FREAD) == 0) { + error = EBADF; + break; + } p->p_flag |= P_ADVLOCK; - return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg)); - + error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, + &fl, flg); + break; case F_WRLCK: - if ((fp->f_flag & FWRITE) == 0) - return (EBADF); + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + break; + } p->p_flag |= P_ADVLOCK; - return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, &fl, flg)); - + error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_SETLK, + &fl, flg); + break; case F_UNLCK: - return (VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, &fl, - F_POSIX)); - + error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_UNLCK, + &fl, F_POSIX); + break; default: - return (EINVAL); + error = EINVAL; + break; } + fdrop(fp, p); + return(error); case F_GETLK: if (fp->f_type != DTYPE_VNODE) return (EBADF); vp = (struct vnode *)fp->f_data; + /* + * copyin/lockop may block + */ + fhold(fp); /* Copy in the lock structure */ error = copyin((caddr_t)(intptr_t)uap->arg, (caddr_t)&fl, sizeof(fl)); - if (error) + if (error) { + fdrop(fp, p); return (error); + } if (fl.l_type != F_RDLCK && fl.l_type != F_WRLCK && - fl.l_type != F_UNLCK) + fl.l_type != F_UNLCK) { + fdrop(fp, p); return (EINVAL); + } if (fl.l_whence == SEEK_CUR) fl.l_start += fp->f_offset; - if ((error = VOP_ADVLOCK(vp,(caddr_t)p->p_leader,F_GETLK,&fl,F_POSIX))) - return (error); - return (copyout((caddr_t)&fl, (caddr_t)(intptr_t)uap->arg, - sizeof(fl))); - + error = VOP_ADVLOCK(vp, (caddr_t)p->p_leader, F_GETLK, + &fl, F_POSIX); + fdrop(fp, p); + if (error == 0) { + error = copyout((caddr_t)&fl, + 
(caddr_t)(intptr_t)uap->arg, sizeof(fl)); + } + return(error); default: return (EINVAL); } @@ -333,13 +374,29 @@ fcntl(p, uap) * Common code for dup, dup2, and fcntl(F_DUPFD). */ static int -finishdup(fdp, old, new, retval) +do_dup(fdp, old, new, retval, p) register struct filedesc *fdp; register int old, new; register_t *retval; + struct proc *p; { - register struct file *fp; + struct file *fp; + struct file *delfp; + + /* + * Save info on the descriptor being overwritten. We have + * to do the unmap now, but we cannot close it without + * introducing an ownership race for the slot. + */ + delfp = fdp->fd_ofiles[new]; +#if 0 + if (delfp && (fdp->fd_ofileflags[new] & UF_MAPPED)) + (void) munmapfd(p, new); +#endif + /* + * Duplicate the source descriptor, update lastfile + */ fp = fdp->fd_ofiles[old]; fdp->fd_ofiles[new] = fp; fdp->fd_ofileflags[new] = fdp->fd_ofileflags[old] &~ UF_EXCLOSE; @@ -347,6 +404,14 @@ finishdup(fdp, old, new, retval) if (new > fdp->fd_lastfile) fdp->fd_lastfile = new; *retval = new; + + /* + * If we dup'd over a valid file, we now own the reference to it + * and must dispose of it using closef() semantics (as if a + * close() were performed on it). + */ + if (delfp) + (void) closef(delfp, p); return (0); } @@ -491,20 +556,25 @@ close(p, uap) register struct filedesc *fdp = p->p_fd; register struct file *fp; register int fd = uap->fd; - register u_char *pf; if ((unsigned)fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == NULL) return (EBADF); - pf = (u_char *)&fdp->fd_ofileflags[fd]; - if (*pf & UF_MAPPED) +#if 0 + if (fdp->fd_ofileflags[fd] & UF_MAPPED) (void) munmapfd(p, fd); +#endif fdp->fd_ofiles[fd] = NULL; + fdp->fd_ofileflags[fd] = 0; + + /* + * we now hold the fp reference that used to be owned by the descriptor + * array. 
+ */ while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; if (fd < fdp->fd_freefile) fdp->fd_freefile = fd; - *pf = 0; if (fd < fdp->fd_knlistsize) knote_fdclose(p, fd); return (closef(fp, p)); @@ -535,11 +605,13 @@ ofstat(p, uap) if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); + fhold(fp); error = fo_stat(fp, &ub, p); if (error == 0) { cvtstat(&ub, &oub); error = copyout((caddr_t)&oub, (caddr_t)uap->sb, sizeof (oub)); } + fdrop(fp, p); return (error); } #endif /* COMPAT_43 || COMPAT_SUNOS */ @@ -567,9 +639,11 @@ fstat(p, uap) if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); + fhold(fp); error = fo_stat(fp, &ub, p); if (error == 0) error = copyout((caddr_t)&ub, (caddr_t)uap->sb, sizeof (ub)); + fdrop(fp, p); return (error); } @@ -597,11 +671,13 @@ nfstat(p, uap) if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); + fhold(fp); error = fo_stat(fp, &ub, p); if (error == 0) { cvtnstat(&ub, &nub); error = copyout((caddr_t)&nub, (caddr_t)uap->sb, sizeof (nub)); } + fdrop(fp, p); return (error); } @@ -623,28 +699,33 @@ fpathconf(p, uap) struct filedesc *fdp = p->p_fd; struct file *fp; struct vnode *vp; + int error = 0; if ((unsigned)uap->fd >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[uap->fd]) == NULL) return (EBADF); - switch (fp->f_type) { + fhold(fp); + + switch (fp->f_type) { case DTYPE_PIPE: case DTYPE_SOCKET: if (uap->name != _PC_PIPE_BUF) return (EINVAL); p->p_retval[0] = PIPE_BUF; - return (0); - + error = 0; + break; case DTYPE_FIFO: case DTYPE_VNODE: vp = (struct vnode *)fp->f_data; - return (VOP_PATHCONF(vp, uap->name, p->p_retval)); - + error = VOP_PATHCONF(vp, uap->name, p->p_retval); + break; default: - return (EOPNOTSUPP); + error = EOPNOTSUPP; + break; } - /*NOTREACHED*/ + fdrop(fp, p); + return(error); } /* @@ -698,6 +779,15 @@ fdalloc(p, want, result) nfiles = 2 * 
fdp->fd_nfiles; MALLOC(newofile, struct file **, nfiles * OFILESIZE, M_FILEDESC, M_WAITOK); + + /* + * deal with file-table extend race that might have occurred + * when malloc was blocked. + */ + if (fdp->fd_nfiles >= nfiles) { + FREE(newofile, M_FILEDESC); + continue; + } newofileflags = (char *) &newofile[nfiles]; /* * Copy the existing ofile and ofileflags arrays @@ -738,9 +828,10 @@ fdavail(p, n) last = min(fdp->fd_nfiles, lim); fpp = &fdp->fd_ofiles[fdp->fd_freefile]; - for (i = last - fdp->fd_freefile; --i >= 0; fpp++) + for (i = last - fdp->fd_freefile; --i >= 0; fpp++) { if (*fpp == NULL && --n <= 0) return (1); + } return (0); } @@ -757,8 +848,6 @@ falloc(p, resultfp, resultfd) register struct file *fp, *fq; int error, i; - if ((error = fdalloc(p, 0, &i))) - return (error); if (nfiles >= maxfiles) { tablefull("file"); return (ENFILE); @@ -772,6 +861,17 @@ falloc(p, resultfp, resultfd) nfiles++; MALLOC(fp, struct file *, sizeof(struct file), M_FILE, M_WAITOK); bzero(fp, sizeof(struct file)); + + /* + * wait until after malloc (which may have blocked) returns before + * allocating the slot, else a race might have shrunk it if we had + * allocated it before the malloc. 
+ */ + if ((error = fdalloc(p, 0, &i))) { + nfiles--; + FREE(fp, M_FILE); + return (error); + } fp->f_count = 1; fp->f_cred = p->p_ucred; fp->f_ops = &badfileops; @@ -797,11 +897,9 @@ void ffree(fp) register struct file *fp; { + KASSERT((fp->f_count == 0), ("ffree: fp_fcount not 0!")); LIST_REMOVE(fp, f_list); crfree(fp->f_cred); -#if defined(DIAGNOSTIC) || defined(INVARIANTS) - fp->f_count = 0; -#endif nfiles--; FREE(fp, M_FILE); } @@ -910,9 +1008,10 @@ fdcopy(p) */ if (newfdp->fd_knlistsize != -1) { fpp = newfdp->fd_ofiles; - for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) + for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) { if (*fpp != NULL && (*fpp)->f_type == DTYPE_KQUEUE) *fpp = NULL; + } newfdp->fd_knlist = NULL; newfdp->fd_knlistsize = -1; newfdp->fd_knhash = NULL; @@ -920,9 +1019,10 @@ fdcopy(p) } fpp = newfdp->fd_ofiles; - for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) + for (i = newfdp->fd_lastfile; i-- >= 0; fpp++) { if (*fpp != NULL) fhold(*fpp); + } return (newfdp); } @@ -943,10 +1043,15 @@ fdfree(p) if (--fdp->fd_refcnt > 0) return; + /* + * we are the last reference to the structure, we can + * safely assume it will not change out from under us. + */ fpp = fdp->fd_ofiles; - for (i = fdp->fd_lastfile; i-- >= 0; fpp++) + for (i = fdp->fd_lastfile; i-- >= 0; fpp++) { if (*fpp) (void) closef(*fpp, p); + } if (fdp->fd_nfiles > NDFILE) FREE(fdp->fd_ofiles, M_FILEDESC); if (fdp->fd_cdir) @@ -991,29 +1096,38 @@ setugidsafety(p) struct proc *p; { struct filedesc *fdp = p->p_fd; - struct file **fpp; - char *fdfp; register int i; /* Certain daemons might not have file descriptors. */ if (fdp == NULL) return; - fpp = fdp->fd_ofiles; - fdfp = fdp->fd_ofileflags; - for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) { + /* + * note: fdp->fd_ofiles may be reallocated out from under us while + * we are blocked in a close. Be careful! 
+ */ + for (i = 0; i <= fdp->fd_lastfile; i++) { if (i > 2) break; - if (*fpp != NULL && is_unsafe(*fpp)) { - if ((*fdfp & UF_MAPPED) != 0) + if (fdp->fd_ofiles[i] && is_unsafe(fdp->fd_ofiles[i])) { + struct file *fp; + +#if 0 + if ((fdp->fd_ofileflags[i] & UF_MAPPED) != 0) (void) munmapfd(p, i); +#endif if (i < fdp->fd_knlistsize) knote_fdclose(p, i); - (void) closef(*fpp, p); - *fpp = NULL; - *fdfp = 0; + /* + * NULL-out descriptor prior to close to avoid + * a race while close blocks. + */ + fp = fdp->fd_ofiles[i]; + fdp->fd_ofiles[i] = NULL; + fdp->fd_ofileflags[i] = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; + (void) closef(fp, p); } } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) @@ -1028,28 +1142,39 @@ fdcloseexec(p) struct proc *p; { struct filedesc *fdp = p->p_fd; - struct file **fpp; - char *fdfp; register int i; /* Certain daemons might not have file descriptors. */ if (fdp == NULL) return; - fpp = fdp->fd_ofiles; - fdfp = fdp->fd_ofileflags; - for (i = 0; i <= fdp->fd_lastfile; i++, fpp++, fdfp++) - if (*fpp != NULL && (*fdfp & UF_EXCLOSE)) { - if (*fdfp & UF_MAPPED) + /* + * We cannot cache fd_ofiles or fd_ofileflags since operations + * may block and rip them out from under us. + */ + for (i = 0; i <= fdp->fd_lastfile; i++) { + if (fdp->fd_ofiles[i] != NULL && + (fdp->fd_ofileflags[i] & UF_EXCLOSE)) { + struct file *fp; + +#if 0 + if (fdp->fd_ofileflags[i] & UF_MAPPED) (void) munmapfd(p, i); +#endif if (i < fdp->fd_knlistsize) knote_fdclose(p, i); - (void) closef(*fpp, p); - *fpp = NULL; - *fdfp = 0; + /* + * NULL-out descriptor prior to close to avoid + * a race while close blocks. 
+ */ + fp = fdp->fd_ofiles[i]; + fdp->fd_ofiles[i] = NULL; + fdp->fd_ofileflags[i] = 0; if (i < fdp->fd_freefile) fdp->fd_freefile = i; + (void) closef(fp, p); } + } while (fdp->fd_lastfile > 0 && fdp->fd_ofiles[fdp->fd_lastfile] == NULL) fdp->fd_lastfile--; } @@ -1199,9 +1324,10 @@ fdopen(dev, mode, type, p) * Duplicate the specified descriptor to a free descriptor. */ int -dupfdopen(fdp, indx, dfd, mode, error) - register struct filedesc *fdp; - register int indx, dfd; +dupfdopen(p, fdp, indx, dfd, mode, error) + struct proc *p; + struct filedesc *fdp; + int indx, dfd; int mode; int error; { @@ -1211,14 +1337,12 @@ dupfdopen(fdp, indx, dfd, mode, error) /* * If the to-be-dup'd fd number is greater than the allowed number * of file descriptors, or the fd to be dup'd has already been - * closed, reject. Note, check for new == old is necessary as - * falloc could allocate an already closed to-be-dup'd descriptor - * as the new descriptor. + * closed, then reject. */ - fp = fdp->fd_ofiles[indx]; if ((u_int)dfd >= fdp->fd_nfiles || - (wfp = fdp->fd_ofiles[dfd]) == NULL || fp == wfp) + (wfp = fdp->fd_ofiles[dfd]) == NULL) { return (EBADF); + } /* * There are two cases of interest here. @@ -1240,33 +1364,58 @@ dupfdopen(fdp, indx, dfd, mode, error) */ if (((mode & (FREAD|FWRITE)) | wfp->f_flag) != wfp->f_flag) return (EACCES); + fp = fdp->fd_ofiles[indx]; +#if 0 + if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED) + (void) munmapfd(p, indx); +#endif fdp->fd_ofiles[indx] = wfp; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fhold(wfp); if (indx > fdp->fd_lastfile) fdp->fd_lastfile = indx; + /* + * we now own the reference to fp that the ofiles[] array + * used to own. Release it. + */ + if (fp) + fdrop(fp, p); return (0); case ENXIO: /* * Steal away the file pointer from dfd, and stuff it into indx. 
*/ + fp = fdp->fd_ofiles[indx]; +#if 0 + if (fp && fdp->fd_ofileflags[indx] & UF_MAPPED) + (void) munmapfd(p, indx); +#endif fdp->fd_ofiles[indx] = fdp->fd_ofiles[dfd]; fdp->fd_ofiles[dfd] = NULL; fdp->fd_ofileflags[indx] = fdp->fd_ofileflags[dfd]; fdp->fd_ofileflags[dfd] = 0; + + /* + * we now own the reference to fp that the ofiles[] array + * used to own. Release it. + */ + if (fp) + fdrop(fp, p); /* * Complete the clean up of the filedesc structure by * recomputing the various hints. */ - if (indx > fdp->fd_lastfile) + if (indx > fdp->fd_lastfile) { fdp->fd_lastfile = indx; - else + } else { while (fdp->fd_lastfile > 0 && - fdp->fd_ofiles[fdp->fd_lastfile] == NULL) + fdp->fd_ofiles[fdp->fd_lastfile] == NULL) { fdp->fd_lastfile--; + } if (dfd < fdp->fd_freefile) fdp->fd_freefile = dfd; + } return (0); default: diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c index 0444d7e..64d8d9f 100644 --- a/sys/kern/kern_event.c +++ b/sys/kern/kern_event.c @@ -333,7 +333,7 @@ kevent(struct proc *p, struct kevent_args *uap) struct filedesc* fdp = p->p_fd; struct kevent *kevp; struct kqueue *kq; - struct file *fp; + struct file *fp = NULL; struct timespec ts; int i, n, nerrors, error; @@ -342,10 +342,12 @@ kevent(struct proc *p, struct kevent_args *uap) (fp->f_type != DTYPE_KQUEUE)) return (EBADF); + fhold(fp); + if (uap->timeout != NULL) { error = copyin(uap->timeout, &ts, sizeof(ts)); if (error) - return error; + goto done; uap->timeout = &ts; } @@ -357,7 +359,7 @@ kevent(struct proc *p, struct kevent_args *uap) error = copyin(uap->changelist, kq->kq_kev, n * sizeof(struct kevent)); if (error) - return (error); + goto done; for (i = 0; i < n; i++) { kevp = &kq->kq_kev[i]; kevp->flags &= ~EV_SYSFLAGS; @@ -373,7 +375,7 @@ kevent(struct proc *p, struct kevent_args *uap) uap->nevents--; nerrors++; } else { - return (error); + goto done; } } } @@ -382,10 +384,14 @@ kevent(struct proc *p, struct kevent_args *uap) } if (nerrors) { p->p_retval[0] = nerrors; - return (0); + 
error = 0; + goto done; } error = kqueue_scan(fp, uap->nevents, uap->eventlist, uap->timeout, p); +done: + if (fp != NULL) + fdrop(fp, p); return (error); } @@ -417,6 +423,7 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) if ((u_int)kev->ident >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[kev->ident]) == NULL) return (EBADF); + fhold(fp); if (kev->ident < fdp->fd_knlistsize) { SLIST_FOREACH(kn, &fdp->fd_knlist[kev->ident], kn_link) @@ -438,8 +445,10 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) } } - if (kn == NULL && ((kev->flags & EV_ADD) == 0)) - return (ENOENT); + if (kn == NULL && ((kev->flags & EV_ADD) == 0)) { + error = ENOENT; + goto done; + } /* * kn now contains the matching knote, or NULL if no match @@ -448,14 +457,20 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) if (kn == NULL) { kn = knote_alloc(); - if (kn == NULL) - return (ENOMEM); - if (fp != NULL) - fhold(fp); + if (kn == NULL) { + error = ENOMEM; + goto done; + } kn->kn_fp = fp; kn->kn_kq = kq; kn->kn_fop = fops; + /* + * apply reference count to knode structure, so + * do not release it at the end of this routine. + */ + fp = NULL; + kn->kn_sfflags = kev->fflags; kn->kn_sdata = kev->data; kev->fflags = 0; @@ -506,6 +521,8 @@ kqueue_register(struct kqueue *kq, struct kevent *kev, struct proc *p) } done: + if (fp != NULL) + fdrop(fp, p); return (error); } diff --git a/sys/kern/kern_random.c b/sys/kern/kern_random.c new file mode 100644 index 0000000..901715b --- /dev/null +++ b/sys/kern/kern_random.c @@ -0,0 +1,396 @@ +/* + * kern_random.c -- A strong random number generator + * + * $FreeBSD$ + * + * Version 0.95, last modified 18-Oct-95 + * + * Copyright Theodore Ts'o, 1994, 1995. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. 
Redistributions of source code must retain the above copyright + * notice, and the entire permission notice in its entirety, + * including the disclaimer of warranties. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote + * products derived from this software without specific prior + * written permission. + * + * ALTERNATIVELY, this product may be distributed under the terms of + * the GNU Public License, in which case the provisions of the GPL are + * required INSTEAD OF the above restrictions. (This clause is + * necessary due to a potential bad interaction between the GPL and + * the restrictions contained in a BSD-style copyright.) + * + * THIS SOFTWARE IS PROVIDED ``AS IS'' AND ANY EXPRESS OR IMPLIED + * WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE + * DISCLAIMED. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, + * INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES + * (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR + * SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, + * STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) + * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED + * OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/kernel.h> +#include <sys/md5.h> +#include <sys/poll.h> +#include <sys/random.h> +#include <sys/systm.h> +#include <sys/select.h> +#include <sys/timetc.h> + +#include <machine/ipl.h> +#include <machine/mutex.h> + +#ifdef __i386__ +#include <i386/isa/icu.h> +#endif +#ifdef __alpha__ +/* + XXX the below should be used. However there is too much "16" + hardcoding in kern_random.c right now. -- obrien +#include <machine/ipl.h> +#if NHWI > 0 +#define ICU_LEN (NHWI) +#else +#define ICU_LEN (NSWI) +#endif +*/ +#define ICU_LEN 16 +#endif + +#define MAX_BLKDEV 4 + +/* + * The pool is stirred with a primitive polynomial of degree 128 + * over GF(2), namely x^128 + x^99 + x^59 + x^31 + x^9 + x^7 + 1. + * For a pool of size 64, try x^64+x^62+x^38+x^10+x^6+x+1. + */ +#define POOLWORDS 128 /* Power of 2 - note that this is 32-bit words */ +#define POOLBITS (POOLWORDS*32) + +#if POOLWORDS == 128 +#define TAP1 99 /* The polynomial taps */ +#define TAP2 59 +#define TAP3 31 +#define TAP4 9 +#define TAP5 7 +#elif POOLWORDS == 64 +#define TAP1 62 /* The polynomial taps */ +#define TAP2 38 +#define TAP3 10 +#define TAP4 6 +#define TAP5 1 +#else +#error No primitive polynomial available for chosen POOLWORDS +#endif + +#define WRITEBUFFER 512 /* size in bytes */ + +/* There is actually only one of these, globally. 
*/ +struct random_bucket { + u_int add_ptr; + u_int entropy_count; + int input_rotate; + u_int32_t *pool; + struct selinfo rsel; +}; + +/* There is one of these per entropy source */ +struct timer_rand_state { + u_long last_time; + int last_delta; + int nbits; +}; + +static struct random_bucket random_state; +static u_int32_t random_pool[POOLWORDS]; +static struct timer_rand_state keyboard_timer_state; +static struct timer_rand_state extract_timer_state; +static struct timer_rand_state irq_timer_state[ICU_LEN]; +#ifdef notyet +static struct timer_rand_state blkdev_timer_state[MAX_BLKDEV]; +#endif +static struct wait_queue *random_wait; + +#ifndef MIN +#define MIN(a,b) (((a) < (b)) ? (a) : (b)) +#endif + +void +rand_initialize(void) +{ + random_state.add_ptr = 0; + random_state.entropy_count = 0; + random_state.pool = random_pool; + random_wait = NULL; + random_state.rsel.si_flags = 0; + random_state.rsel.si_pid = 0; +} + +/* + * This function adds an int into the entropy "pool". It does not + * update the entropy estimate. The caller must do this if appropriate. + * + * The pool is stirred with a primitive polynomial of degree 128 + * over GF(2), namely x^128 + x^99 + x^59 + x^31 + x^9 + x^7 + 1. + * For a pool of size 64, try x^64+x^62+x^38+x^10+x^6+x+1. + * + * We rotate the input word by a changing number of bits, to help + * assure that all bits in the entropy get toggled. Otherwise, if we + * consistently feed the entropy pool small numbers (like ticks and + * scancodes, for example), the upper bits of the entropy pool don't + * get affected. 
--- TYT, 10/11/95 + */ +static __inline void +add_entropy_word(struct random_bucket *r, const u_int32_t input) +{ + u_int i; + u_int32_t w; + + w = (input << r->input_rotate) | (input >> (32 - r->input_rotate)); + i = r->add_ptr = (r->add_ptr - 1) & (POOLWORDS-1); + if (i) + r->input_rotate = (r->input_rotate + 7) & 31; + else + /* + * At the beginning of the pool, add an extra 7 bits + * rotation, so that successive passes spread the + * input bits across the pool evenly. + */ + r->input_rotate = (r->input_rotate + 14) & 31; + + /* XOR in the various taps */ + w ^= r->pool[(i+TAP1)&(POOLWORDS-1)]; + w ^= r->pool[(i+TAP2)&(POOLWORDS-1)]; + w ^= r->pool[(i+TAP3)&(POOLWORDS-1)]; + w ^= r->pool[(i+TAP4)&(POOLWORDS-1)]; + w ^= r->pool[(i+TAP5)&(POOLWORDS-1)]; + w ^= r->pool[i]; + /* Rotate w left 1 bit (stolen from SHA) and store */ + r->pool[i] = (w << 1) | (w >> 31); +} + +/* + * This function adds entropy to the entropy "pool" by using timing + * delays. It uses the timer_rand_state structure to make an estimate + * of how many bits of entropy this call has added to the pool. + * + * The number "num" is also added to the pool - it should somehow describe + * the type of event which just happened. This is currently 0-255 for + * keyboard scan codes, and 256 upwards for interrupts. + * On the i386, this is assumed to be at most 16 bits, and the high bits + * are used for a high-resolution timer. + */ +static void +add_timer_randomness(struct random_bucket *r, struct timer_rand_state *state, + u_int num) +{ + int delta, delta2; + u_int nbits; + u_int32_t time; + + num ^= timecounter->tc_get_timecount(timecounter) << 16; + r->entropy_count += 2; + + time = ticks; + + add_entropy_word(r, (u_int32_t) num); + add_entropy_word(r, time); + + /* + * Calculate number of bits of randomness we probably + * added. We take into account the first and second order + * deltas in order to make our estimate. 
+ */ + delta = time - state->last_time; + state->last_time = time; + + delta2 = delta - state->last_delta; + state->last_delta = delta; + + if (delta < 0) delta = -delta; + if (delta2 < 0) delta2 = -delta2; + delta = MIN(delta, delta2) >> 1; + for (nbits = 0; delta; nbits++) + delta >>= 1; + + r->entropy_count += nbits; + + /* Prevent overflow */ + if (r->entropy_count > POOLBITS) + r->entropy_count = POOLBITS; + + if (r->entropy_count >= 8) + selwakeup(&random_state.rsel); +} + +void +add_keyboard_randomness(u_char scancode) +{ + add_timer_randomness(&random_state, &keyboard_timer_state, scancode); +} + +void +add_interrupt_randomness(void *vsc) +{ + int intr; + struct random_softc *sc = vsc; + + (sc->sc_handler)(sc->sc_arg); + intr = sc->sc_intr; + add_timer_randomness(&random_state, &irq_timer_state[intr], intr); +} + +#ifdef notused +void +add_blkdev_randomness(int major) +{ + if (major >= MAX_BLKDEV) + return; + + add_timer_randomness(&random_state, &blkdev_timer_state[major], + 0x200+major); +} +#endif /* notused */ + +#if POOLWORDS % 16 +#error extract_entropy() assumes that POOLWORDS is a multiple of 16 words. +#endif +/* + * This function extracts randomness from the "entropy pool", and + * returns it in a buffer. This function computes how many remaining + * bits of entropy are left in the pool, but it does not restrict the + * number of bytes that are actually obtained. + */ +static __inline int +extract_entropy(struct random_bucket *r, char *buf, int nbytes) +{ + int ret, i; + u_int32_t tmp[4]; + + add_timer_randomness(r, &extract_timer_state, nbytes); + + /* Redundant, but just in case... */ + if (r->entropy_count > POOLBITS) + r->entropy_count = POOLBITS; + /* Why is this here? Left in from Ted Ts'o. Perhaps to limit time. 
*/ + if (nbytes > 32768) + nbytes = 32768; + + ret = nbytes; + if (r->entropy_count / 8 >= nbytes) + r->entropy_count -= nbytes*8; + else + r->entropy_count = 0; + + while (nbytes) { + /* Hash the pool to get the output */ + tmp[0] = 0x67452301; + tmp[1] = 0xefcdab89; + tmp[2] = 0x98badcfe; + tmp[3] = 0x10325476; + for (i = 0; i < POOLWORDS; i += 16) + MD5Transform(tmp, (char *)(r->pool+i)); + /* Modify pool so next hash will produce different results */ + add_entropy_word(r, tmp[0]); + add_entropy_word(r, tmp[1]); + add_entropy_word(r, tmp[2]); + add_entropy_word(r, tmp[3]); + /* + * Run the MD5 Transform one more time, since we want + * to add at least minimal obscuring of the inputs to + * add_entropy_word(). --- TYT + */ + MD5Transform(tmp, (char *)(r->pool)); + + /* Copy data to destination buffer */ + i = MIN(nbytes, 16); + bcopy(tmp, buf, i); + nbytes -= i; + buf += i; + } + + /* Wipe data from memory */ + bzero(tmp, sizeof(tmp)); + + return ret; +} + +#ifdef notused /* XXX NOT the exported kernel interface */ +/* + * This function is the exported kernel interface. It returns some + * number of good random numbers, suitable for seeding TCP sequence + * numbers, etc. 
+ */ +void +get_random_bytes(void *buf, u_int nbytes) +{ + extract_entropy(&random_state, (char *) buf, nbytes); +} +#endif /* notused */ + +u_int +read_random(void *buf, u_int nbytes) +{ + if ((nbytes * 8) > random_state.entropy_count) + nbytes = random_state.entropy_count / 8; + + return extract_entropy(&random_state, (char *)buf, nbytes); +} + +u_int +read_random_unlimited(void *buf, u_int nbytes) +{ + return extract_entropy(&random_state, (char *)buf, nbytes); +} + +#ifdef notused +u_int +write_random(const char *buf, u_int nbytes) +{ + u_int i; + u_int32_t word, *p; + + for (i = nbytes, p = (u_int32_t *)buf; + i >= sizeof(u_int32_t); + i-= sizeof(u_int32_t), p++) + add_entropy_word(&random_state, *p); + if (i) { + word = 0; + bcopy(p, &word, i); + add_entropy_word(&random_state, word); + } + return nbytes; +} +#endif /* notused */ + +int +random_poll(dev_t dev, int events, struct proc *p) +{ + int revents = 0; + + mtx_enter_sched_quick(); + if (events & (POLLIN | POLLRDNORM)) { + if (random_state.entropy_count >= 8) + revents |= events & (POLLIN | POLLRDNORM); + else + selrecord(p, &random_state.rsel); + } + mtx_exit_sched_quick(); + if (events & (POLLOUT | POLLWRNORM)) + revents |= events & (POLLOUT | POLLWRNORM); /* heh */ + + return (revents); +} + diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c index 8893063..635db71 100644 --- a/sys/kern/sys_generic.c +++ b/sys/kern/sys_generic.c @@ -57,9 +57,13 @@ #include <sys/poll.h> #include <sys/sysctl.h> #include <sys/sysent.h> +#include <sys/bio.h> +#include <sys/buf.h> #ifdef KTRACE #include <sys/ktrace.h> #endif +#include <vm/vm.h> +#include <vm/vm_page.h> #include <machine/limits.h> @@ -75,7 +79,7 @@ static int dofilewrite __P((struct proc *, struct file *, int, const void *, size_t, off_t, int)); struct file* -getfp(fdp, fd, flag) +holdfp(fdp, fd, flag) struct filedesc* fdp; int fd, flag; { @@ -83,8 +87,10 @@ getfp(fdp, fd, flag) if (((u_int)fd) >= fdp->fd_nfiles || (fp = fdp->fd_ofiles[fd]) == 
NULL || - (fp->f_flag & flag) == 0) + (fp->f_flag & flag) == 0) { return (NULL); + } + fhold(fp); return (fp); } @@ -104,10 +110,13 @@ read(p, uap) register struct read_args *uap; { register struct file *fp; + int error; - if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) + if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) return (EBADF); - return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); + error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); + fdrop(fp, p); + return(error); } /* @@ -128,13 +137,18 @@ pread(p, uap) register struct pread_args *uap; { register struct file *fp; + int error; - if ((fp = getfp(p->p_fd, uap->fd, FREAD)) == NULL) + if ((fp = holdfp(p->p_fd, uap->fd, FREAD)) == NULL) return (EBADF); - if (fp->f_type != DTYPE_VNODE) - return (ESPIPE); - return (dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, - FOF_OFFSET)); + if (fp->f_type != DTYPE_VNODE) { + error = ESPIPE; + } else { + error = dofileread(p, fp, uap->fd, uap->buf, uap->nbyte, + uap->offset, FOF_OFFSET); + } + fdrop(fp, p); + return(error); } /* @@ -180,10 +194,12 @@ dofileread(p, fp, fd, buf, nbyte, offset, flags) } #endif cnt = nbyte; - if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) + + if ((error = fo_read(fp, &auio, fp->f_cred, flags, p))) { if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; + } cnt -= auio.uio_resid; #ifdef KTRACE if (didktr && error == 0) { @@ -224,7 +240,7 @@ readv(p, uap) struct uio ktruio; #endif - if ((fp = getfp(fdp, uap->fd, FREAD)) == NULL) + if ((fp = holdfp(fdp, uap->fd, FREAD)) == NULL) return (EBADF); /* note: can't use iovlen until iovcnt is validated */ iovlen = uap->iovcnt * sizeof (struct iovec); @@ -265,10 +281,11 @@ readv(p, uap) } #endif cnt = auio.uio_resid; - if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) + if ((error = fo_read(fp, &auio, fp->f_cred, 0, p))) { if (auio.uio_resid != cnt && (error == ERESTART || error == 
EINTR || error == EWOULDBLOCK)) error = 0; + } cnt -= auio.uio_resid; #ifdef KTRACE if (ktriov != NULL) { @@ -283,6 +300,7 @@ readv(p, uap) #endif p->p_retval[0] = cnt; done: + fdrop(fp, p); if (needfree) FREE(needfree, M_IOV); return (error); @@ -304,10 +322,13 @@ write(p, uap) register struct write_args *uap; { register struct file *fp; + int error; - if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) + if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) return (EBADF); - return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0)); + error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, (off_t)-1, 0); + fdrop(fp, p); + return(error); } /* @@ -328,13 +349,18 @@ pwrite(p, uap) register struct pwrite_args *uap; { register struct file *fp; + int error; - if ((fp = getfp(p->p_fd, uap->fd, FWRITE)) == NULL) + if ((fp = holdfp(p->p_fd, uap->fd, FWRITE)) == NULL) return (EBADF); - if (fp->f_type != DTYPE_VNODE) - return (ESPIPE); - return (dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, uap->offset, - FOF_OFFSET)); + if (fp->f_type != DTYPE_VNODE) { + error = ESPIPE; + } else { + error = dofilewrite(p, fp, uap->fd, uap->buf, uap->nbyte, + uap->offset, FOF_OFFSET); + } + fdrop(fp, p); + return(error); } static int @@ -377,6 +403,7 @@ dofilewrite(p, fp, fd, buf, nbyte, offset, flags) } #endif cnt = nbyte; + bwillwrite(); if ((error = fo_write(fp, &auio, fp->f_cred, flags, p))) { if (auio.uio_resid != cnt && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) @@ -424,9 +451,8 @@ writev(p, uap) struct uio ktruio; #endif - if ((fp = getfp(fdp, uap->fd, FWRITE)) == NULL) + if ((fp = holdfp(fdp, uap->fd, FWRITE)) == NULL) return (EBADF); - fhold(fp); /* note: can't use iovlen until iovcnt is validated */ iovlen = uap->iovcnt * sizeof (struct iovec); if (uap->iovcnt > UIO_SMALLIOV) { @@ -549,30 +575,37 @@ ioctl(p, uap) size = IOCPARM_LEN(com); if (size > IOCPARM_MAX) return (ENOTTY); + + fhold(fp); + memp = NULL; if (size > sizeof (ubuf.stkbuf)) { 
memp = (caddr_t)malloc((u_long)size, M_IOCTLOPS, M_WAITOK); data = memp; - } else + } else { data = ubuf.stkbuf; + } if (com&IOC_IN) { if (size) { error = copyin(uap->data, data, (u_int)size); if (error) { if (memp) free(memp, M_IOCTLOPS); + fdrop(fp, p); return (error); } - } else + } else { *(caddr_t *)data = uap->data; - } else if ((com&IOC_OUT) && size) + } + } else if ((com&IOC_OUT) && size) { /* * Zero the buffer so the user always * gets back something deterministic. */ bzero(data, size); - else if (com&IOC_VOID) + } else if (com&IOC_VOID) { *(caddr_t *)data = uap->data; + } switch (com) { @@ -604,6 +637,7 @@ ioctl(p, uap) } if (memp) free(memp, M_IOCTLOPS); + fdrop(fp, p); return (error); } @@ -900,7 +934,7 @@ pollscan(p, fds, nfd) fds->revents = 0; } else { fp = fdp->fd_ofiles[fds->fd]; - if (fp == 0) { + if (fp == NULL) { fds->revents = POLLNVAL; n++; } else { diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c index 7600ffd..4c505e7 100644 --- a/sys/kern/sys_pipe.c +++ b/sys/kern/sys_pipe.c @@ -185,6 +185,7 @@ pipe(p, uap) error = falloc(p, &rf, &fd); if (error) goto free2; + fhold(rf); p->p_retval[0] = fd; rf->f_flag = FREAD | FWRITE; rf->f_type = DTYPE_PIPE; @@ -201,11 +202,15 @@ pipe(p, uap) rpipe->pipe_peer = wpipe; wpipe->pipe_peer = rpipe; + fdrop(rf, p); return (0); free3: - fdp->fd_ofiles[p->p_retval[0]] = 0; - ffree(rf); + if (fdp->fd_ofiles[p->p_retval[0]] == rf) { + fdp->fd_ofiles[p->p_retval[0]] = NULL; + fdrop(rf, p); + } + fdrop(rf, p); free2: (void)pipeclose(wpipe); (void)pipeclose(rpipe); diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index ae7b7e9..fa6cb6a 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -123,10 +123,13 @@ socket(p, uap) error = falloc(p, &fp, &fd); if (error) return (error); + fhold(fp); error = socreate(uap->domain, &so, uap->type, uap->protocol, p); if (error) { - fdp->fd_ofiles[fd] = 0; - ffree(fp); + if (fdp->fd_ofiles[fd] == fp) { + fdp->fd_ofiles[fd] = NULL; + fdrop(fp, 
p); + } } else { fp->f_data = (caddr_t)so; fp->f_flag = FREAD|FWRITE; @@ -134,6 +137,7 @@ socket(p, uap) fp->f_type = DTYPE_SOCKET; p->p_retval[0] = fd; } + fdrop(fp, p); return (error); } @@ -151,14 +155,17 @@ bind(p, uap) struct sockaddr *sa; int error; - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); error = getsockaddr(&sa, uap->name, uap->namelen); - if (error) + if (error) { + fdrop(fp, p); return (error); + } error = sobind((struct socket *)fp->f_data, sa, p); FREE(sa, M_SONAME); + fdrop(fp, p); return (error); } @@ -174,10 +181,12 @@ listen(p, uap) struct file *fp; int error; - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); - return (solisten((struct socket *)fp->f_data, uap->backlog, p)); + error = solisten((struct socket *)fp->f_data, uap->backlog, p); + fdrop(fp, p); + return(error); } static int @@ -191,7 +200,8 @@ accept1(p, uap, compat) int compat; { struct filedesc *fdp = p->p_fd; - struct file *fp; + struct file *lfp = NULL; + struct file *nfp = NULL; struct sockaddr *sa; int namelen, error, s; struct socket *head, *so; @@ -204,18 +214,20 @@ accept1(p, uap, compat) if(error) return (error); } - error = getsock(fdp, uap->s, &fp); + error = holdsock(fdp, uap->s, &lfp); if (error) return (error); s = splnet(); - head = (struct socket *)fp->f_data; + head = (struct socket *)lfp->f_data; if ((head->so_options & SO_ACCEPTCONN) == 0) { splx(s); - return (EINVAL); + error = EINVAL; + goto done; } if ((head->so_state & SS_NBIO) && TAILQ_EMPTY(&head->so_comp)) { splx(s); - return (EWOULDBLOCK); + error = EWOULDBLOCK; + goto done; } while (TAILQ_EMPTY(&head->so_comp) && head->so_error == 0) { if (head->so_state & SS_CANTRCVMORE) { @@ -226,14 +238,14 @@ accept1(p, uap, compat) "accept", 0); if (error) { splx(s); - return (error); + goto done; } } if (head->so_error) { error = head->so_error; head->so_error = 0; splx(s); - return (error); + 
goto done; } /* @@ -247,8 +259,8 @@ accept1(p, uap, compat) TAILQ_REMOVE(&head->so_comp, so, so_list); head->so_qlen--; - fflag = fp->f_flag; - error = falloc(p, &fp, &fd); + fflag = lfp->f_flag; + error = falloc(p, &nfp, &fd); if (error) { /* * Probably ran out of file descriptors. Put the @@ -260,9 +272,10 @@ accept1(p, uap, compat) head->so_qlen++; wakeup_one(&head->so_timeo); splx(s); - return (error); - } else - p->p_retval[0] = fd; + goto done; + } + fhold(nfp); + p->p_retval[0] = fd; /* connection has been removed from the listen queue */ KNOTE(&head->so_rcv.sb_sel.si_note, 0); @@ -272,18 +285,19 @@ accept1(p, uap, compat) if (head->so_sigio != NULL) fsetown(fgetown(head->so_sigio), &so->so_sigio); - fp->f_data = (caddr_t)so; - fp->f_flag = fflag; - fp->f_ops = &socketops; - fp->f_type = DTYPE_SOCKET; + nfp->f_data = (caddr_t)so; + nfp->f_flag = fflag; + nfp->f_ops = &socketops; + nfp->f_type = DTYPE_SOCKET; sa = 0; (void) soaccept(so, &sa); - if (sa == 0) { + if (sa == NULL) { namelen = 0; if (uap->name) goto gotnoname; splx(s); - return 0; + error = 0; + goto done; } if (uap->name) { /* check sa_len before it is destroyed */ @@ -302,11 +316,26 @@ gotnoname: } if (sa) FREE(sa, M_SONAME); + + /* + * close the new descriptor, assuming someone hasn't ripped it + * out from under us. + */ if (error) { - fdp->fd_ofiles[fd] = 0; - ffree(fp); + if (fdp->fd_ofiles[fd] == nfp) { + fdp->fd_ofiles[fd] = NULL; + fdrop(nfp, p); + } } splx(s); + + /* + * Release explicitly held references before returning. 
+ */ +done: + if (nfp != NULL) + fdrop(nfp, p); + fdrop(lfp, p); return (error); } @@ -345,21 +374,24 @@ connect(p, uap) struct sockaddr *sa; int error, s; - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); so = (struct socket *)fp->f_data; - if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) - return (EALREADY); + if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { + error = EALREADY; + goto done; + } error = getsockaddr(&sa, uap->name, uap->namelen); if (error) - return (error); + goto done; error = soconnect(so, sa, p); if (error) goto bad; if ((so->so_state & SS_NBIO) && (so->so_state & SS_ISCONNECTING)) { FREE(sa, M_SONAME); - return (EINPROGRESS); + error = EINPROGRESS; + goto done; } s = splnet(); while ((so->so_state & SS_ISCONNECTING) && so->so_error == 0) { @@ -378,6 +410,8 @@ bad: FREE(sa, M_SONAME); if (error == ERESTART) error = EINTR; +done: + fdrop(fp, p); return (error); } @@ -405,11 +439,13 @@ socketpair(p, uap) error = falloc(p, &fp1, &fd); if (error) goto free2; + fhold(fp1); sv[0] = fd; fp1->f_data = (caddr_t)so1; error = falloc(p, &fp2, &fd); if (error) goto free3; + fhold(fp2); fp2->f_data = (caddr_t)so2; sv[1] = fd; error = soconnect2(so1, so2); @@ -427,13 +463,21 @@ socketpair(p, uap) fp1->f_ops = fp2->f_ops = &socketops; fp1->f_type = fp2->f_type = DTYPE_SOCKET; error = copyout((caddr_t)sv, (caddr_t)uap->rsv, 2 * sizeof (int)); + fdrop(fp1, p); + fdrop(fp2, p); return (error); free4: - fdp->fd_ofiles[sv[1]] = 0; - ffree(fp2); + if (fdp->fd_ofiles[sv[1]] == fp2) { + fdp->fd_ofiles[sv[1]] = NULL; + fdrop(fp2, p); + } + fdrop(fp2, p); free3: - fdp->fd_ofiles[sv[0]] = 0; - ffree(fp1); + if (fdp->fd_ofiles[sv[0]] == fp1) { + fdp->fd_ofiles[sv[0]] = NULL; + fdrop(fp1, p); + } + fdrop(fp1, p); free2: (void)soclose(so2); free1: @@ -461,7 +505,7 @@ sendit(p, s, mp, flags) struct uio ktruio; #endif - error = getsock(p->p_fd, s, &fp); + error = holdsock(p->p_fd, 
s, &fp); if (error) return (error); auio.uio_iov = mp->msg_iov; @@ -473,15 +517,20 @@ sendit(p, s, mp, flags) auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { - if ((auio.uio_resid += iov->iov_len) < 0) + if ((auio.uio_resid += iov->iov_len) < 0) { + fdrop(fp, p); return (EINVAL); + } } if (mp->msg_name) { error = getsockaddr(&to, mp->msg_name, mp->msg_namelen); - if (error) + if (error) { + fdrop(fp, p); return (error); - } else + } + } else { to = 0; + } if (mp->msg_control) { if (mp->msg_controllen < sizeof(struct cmsghdr) #ifdef COMPAT_OLDSOCK @@ -511,8 +560,9 @@ sendit(p, s, mp, flags) } } #endif - } else + } else { control = 0; + } #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO)) { int iovlen = auio.uio_iovcnt * sizeof (struct iovec); @@ -546,6 +596,7 @@ sendit(p, s, mp, flags) } #endif bad: + fdrop(fp, p); if (to) FREE(to, M_SONAME); return (error); @@ -702,7 +753,7 @@ recvit(p, s, mp, namelenp) struct uio ktruio; #endif - error = getsock(p->p_fd, s, &fp); + error = holdsock(p->p_fd, s, &fp); if (error) return (error); auio.uio_iov = mp->msg_iov; @@ -714,8 +765,10 @@ recvit(p, s, mp, namelenp) auio.uio_resid = 0; iov = mp->msg_iov; for (i = 0; i < mp->msg_iovlen; i++, iov++) { - if ((auio.uio_resid += iov->iov_len) < 0) + if ((auio.uio_resid += iov->iov_len) < 0) { + fdrop(fp, p); return (EINVAL); + } } #ifdef KTRACE if (KTRPOINT(p, KTR_GENIO)) { @@ -827,6 +880,7 @@ recvit(p, s, mp, namelenp) mp->msg_controllen = ctlbuf - (caddr_t)mp->msg_control; } out: + fdrop(fp, p); if (fromsa) FREE(fromsa, M_SONAME); if (control) @@ -1011,10 +1065,12 @@ shutdown(p, uap) struct file *fp; int error; - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); - return (soshutdown((struct socket *)fp->f_data, uap->how)); + error = soshutdown((struct socket *)fp->f_data, uap->how); + fdrop(fp, p); + return(error); } /* ARGSUSED */ @@ -1038,7 +1094,7 @@ setsockopt(p, uap) if (uap->valsize < 0) 
return (EINVAL); - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); @@ -1048,8 +1104,9 @@ setsockopt(p, uap) sopt.sopt_val = uap->val; sopt.sopt_valsize = uap->valsize; sopt.sopt_p = p; - - return (sosetopt((struct socket *)fp->f_data, &sopt)); + error = sosetopt((struct socket *)fp->f_data, &sopt); + fdrop(fp, p); + return(error); } /* ARGSUSED */ @@ -1068,18 +1125,23 @@ getsockopt(p, uap) struct file *fp; struct sockopt sopt; - error = getsock(p->p_fd, uap->s, &fp); + error = holdsock(p->p_fd, uap->s, &fp); if (error) return (error); if (uap->val) { error = copyin((caddr_t)uap->avalsize, (caddr_t)&valsize, sizeof (valsize)); - if (error) + if (error) { + fdrop(fp, p); return (error); - if (valsize < 0) + } + if (valsize < 0) { + fdrop(fp, p); return (EINVAL); - } else + } + } else { valsize = 0; + } sopt.sopt_dir = SOPT_GET; sopt.sopt_level = uap->level; @@ -1094,6 +1156,7 @@ getsockopt(p, uap) error = copyout((caddr_t)&valsize, (caddr_t)uap->avalsize, sizeof (valsize)); } + fdrop(fp, p); return (error); } @@ -1116,12 +1179,14 @@ getsockname1(p, uap, compat) struct sockaddr *sa; int len, error; - error = getsock(p->p_fd, uap->fdes, &fp); + error = holdsock(p->p_fd, uap->fdes, &fp); if (error) return (error); error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); - if (error) + if (error) { + fdrop(fp, p); return (error); + } so = (struct socket *)fp->f_data; sa = 0; error = (*so->so_proto->pr_usrreqs->pru_sockaddr)(so, &sa); @@ -1145,6 +1210,7 @@ gotnothing: bad: if (sa) FREE(sa, M_SONAME); + fdrop(fp, p); return (error); } @@ -1187,15 +1253,19 @@ getpeername1(p, uap, compat) struct sockaddr *sa; int len, error; - error = getsock(p->p_fd, uap->fdes, &fp); + error = holdsock(p->p_fd, uap->fdes, &fp); if (error) return (error); so = (struct socket *)fp->f_data; - if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) + if ((so->so_state & (SS_ISCONNECTED|SS_ISCONFIRMING)) == 0) { + fdrop(fp, 
p); return (ENOTCONN); + } error = copyin((caddr_t)uap->alen, (caddr_t)&len, sizeof (len)); - if (error) + if (error) { + fdrop(fp, p); return (error); + } sa = 0; error = (*so->so_proto->pr_usrreqs->pru_peeraddr)(so, &sa); if (error) @@ -1216,7 +1286,9 @@ getpeername1(p, uap, compat) gotnothing: error = copyout((caddr_t)&len, (caddr_t)uap->alen, sizeof (len)); bad: - if (sa) FREE(sa, M_SONAME); + if (sa) + FREE(sa, M_SONAME); + fdrop(fp, p); return (error); } @@ -1307,21 +1379,31 @@ getsockaddr(namp, uaddr, len) return error; } +/* + * holdsock() - load the struct file pointer associated + * with a socket into *fpp. If an error occurs, non-zero + * will be returned and *fpp will be set to NULL. + */ int -getsock(fdp, fdes, fpp) +holdsock(fdp, fdes, fpp) struct filedesc *fdp; int fdes; struct file **fpp; { - register struct file *fp; + register struct file *fp = NULL; + int error = 0; if ((unsigned)fdes >= fdp->fd_nfiles || - (fp = fdp->fd_ofiles[fdes]) == NULL) - return (EBADF); - if (fp->f_type != DTYPE_SOCKET) - return (ENOTSOCK); + (fp = fdp->fd_ofiles[fdes]) == NULL) { + error = EBADF; + } else if (fp->f_type != DTYPE_SOCKET) { + error = ENOTSOCK; + fp = NULL; + } else { + fhold(fp); + } *fpp = fp; - return (0); + return(error); } /* @@ -1433,7 +1515,7 @@ sendfile(struct proc *p, struct sendfile_args *uap) * Do argument checking. Must be a regular file in, stream * type and connected socket out, positive offset. 
*/ - fp = getfp(fdp, uap->fd, FREAD); + fp = holdfp(fdp, uap->fd, FREAD); if (fp == NULL) { error = EBADF; goto done; @@ -1448,7 +1530,8 @@ sendfile(struct proc *p, struct sendfile_args *uap) error = EINVAL; goto done; } - error = getsock(p->p_fd, uap->s, &fp); + fdrop(fp, p); + error = holdsock(p->p_fd, uap->s, &fp); if (error) goto done; so = (struct socket *)fp->f_data; @@ -1714,5 +1797,7 @@ done: } if (vp) vrele(vp); + if (fp) + fdrop(fp, p); return (error); } diff --git a/sys/kern/vfs_aio.c b/sys/kern/vfs_aio.c index a564886..d4a226c 100644 --- a/sys/kern/vfs_aio.c +++ b/sys/kern/vfs_aio.c @@ -589,6 +589,11 @@ aio_process(struct aiocblist *aiocbe) inblock_st = mycp->p_stats->p_ru.ru_inblock; oublock_st = mycp->p_stats->p_ru.ru_oublock; + /* + * Temporarily bump the ref count while reading to avoid the + * descriptor being ripped out from under us. + */ + fhold(fp); if (cb->aio_lio_opcode == LIO_READ) { auio.uio_rw = UIO_READ; error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, mycp); @@ -596,6 +601,7 @@ aio_process(struct aiocblist *aiocbe) auio.uio_rw = UIO_WRITE; error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, mycp); } + fdrop(fp, mycp); inblock_end = mycp->p_stats->p_ru.ru_inblock; oublock_end = mycp->p_stats->p_ru.ru_oublock; @@ -986,6 +992,8 @@ aio_qphysio(struct proc *p, struct aiocblist *aiocbe) if (ki->kaio_buffer_count >= ki->kaio_ballowed_count) return (-1); + fhold(fp); + ki->kaio_buffer_count++; lj = aiocbe->lio; @@ -1074,6 +1082,7 @@ aio_qphysio(struct proc *p, struct aiocblist *aiocbe) splx(s); if (notify) KNOTE(&aiocbe->klist, 0); + fdrop(fp, p); return 0; doerror: @@ -1082,6 +1091,7 @@ doerror: lj->lioj_buffer_count--; aiocbe->bp = NULL; relpbuf(bp, NULL); + fdrop(fp, p); return error; } @@ -1291,6 +1301,8 @@ _aio_aqueue(struct proc *p, struct aiocb *job, struct aio_liojob *lj, int type) return EINVAL; } + fhold(fp); + /* * XXX * Figure out how to do this properly. 
This currently won't @@ -1326,7 +1338,7 @@ aqueue_fail: TAILQ_INSERT_HEAD(&aio_freejobs, aiocbe, list); if (type == 0) suword(&job->_aiocb_private.error, error); - return (error); + goto done; } no_kqueue: } @@ -1363,18 +1375,19 @@ no_kqueue: ki->kaio_queue_count++; num_queue_count++; splx(s); - return 0; + error = 0; + goto done; } splx(s); } if ((error = aio_qphysio(p, aiocbe)) == 0) - return 0; - else if (error > 0) { + goto done; + if (error > 0) { suword(&job->_aiocb_private.status, 0); aiocbe->uaiocb._aiocb_private.error = error; suword(&job->_aiocb_private.error, error); - return error; + goto done; } /* No buffer for daemon I/O. */ @@ -1418,6 +1431,8 @@ retryproc: num_aio_resv_start--; } splx(s); +done: + fdrop(fp, p); return error; } @@ -1907,7 +1922,13 @@ aio_read(struct proc *p, struct aio_read_args *uap) auio.uio_procp = p; cnt = iocb.aio_nbytes; + /* + * Temporarily bump the ref count while reading to avoid the + * descriptor being ripped out from under us. + */ + fhold(fp); error = fo_read(fp, &auio, fp->f_cred, FOF_OFFSET, p); + fdrop(fp, p); if (error && (auio.uio_resid != cnt) && (error == ERESTART || error == EINTR || error == EWOULDBLOCK)) error = 0; @@ -1974,7 +1995,13 @@ aio_write(struct proc *p, struct aio_write_args *uap) auio.uio_procp = p; cnt = iocb.aio_nbytes; + /* + * Temporarily bump the ref count while writing to avoid the + * descriptor being ripped out from under us. 
+ */ + fhold(fp); error = fo_write(fp, &auio, fp->f_cred, FOF_OFFSET, p); + fdrop(fp, p); if (error) { if (auio.uio_resid != cnt) { if (error == ERESTART || error == EINTR || error == diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index c24d227..7cf4663 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -71,6 +71,7 @@ #include <vm/vm.h> #include <vm/vm_object.h> #include <vm/vm_zone.h> +#include <vm/vm_page.h> static int change_dir __P((struct nameidata *ndp, struct proc *p)); static void checkdirs __P((struct vnode *olddp)); @@ -996,25 +997,65 @@ open(p, uap) cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ + /* + * Bump the ref count to prevent another process from closing + * the descriptor while we are blocked in vn_open() + */ + fhold(fp); error = vn_open(&nd, &flags, cmode); if (error) { - ffree(fp); + /* + * release our own reference + */ + fdrop(fp, p); + + /* + * handle special fdopen() case. bleh. dupfdopen() is + * responsible for dropping the old contents of ofiles[indx] + * if it succeeds. + */ if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0 && /* XXX from fdopen */ (error = - dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { + dupfdopen(p, fdp, indx, p->p_dupfd, flags, error)) == 0) { p->p_retval[0] = indx; return (0); } + /* + * Clean up the descriptor, but only if another thread hadn't + * replaced or closed it. + */ + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + if (error == ERESTART) error = EINTR; - fdp->fd_ofiles[indx] = NULL; return (error); } p->p_dupfd = 0; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; + /* + * There should be 2 references on the file, one from the descriptor + * table, and one for us. + * + * Handle the case where someone closed the file (via its file + * descriptor) while we were blocked. 
The end result should look + * like opening the file succeeded but it was immediately closed. + */ + if (fp->f_count == 1) { + KASSERT(fdp->fd_ofiles[indx] != fp, + ("Open file descriptor lost all refs")); + VOP_UNLOCK(vp, 0, p); + vn_close(vp, flags & FMASK, fp->f_cred, p); + fdrop(fp, p); + p->p_retval[0] = indx; + return 0; + } + fp->f_data = (caddr_t)vp; fp->f_flag = flags & FMASK; fp->f_ops = &vnops; @@ -1051,12 +1092,19 @@ open(p, uap) /* assert that vn_open created a backing object if one is needed */ KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, ("open: vmio vnode has no backing object after vn_open")); + /* + * Release our private reference, leaving the one associated with + * the descriptor table intact. + */ + fdrop(fp, p); p->p_retval[0] = indx; return (0); bad: - (void) vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdp->fd_ofiles[indx] = NULL; + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + fdrop(fp, p); return (error); } @@ -3394,6 +3442,12 @@ fhopen(p, uap) if ((error = falloc(p, &nfp, &indx)) != 0) goto bad; fp = nfp; + + /* + * Hold an extra reference to avoid having fp ripped out + * from under us while we block in the lock op + */ + fhold(fp); nfp->f_data = (caddr_t)vp; nfp->f_flag = fmode & FMASK; nfp->f_ops = &vnops; @@ -3411,10 +3465,20 @@ fhopen(p, uap) type |= F_WAIT; VOP_UNLOCK(vp, 0, p); if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { - (void) vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdp->fd_ofiles[indx] = NULL; - return (error); + /* + * The lock request failed. Normally close the + * descriptor but handle the case where someone might + * have dup()d or close()d it when we weren't looking. 
+ */ + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + /* + * release our private reference + */ + fdrop(fp, p); + return(error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; @@ -3423,6 +3487,7 @@ fhopen(p, uap) vfs_object_create(vp, p, p->p_ucred); VOP_UNLOCK(vp, 0, p); + fdrop(fp, p); p->p_retval[0] = indx; return (0); diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index c24d227..7cf4663 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -71,6 +71,7 @@ #include <vm/vm.h> #include <vm/vm_object.h> #include <vm/vm_zone.h> +#include <vm/vm_page.h> static int change_dir __P((struct nameidata *ndp, struct proc *p)); static void checkdirs __P((struct vnode *olddp)); @@ -996,25 +997,65 @@ open(p, uap) cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), p); p->p_dupfd = -indx - 1; /* XXX check for fdopen */ + /* + * Bump the ref count to prevent another process from closing + * the descriptor while we are blocked in vn_open() + */ + fhold(fp); error = vn_open(&nd, &flags, cmode); if (error) { - ffree(fp); + /* + * release our own reference + */ + fdrop(fp, p); + + /* + * handle special fdopen() case. bleh. dupfdopen() is + * responsible for dropping the old contents of ofiles[indx] + * if it succeeds. + */ if ((error == ENODEV || error == ENXIO) && p->p_dupfd >= 0 && /* XXX from fdopen */ (error = - dupfdopen(fdp, indx, p->p_dupfd, flags, error)) == 0) { + dupfdopen(p, fdp, indx, p->p_dupfd, flags, error)) == 0) { p->p_retval[0] = indx; return (0); } + /* + * Clean up the descriptor, but only if another thread hadn't + * replaced or closed it. 
+ */ + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + if (error == ERESTART) error = EINTR; - fdp->fd_ofiles[indx] = NULL; return (error); } p->p_dupfd = 0; NDFREE(&nd, NDF_ONLY_PNBUF); vp = nd.ni_vp; + /* + * There should be 2 references on the file, one from the descriptor + * table, and one for us. + * + * Handle the case where someone closed the file (via its file + * descriptor) while we were blocked. The end result should look + * like opening the file succeeded but it was immediately closed. + */ + if (fp->f_count == 1) { + KASSERT(fdp->fd_ofiles[indx] != fp, + ("Open file descriptor lost all refs")); + VOP_UNLOCK(vp, 0, p); + vn_close(vp, flags & FMASK, fp->f_cred, p); + fdrop(fp, p); + p->p_retval[0] = indx; + return 0; + } + fp->f_data = (caddr_t)vp; fp->f_flag = flags & FMASK; fp->f_ops = &vnops; @@ -1051,12 +1092,19 @@ open(p, uap) /* assert that vn_open created a backing object if one is needed */ KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, ("open: vmio vnode has no backing object after vn_open")); + /* + * Release our private reference, leaving the one associated with + * the descriptor table intact. 
+ */ + fdrop(fp, p); p->p_retval[0] = indx; return (0); bad: - (void) vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdp->fd_ofiles[indx] = NULL; + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + fdrop(fp, p); return (error); } @@ -3394,6 +3442,12 @@ fhopen(p, uap) if ((error = falloc(p, &nfp, &indx)) != 0) goto bad; fp = nfp; + + /* + * Hold an extra reference to avoid having fp ripped out + * from under us while we block in the lock op + */ + fhold(fp); nfp->f_data = (caddr_t)vp; nfp->f_flag = fmode & FMASK; nfp->f_ops = &vnops; @@ -3411,10 +3465,20 @@ fhopen(p, uap) type |= F_WAIT; VOP_UNLOCK(vp, 0, p); if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, type)) != 0) { - (void) vn_close(vp, fp->f_flag, fp->f_cred, p); - ffree(fp); - fdp->fd_ofiles[indx] = NULL; - return (error); + /* + * The lock request failed. Normally close the + * descriptor but handle the case where someone might + * have dup()d or close()d it when we weren't looking. 
+ */ + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + fdrop(fp, p); + } + /* + * release our private reference + */ + fdrop(fp, p); + return(error); } vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, p); fp->f_flag |= FHASLOCK; @@ -3423,6 +3487,7 @@ fhopen(p, uap) vfs_object_create(vp, p, p->p_ucred); VOP_UNLOCK(vp, 0, p); + fdrop(fp, p); p->p_retval[0] = indx; return (0); diff --git a/sys/nfs/nfs_syscalls.c b/sys/nfs/nfs_syscalls.c index 1e0162f..83cc56b 100644 --- a/sys/nfs/nfs_syscalls.c +++ b/sys/nfs/nfs_syscalls.c @@ -194,7 +194,7 @@ nfssvc(p, uap) error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg)); if (error) return (error); - error = getsock(p->p_fd, nfsdarg.sock, &fp); + error = holdsock(p->p_fd, nfsdarg.sock, &fp); if (error) return (error); /* @@ -205,10 +205,13 @@ nfssvc(p, uap) else { error = getsockaddr(&nam, nfsdarg.name, nfsdarg.namelen); - if (error) + if (error) { + fdrop(fp, p); return (error); + } } error = nfssvc_addsock(fp, nam, p); + fdrop(fp, p); } else { error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)); if (error) diff --git a/sys/nfsclient/nfs_nfsiod.c b/sys/nfsclient/nfs_nfsiod.c index 1e0162f..83cc56b 100644 --- a/sys/nfsclient/nfs_nfsiod.c +++ b/sys/nfsclient/nfs_nfsiod.c @@ -194,7 +194,7 @@ nfssvc(p, uap) error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg)); if (error) return (error); - error = getsock(p->p_fd, nfsdarg.sock, &fp); + error = holdsock(p->p_fd, nfsdarg.sock, &fp); if (error) return (error); /* @@ -205,10 +205,13 @@ nfssvc(p, uap) else { error = getsockaddr(&nam, nfsdarg.name, nfsdarg.namelen); - if (error) + if (error) { + fdrop(fp, p); return (error); + } } error = nfssvc_addsock(fp, nam, p); + fdrop(fp, p); } else { error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)); if (error) diff --git a/sys/nfsserver/nfs_syscalls.c b/sys/nfsserver/nfs_syscalls.c index 1e0162f..83cc56b 100644 --- a/sys/nfsserver/nfs_syscalls.c +++ b/sys/nfsserver/nfs_syscalls.c @@ -194,7 +194,7 @@ nfssvc(p, uap) 
error = copyin(uap->argp, (caddr_t)&nfsdarg, sizeof(nfsdarg)); if (error) return (error); - error = getsock(p->p_fd, nfsdarg.sock, &fp); + error = holdsock(p->p_fd, nfsdarg.sock, &fp); if (error) return (error); /* @@ -205,10 +205,13 @@ nfssvc(p, uap) else { error = getsockaddr(&nam, nfsdarg.name, nfsdarg.namelen); - if (error) + if (error) { + fdrop(fp, p); return (error); + } } error = nfssvc_addsock(fp, nam, p); + fdrop(fp, p); } else { error = copyin(uap->argp, (caddr_t)nsd, sizeof (*nsd)); if (error) diff --git a/sys/svr4/svr4_signal.c b/sys/svr4/svr4_signal.c new file mode 100644 index 0000000..46407ba --- /dev/null +++ b/sys/svr4/svr4_signal.c @@ -0,0 +1,666 @@ +/* + * Copyright (c) 1998 Mark Newton + * Copyright (c) 1994 Christos Zoulas + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. The name of the author may not be used to endorse or promote products + * derived from this software without specific prior written permission + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. 
+ * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/proc.h> +#include <sys/filedesc.h> +#include <sys/signal.h> +#include <sys/signalvar.h> +#include <sys/sysproto.h> + +#include <svr4/svr4.h> +#include <svr4/svr4_types.h> +#include <svr4/svr4_signal.h> +#include <svr4/svr4_proto.h> +#include <svr4/svr4_util.h> +#include <svr4/svr4_ucontext.h> + +#define svr4_sigmask(n) (1 << (((n) - 1) & 31)) +#define svr4_sigword(n) (((n) - 1) >> 5) +#define svr4_sigemptyset(s) memset((s), 0, sizeof(*(s))) +#define svr4_sigismember(s, n) ((s)->bits[svr4_sigword(n)] & svr4_sigmask(n)) +#define svr4_sigaddset(s, n) ((s)->bits[svr4_sigword(n)] |= svr4_sigmask(n)) + +void svr4_to_bsd_sigaction __P((const struct svr4_sigaction *, + struct sigaction *)); +void bsd_to_svr4_sigaction __P((const struct sigaction *, + struct svr4_sigaction *)); +void svr4_sigfillset __P((svr4_sigset_t *)); + +int bsd_to_svr4_sig[SVR4_SIGTBLSZ] = { + SVR4_SIGHUP, + SVR4_SIGINT, + SVR4_SIGQUIT, + SVR4_SIGILL, + SVR4_SIGTRAP, + SVR4_SIGABRT, + SVR4_SIGEMT, + SVR4_SIGFPE, + SVR4_SIGKILL, + SVR4_SIGBUS, + SVR4_SIGSEGV, + SVR4_SIGSYS, + SVR4_SIGPIPE, + SVR4_SIGALRM, + SVR4_SIGTERM, + SVR4_SIGURG, + SVR4_SIGSTOP, + SVR4_SIGTSTP, + SVR4_SIGCONT, + SVR4_SIGCHLD, + SVR4_SIGTTIN, + SVR4_SIGTTOU, + SVR4_SIGIO, + SVR4_SIGXCPU, + SVR4_SIGXFSZ, + SVR4_SIGVTALRM, + SVR4_SIGPROF, + SVR4_SIGWINCH, + 0, /* SIGINFO */ + SVR4_SIGUSR1, + SVR4_SIGUSR2, +}; + +int 
svr4_to_bsd_sig[SVR4_SIGTBLSZ] = { + SIGHUP, + SIGINT, + SIGQUIT, + SIGILL, + SIGTRAP, + SIGABRT, + SIGEMT, + SIGFPE, + SIGKILL, + SIGBUS, + SIGSEGV, + SIGSYS, + SIGPIPE, + SIGALRM, + SIGTERM, + SIGUSR1, + SIGUSR2, + SIGCHLD, + 0, /* XXX NetBSD uses SIGPWR here, but we don't seem to have one */ + SIGWINCH, + SIGURG, + SIGIO, + SIGSTOP, + SIGTSTP, + SIGCONT, + SIGTTIN, + SIGTTOU, + SIGVTALRM, + SIGPROF, + SIGXCPU, + SIGXFSZ, +}; + +void +svr4_sigfillset(s) + svr4_sigset_t *s; +{ + int i; + + svr4_sigemptyset(s); + for (i = 0; i < SVR4_NSIG; i++) + if (svr4_to_bsd_sig[i] != 0) + svr4_sigaddset(s, i); +} + +void +svr4_to_bsd_sigset(sss, bss) + const svr4_sigset_t *sss; + sigset_t *bss; +{ + int i, newsig; + + SIGEMPTYSET(*bss); + for (i = 0; i < SVR4_NSIG; i++) + if (svr4_sigismember(sss, i + 1)) { + newsig = svr4_to_bsd_sig[i]; + if (newsig) + SIGADDSET(*bss, newsig); + } +} + +void +bsd_to_svr4_sigset(bss, sss) + const sigset_t *bss; + svr4_sigset_t *sss; +{ + int i, newsig; + + svr4_sigemptyset(sss); + sss->bits[0] = bss->__bits[0] & ~((1U << SVR4_SIGTBLSZ) - 1); + sss->bits[1] = bss->__bits[1]; + sss->bits[2] = bss->__bits[2]; + sss->bits[3] = bss->__bits[3]; + for (i = 1; i <= SVR4_SIGTBLSZ; i++) { + if (SIGISMEMBER(*bss, i)) { + newsig = bsd_to_svr4_sig[_SIG_IDX(i)]; + if (newsig) + svr4_sigaddset(sss, newsig); + } + } +} + +/* + * XXX: Only a subset of the flags is currently implemented. 
+ */ +void +svr4_to_bsd_sigaction(ssa, bsa) + const struct svr4_sigaction *ssa; + struct sigaction *bsa; +{ + + bsa->sa_handler = (sig_t) ssa->ssa_handler; + svr4_to_bsd_sigset(&ssa->ssa_mask, &bsa->sa_mask); + bsa->sa_flags = 0; + if ((ssa->ssa_flags & SVR4_SA_ONSTACK) != 0) + bsa->sa_flags |= SA_ONSTACK; + if ((ssa->ssa_flags & SVR4_SA_RESETHAND) != 0) + bsa->sa_flags |= SA_RESETHAND; + if ((ssa->ssa_flags & SVR4_SA_RESTART) != 0) + bsa->sa_flags |= SA_RESTART; + if ((ssa->ssa_flags & SVR4_SA_SIGINFO) != 0) + DPRINTF(("svr4_to_bsd_sigaction: SA_SIGINFO ignored\n")); + if ((ssa->ssa_flags & SVR4_SA_NOCLDSTOP) != 0) + bsa->sa_flags |= SA_NOCLDSTOP; + if ((ssa->ssa_flags & SVR4_SA_NODEFER) != 0) + bsa->sa_flags |= SA_NODEFER; + if ((ssa->ssa_flags & SVR4_SA_NOCLDWAIT) != 0) + bsa->sa_flags |= SA_NOCLDWAIT; + if ((ssa->ssa_flags & ~SVR4_SA_ALLBITS) != 0) + DPRINTF(("svr4_to_bsd_sigaction: extra bits ignored\n")); +} + +void +bsd_to_svr4_sigaction(bsa, ssa) + const struct sigaction *bsa; + struct svr4_sigaction *ssa; +{ + + ssa->ssa_handler = (svr4_sig_t) bsa->sa_handler; + bsd_to_svr4_sigset(&bsa->sa_mask, &ssa->ssa_mask); + ssa->ssa_flags = 0; + if ((bsa->sa_flags & SA_ONSTACK) != 0) + ssa->ssa_flags |= SVR4_SA_ONSTACK; + if ((bsa->sa_flags & SA_RESETHAND) != 0) + ssa->ssa_flags |= SVR4_SA_RESETHAND; + if ((bsa->sa_flags & SA_RESTART) != 0) + ssa->ssa_flags |= SVR4_SA_RESTART; + if ((bsa->sa_flags & SA_NODEFER) != 0) + ssa->ssa_flags |= SVR4_SA_NODEFER; + if ((bsa->sa_flags & SA_NOCLDSTOP) != 0) + ssa->ssa_flags |= SVR4_SA_NOCLDSTOP; +} + +void +svr4_to_bsd_sigaltstack(sss, bss) + const struct svr4_sigaltstack *sss; + struct sigaltstack *bss; +{ + + bss->ss_sp = sss->ss_sp; + bss->ss_size = sss->ss_size; + bss->ss_flags = 0; + if ((sss->ss_flags & SVR4_SS_DISABLE) != 0) + bss->ss_flags |= SS_DISABLE; + if ((sss->ss_flags & SVR4_SS_ONSTACK) != 0) + bss->ss_flags |= SS_ONSTACK; + if ((sss->ss_flags & ~SVR4_SS_ALLBITS) != 0) + /*XXX*/ uprintf("svr4_to_bsd_sigaltstack: 
extra bits ignored\n"); +} + +void +bsd_to_svr4_sigaltstack(bss, sss) + const struct sigaltstack *bss; + struct svr4_sigaltstack *sss; +{ + + sss->ss_sp = bss->ss_sp; + sss->ss_size = bss->ss_size; + sss->ss_flags = 0; + if ((bss->ss_flags & SS_DISABLE) != 0) + sss->ss_flags |= SVR4_SS_DISABLE; + if ((bss->ss_flags & SS_ONSTACK) != 0) + sss->ss_flags |= SVR4_SS_ONSTACK; +} + +int +svr4_sys_sigaction(p, uap) + register struct proc *p; + struct svr4_sys_sigaction_args *uap; +{ + struct svr4_sigaction *nisa, *oisa, tmpisa; + struct sigaction *nbsa, *obsa, tmpbsa; + struct sigaction_args sa; + caddr_t sg; + int error; + + DPRINTF(("@@@ svr4_sys_sigaction(%d, %d, %d)\n", p->p_pid, + SCARG(uap, signum), + SVR4_SVR42BSD_SIG(SCARG(uap, signum)))); + + sg = stackgap_init(); + nisa = SCARG(uap, nsa); + oisa = SCARG(uap, osa); + + if (oisa != NULL) + obsa = stackgap_alloc(&sg, sizeof(struct sigaction)); + else + obsa = NULL; + + if (nisa != NULL) { + nbsa = stackgap_alloc(&sg, sizeof(struct sigaction)); + if ((error = copyin(nisa, &tmpisa, sizeof(tmpisa))) != 0) + return error; + svr4_to_bsd_sigaction(&tmpisa, &tmpbsa); + if ((error = copyout(&tmpbsa, nbsa, sizeof(tmpbsa))) != 0) + return error; + } else + nbsa = NULL; + +#if defined(DEBUG_SVR4) + { + int i; + for (i = 0; i < 4; i++) + DPRINTF(("\tssa_mask[%d] = %lx\n", i, + nisa->ssa_mask.bits[i])); + DPRINTF(("\tssa_handler = %lx\n", nisa->ssa_handler)); + } +#endif + + SCARG(&sa, sig) = SVR4_SVR42BSD_SIG(SCARG(uap, signum)); + SCARG(&sa, act) = nbsa; + SCARG(&sa, oact) = obsa; + + if ((error = sigaction(p, &sa)) != 0) + return error; + + if (oisa != NULL) { + if ((error = copyin(obsa, &tmpbsa, sizeof(tmpbsa))) != 0) + return error; + bsd_to_svr4_sigaction(&tmpbsa, &tmpisa); + if ((error = copyout(&tmpisa, oisa, sizeof(tmpisa))) != 0) + return error; + } + + return 0; +} + +int +svr4_sys_sigaltstack(p, uap) + register struct proc *p; + struct svr4_sys_sigaltstack_args *uap; +{ + struct svr4_sigaltstack *nsss, *osss, tmpsss; 
+ struct sigaltstack *nbss, *obss, tmpbss; + struct sigaltstack_args sa; + caddr_t sg; + int error, *retval; + + retval = p->p_retval; + sg = stackgap_init(); + nsss = SCARG(uap, nss); + osss = SCARG(uap, oss); + + if (osss != NULL) + obss = stackgap_alloc(&sg, sizeof(struct sigaltstack)); + else + obss = NULL; + + if (nsss != NULL) { + nbss = stackgap_alloc(&sg, sizeof(struct sigaltstack)); + if ((error = copyin(nsss, &tmpsss, sizeof(tmpsss))) != 0) + return error; + svr4_to_bsd_sigaltstack(&tmpsss, &tmpbss); + if ((error = copyout(&tmpbss, nbss, sizeof(tmpbss))) != 0) + return error; + } else + nbss = NULL; + + SCARG(&sa, ss) = nbss; + SCARG(&sa, oss) = obss; + + if ((error = sigaltstack(p, &sa)) != 0) + return error; + + if (obss != NULL) { + if ((error = copyin(obss, &tmpbss, sizeof(tmpbss))) != 0) + return error; + bsd_to_svr4_sigaltstack(&tmpbss, &tmpsss); + if ((error = copyout(&tmpsss, osss, sizeof(tmpsss))) != 0) + return error; + } + + return 0; +} + +/* + * Stolen from the ibcs2 one + */ +int +svr4_sys_signal(p, uap) + register struct proc *p; + struct svr4_sys_signal_args *uap; +{ + int signum; + int error, *retval = p->p_retval; + caddr_t sg = stackgap_init(); + + DPRINTF(("@@@ svr4_sys_signal(%d)\n", p->p_pid)); + + signum = SVR4_SVR42BSD_SIG(SVR4_SIGNO(SCARG(uap, signum))); + if (signum <= 0 || signum > SVR4_NSIG) + return (EINVAL); + + switch (SVR4_SIGCALL(SCARG(uap, signum))) { + case SVR4_SIGDEFER_MASK: + if (SCARG(uap, handler) == SVR4_SIG_HOLD) + goto sighold; + /* FALLTHROUGH */ + + case SVR4_SIGNAL_MASK: + { + struct sigaction_args sa_args; + struct sigaction *nbsa, *obsa, sa; + + nbsa = stackgap_alloc(&sg, sizeof(struct sigaction)); + obsa = stackgap_alloc(&sg, sizeof(struct sigaction)); + SCARG(&sa_args, sig) = signum; + SCARG(&sa_args, act) = nbsa; + SCARG(&sa_args, oact) = obsa; + + sa.sa_handler = (sig_t) SCARG(uap, handler); + SIGEMPTYSET(sa.sa_mask); + sa.sa_flags = 0; + + if (signum != SIGALRM) + sa.sa_flags = SA_RESTART; + + if 
((error = copyout(&sa, nbsa, sizeof(sa))) != 0) + return error; + if ((error = sigaction(p, &sa_args)) != 0) { + DPRINTF(("signal: sigaction failed: %d\n", + error)); + *retval = (int)SVR4_SIG_ERR; + return error; + } + if ((error = copyin(obsa, &sa, sizeof(sa))) != 0) + return error; + *retval = (int)sa.sa_handler; + return 0; + } + + case SVR4_SIGHOLD_MASK: +sighold: + { + struct sigprocmask_args sa; + sigset_t *set; + + set = stackgap_alloc(&sg, sizeof(sigset_t)); + SIGEMPTYSET(*set); + SIGADDSET(*set, signum); + SCARG(&sa, how) = SIG_BLOCK; + SCARG(&sa, set) = set; + SCARG(&sa, oset) = NULL; + return sigprocmask(p, &sa); + } + + case SVR4_SIGRELSE_MASK: + { + struct sigprocmask_args sa; + sigset_t *set; + + set = stackgap_alloc(&sg, sizeof(sigset_t)); + SIGEMPTYSET(*set); + SIGADDSET(*set, signum); + SCARG(&sa, how) = SIG_UNBLOCK; + SCARG(&sa, set) = set; + SCARG(&sa, oset) = NULL; + return sigprocmask(p, &sa); + } + + case SVR4_SIGIGNORE_MASK: + { + struct sigaction_args sa_args; + struct sigaction *bsa, sa; + + bsa = stackgap_alloc(&sg, sizeof(struct sigaction)); + SCARG(&sa_args, sig) = signum; + SCARG(&sa_args, act) = bsa; + SCARG(&sa_args, oact) = NULL; + + sa.sa_handler = SIG_IGN; + SIGEMPTYSET(sa.sa_mask); + sa.sa_flags = 0; + if ((error = copyout(&sa, bsa, sizeof(sa))) != 0) + return error; + if ((error = sigaction(p, &sa_args)) != 0) { + DPRINTF(("sigignore: sigaction failed\n")); + return error; + } + return 0; + } + + case SVR4_SIGPAUSE_MASK: + { + struct sigsuspend_args sa; + sigset_t *set; + + set = stackgap_alloc(&sg, sizeof(sigset_t)); + *set = p->p_sigmask; + SIGDELSET(*set, signum); + SCARG(&sa, sigmask) = set; + return sigsuspend(p, &sa); + } + + default: + return (ENOSYS); + } +} + + +int +svr4_sys_sigprocmask(p, uap) + struct proc *p; + struct svr4_sys_sigprocmask_args *uap; +{ + svr4_sigset_t sss; + sigset_t bss; + int error = 0, *retval; + + retval = p->p_retval; + if (SCARG(uap, oset) != NULL) { + /* Fix the return value first if needed 
*/ + bsd_to_svr4_sigset(&p->p_sigmask, &sss); + if ((error = copyout(&sss, SCARG(uap, oset), sizeof(sss))) != 0) + return error; + } + + if (SCARG(uap, set) == NULL) + /* Just examine */ + return 0; + + if ((error = copyin(SCARG(uap, set), &sss, sizeof(sss))) != 0) + return error; + + svr4_to_bsd_sigset(&sss, &bss); + + mtx_enter_sched_quick(); + + switch (SCARG(uap, how)) { + case SVR4_SIG_BLOCK: + SIGSETOR(p->p_sigmask, bss); + SIG_CANTMASK(p->p_sigmask); + break; + + case SVR4_SIG_UNBLOCK: + SIGSETNAND(p->p_sigmask, bss); + break; + + case SVR4_SIG_SETMASK: + p->p_sigmask = bss; + SIG_CANTMASK(p->p_sigmask); + break; + + default: + error = EINVAL; + break; + } + + mtx_exit_sched_quick(); + + return error; +} + +int +svr4_sys_sigpending(p, uap) + struct proc *p; + struct svr4_sys_sigpending_args *uap; +{ + sigset_t bss; + int *retval; + svr4_sigset_t sss; + + DPRINTF(("@@@ svr4_sys_sigpending(%d)\n", p->p_pid)); + retval = p->p_retval; + switch (SCARG(uap, what)) { + case 1: /* sigpending */ + if (SCARG(uap, mask) == NULL) + return 0; + bss = p->p_siglist; + SIGSETAND(bss, p->p_sigmask); + bsd_to_svr4_sigset(&bss, &sss); + break; + + case 2: /* sigfillset */ + svr4_sigfillset(&sss); +#if defined(DEBUG_SVR4) + { + int i; + for (i = 0; i < 4; i++) + DPRINTF(("new sigset[%d] = %lx\n", i, (long)sss.bits[i])); + } +#endif + break; + + default: + return EINVAL; + } + + return copyout(&sss, SCARG(uap, mask), sizeof(sss)); +} + +int +svr4_sys_sigsuspend(p, uap) + register struct proc *p; + struct svr4_sys_sigsuspend_args *uap; +{ + svr4_sigset_t sss; + sigset_t *bss; + struct sigsuspend_args sa; + int error; + caddr_t sg = stackgap_init(); + + if ((error = copyin(SCARG(uap, ss), &sss, sizeof(sss))) != 0) + return error; + + bss = stackgap_alloc(&sg, sizeof(sigset_t)); + svr4_to_bsd_sigset(&sss, bss); + + SCARG(&sa, sigmask) = bss; + return sigsuspend(p, &sa); +} + + +int +svr4_sys_kill(p, uap) + register struct proc *p; + struct svr4_sys_kill_args *uap; +{ + struct 
kill_args ka; + + SCARG(&ka, pid) = SCARG(uap, pid); + SCARG(&ka, signum) = SVR4_SVR42BSD_SIG(SCARG(uap, signum)); + return kill(p, &ka); +} + + +int +svr4_sys_context(p, uap) + register struct proc *p; + struct svr4_sys_context_args *uap; +{ + struct svr4_ucontext uc; + int error; + + switch (uap->func) { + case 0: + DPRINTF(("getcontext(%p)\n", uap->uc)); + svr4_getcontext(p, &uc, &p->p_sigmask, + p->p_sigstk.ss_flags & SS_ONSTACK); + return copyout(&uc, uap->uc, sizeof(uc)); + + case 1: + DPRINTF(("setcontext(%p)\n", uap->uc)); + if ((error = copyin(uap->uc, &uc, sizeof(uc))) != 0) + return error; + DPRINTF(("uc_flags = %lx\n", uc.uc_flags)); +#if defined(DEBUG_SVR4) + { + int i; + for (i = 0; i < 4; i++) + DPRINTF(("uc_sigmask[%d] = %lx\n", i, + uc.uc_sigmask.bits[i])); + } +#endif + return svr4_setcontext(p, &uc); + + default: + DPRINTF(("context(%d, %p)\n", uap->func, + uap->uc)); + return ENOSYS; + } + return 0; +} + +int +svr4_sys_pause(p, uap) + register struct proc *p; + struct svr4_sys_pause_args *uap; +{ + struct sigsuspend_args bsa; + + SCARG(&bsa, sigmask) = &p->p_sigmask; + return sigsuspend(p, &bsa); +} diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 1170bae..273ddf1 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -92,7 +92,9 @@ struct filedesc0 { * Per-process open flags. */ #define UF_EXCLOSE 0x01 /* auto-close on exec */ +#if 0 #define UF_MAPPED 0x02 /* mapped from device */ +#endif /* * Storage required per open file descriptor. 
@@ -125,7 +127,7 @@ SLIST_HEAD(sigiolst, sigio); #ifdef _KERNEL int closef __P((struct file *fp, struct proc *p)); -int dupfdopen __P((struct filedesc *fdp, int indx, int dfd, int mode, +int dupfdopen __P((struct proc *p, struct filedesc *fdp, int indx, int dfd, int mode, int error)); int falloc __P((struct proc *p, struct file **resultfp, int *resultfd)); int fdalloc __P((struct proc *p, int want, int *result)); @@ -140,7 +142,7 @@ pid_t fgetown __P((struct sigio *sigio)); int fsetown __P((pid_t pgid, struct sigio **sigiop)); void funsetown __P((struct sigio *sigio)); void funsetownlst __P((struct sigiolst *sigiolst)); -struct file *getfp __P((struct filedesc* fdp, int fd, int flag)); +struct file *holdfp __P((struct filedesc *fdp, int fd, int flag)); int getvnode __P((struct filedesc *fdp, int fd, struct file **fpp)); void setugidsafety __P((struct proc *p)); diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h index 5f2cd9a..e97d762 100644 --- a/sys/sys/socketvar.h +++ b/sys/sys/socketvar.h @@ -324,7 +324,7 @@ int soo_stat __P((struct file *fp, struct stat *ub, struct proc *p)); * From uipc_socket and friends */ struct sockaddr *dup_sockaddr __P((struct sockaddr *sa, int canwait)); -int getsock __P((struct filedesc *fdp, int fdes, struct file **fpp)); +int holdsock __P((struct filedesc *fdp, int fdes, struct file **fpp)); int sockargs __P((struct mbuf **mp, caddr_t buf, int buflen, int type)); int getsockaddr __P((struct sockaddr **namp, caddr_t uaddr, size_t len)); void sbappend __P((struct sockbuf *sb, struct mbuf *m)); diff --git a/sys/vm/swap_pager.c b/sys/vm/swap_pager.c index b1f4802..6a427c9 100644 --- a/sys/vm/swap_pager.c +++ b/sys/vm/swap_pager.c @@ -1542,8 +1542,10 @@ swp_pager_async_iodone(bp) * be overridden by the original caller of * getpages so don't play cute tricks here. * - * XXX it may not be legal to free the page - * here as this messes with the object->memq's. 
+ * XXX IT IS NOT LEGAL TO FREE THE PAGE HERE + * AS THIS MESSES WITH object->memq, and it is + * not legal to mess with object->memq from an + * interrupt. */ m->valid = 0; diff --git a/sys/vm/vm_mmap.c b/sys/vm/vm_mmap.c index 43d75a5..6328ff7 100644 --- a/sys/vm/vm_mmap.c +++ b/sys/vm/vm_mmap.c @@ -188,7 +188,7 @@ mmap(p, uap) register struct mmap_args *uap; { register struct filedesc *fdp = p->p_fd; - register struct file *fp; + register struct file *fp = NULL; struct vnode *vp; vm_offset_t addr; vm_size_t size, pageoff; @@ -283,6 +283,12 @@ mmap(p, uap) return (EBADF); if (fp->f_type != DTYPE_VNODE) return (EINVAL); + + /* + * don't let the descriptor disappear on us if we block + */ + fhold(fp); + /* * POSIX shared-memory objects are defined to have * kernel persistence, and are not defined to support @@ -332,8 +338,10 @@ mmap(p, uap) else disablexworkaround = suser(p); if (vp->v_type == VCHR && disablexworkaround && - (flags & (MAP_PRIVATE|MAP_COPY))) - return (EINVAL); + (flags & (MAP_PRIVATE|MAP_COPY))) { + error = EINVAL; + goto done; + } /* * Ensure that file and memory protections are * compatible. Note that we only worry about @@ -344,10 +352,12 @@ mmap(p, uap) * proc does a setuid? */ maxprot = VM_PROT_EXECUTE; /* ??? 
*/ - if (fp->f_flag & FREAD) + if (fp->f_flag & FREAD) { maxprot |= VM_PROT_READ; - else if (prot & PROT_READ) - return (EACCES); + } else if (prot & PROT_READ) { + error = EACCES; + goto done; + } /* * If we are sharing potential changes (either via * MAP_SHARED or via the implicit sharing of character @@ -364,17 +374,23 @@ mmap(p, uap) struct vattr va; if ((error = VOP_GETATTR(vp, &va, - p->p_ucred, p))) - return (error); + p->p_ucred, p))) { + goto done; + } if ((va.va_flags & - (SF_SNAPSHOT|IMMUTABLE|APPEND)) == 0) + (SF_SNAPSHOT|IMMUTABLE|APPEND)) == 0) { maxprot |= VM_PROT_WRITE; - else if (prot & PROT_WRITE) - return (EPERM); - } else if ((prot & PROT_WRITE) != 0) - return (EACCES); - } else + } else if (prot & PROT_WRITE) { + error = EPERM; + goto done; + } + } else if ((prot & PROT_WRITE) != 0) { + error = EACCES; + goto done; + } + } else { maxprot |= VM_PROT_WRITE; + } handle = (void *)vp; } @@ -387,13 +403,17 @@ mmap(p, uap) */ if (max_proc_mmap && vms->vm_map.nentries >= max_proc_mmap * vms->vm_refcnt) { - return (ENOMEM); + error = ENOMEM; + goto done; } error = vm_mmap(&vms->vm_map, &addr, size, prot, maxprot, flags, handle, pos); if (error == 0) p->p_retval[0] = (register_t) (addr + pageoff); +done: + if (fp) + fdrop(fp, p); return (error); } @@ -576,6 +596,7 @@ munmap(p, uap) return (0); } +#if 0 void munmapfd(p, fd) struct proc *p; @@ -586,6 +607,7 @@ munmapfd(p, fd) */ p->p_fd->fd_ofileflags[fd] &= ~UF_MAPPED; } +#endif #ifndef _SYS_SYSPROTO_H_ struct mprotect_args { |