diff options
author | tegge <tegge@FreeBSD.org> | 2003-06-02 16:05:32 +0000 |
---|---|---|
committer | tegge <tegge@FreeBSD.org> | 2003-06-02 16:05:32 +0000 |
commit | e41badac0ac3cc9aef1d142d0fa7cd6fd8524008 (patch) | |
tree | f3663ac6adef95bd877d201d001e851bc72e825d /sys | |
parent | 0078b69f61a8569341ecc037caa661b9951d2ff6 (diff) | |
download | FreeBSD-src-e41badac0ac3cc9aef1d142d0fa7cd6fd8524008.zip FreeBSD-src-e41badac0ac3cc9aef1d142d0fa7cd6fd8524008.tar.gz |
Add tracking of process leaders sharing a file descriptor table and
allow a file descriptor table to be shared between multiple process
leaders.
PR: 50923
Diffstat (limited to 'sys')
-rw-r--r-- | sys/kern/init_main.c | 1 | ||||
-rw-r--r-- | sys/kern/kern_descrip.c | 202 | ||||
-rw-r--r-- | sys/kern/kern_fork.c | 42 | ||||
-rw-r--r-- | sys/sys/filedesc.h | 29 | ||||
-rw-r--r-- | sys/sys/proc.h | 1 |
5 files changed, 256 insertions, 19 deletions
diff --git a/sys/kern/init_main.c b/sys/kern/init_main.c index 6caec77..b3920bd 100644 --- a/sys/kern/init_main.c +++ b/sys/kern/init_main.c @@ -407,6 +407,7 @@ proc0_init(void *dummy __unused) /* Create the file descriptor table. */ fdp = &filedesc0; p->p_fd = &fdp->fd_fd; + p->p_fdtol = NULL; mtx_init(&fdp->fd_fd.fd_mtx, FILEDESC_LOCK_DESC, NULL, MTX_DEF); fdp->fd_fd.fd_refcnt = 1; fdp->fd_fd.fd_cmask = cmask; diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 5df41d3..cc7be07 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -73,6 +73,8 @@ #include <vm/uma.h> static MALLOC_DEFINE(M_FILEDESC, "file desc", "Open file descriptor table"); +static MALLOC_DEFINE(M_FILEDESC_TO_LEADER, "file desc to leader", + "file desc to leader structures"); static MALLOC_DEFINE(M_SIGIO, "sigio", "sigio structures"); static uma_zone_t file_zone; @@ -456,6 +458,7 @@ do_dup(td, type, old, new, retval) struct file *fp; struct file *delfp; int error, newfd; + int holdleaders; p = td->td_proc; fdp = p->p_fd; @@ -520,6 +523,15 @@ do_dup(td, type, old, new, retval) * introducing an ownership race for the slot. */ delfp = fdp->fd_ofiles[new]; + if (delfp != NULL && p->p_fdtol != NULL) { + /* + * Ask fdfree() to sleep to ensure that all relevant + * process leaders can be traversed in closef(). + */ + fdp->fd_holdleaderscount++; + holdleaders = 1; + } else + holdleaders = 0; KASSERT(delfp == NULL || type == DUP_FIXED, ("dup() picked an open file")); #if 0 @@ -546,6 +558,16 @@ do_dup(td, type, old, new, retval) mtx_lock(&Giant); (void) closef(delfp, td); mtx_unlock(&Giant); + if (holdleaders) { + FILEDESC_LOCK(fdp); + fdp->fd_holdleaderscount--; + if (fdp->fd_holdleaderscount == 0 && + fdp->fd_holdleaderswakeup != 0) { + fdp->fd_holdleaderswakeup = 0; + wakeup(&fdp->fd_holdleaderscount); + } + FILEDESC_UNLOCK(fdp); + } } return (0); } @@ -793,9 +815,11 @@ close(td, uap) struct filedesc *fdp; struct file *fp; int fd, error; + int holdleaders; fd = uap->fd; error = 0; + holdleaders = 0; fdp = td->td_proc->p_fd; mtx_lock(&Giant); FILEDESC_LOCK(fdp); @@ -811,6 +835,14 @@ close(td, uap) #endif fdp->fd_ofiles[fd] = NULL; fdp->fd_ofileflags[fd] = 0; + if (td->td_proc->p_fdtol != NULL) { + /* + * Ask fdfree() to sleep to ensure that all relevant + * process leaders can be traversed in closef(). + */ + fdp->fd_holdleaderscount++; + holdleaders = 1; + } /* * we now hold the fp reference that used to be owned by the descriptor @@ -829,6 +861,16 @@ close(td, uap) error = closef(fp, td); done2: mtx_unlock(&Giant); + if (holdleaders) { + FILEDESC_LOCK(fdp); + fdp->fd_holdleaderscount--; + if (fdp->fd_holdleaderscount == 0 && + fdp->fd_holdleaderswakeup != 0) { + fdp->fd_holdleaderswakeup = 0; + wakeup(&fdp->fd_holdleaderscount); + } + FILEDESC_UNLOCK(fdp); + } return (error); } @@ -1382,12 +1424,88 @@ fdfree(td) struct filedesc *fdp; struct file **fpp; int i; + struct filedesc_to_leader *fdtol; + struct file *fp; + struct vnode *vp; + struct flock lf; /* Certain daemons might not have file descriptors. */ fdp = td->td_proc->p_fd; if (fdp == NULL) return; + /* Check for special need to clear POSIX style locks */ + fdtol = td->td_proc->p_fdtol; + if (fdtol != NULL) { + FILEDESC_LOCK(fdp); + KASSERT(fdtol->fdl_refcount > 0, + ("filedesc_to_refcount botch: fdl_refcount=%d", + fdtol->fdl_refcount)); + if (fdtol->fdl_refcount == 1 && + (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { + i = 0; + fpp = fdp->fd_ofiles; + for (i = 0, fpp = fdp->fd_ofiles; + i < fdp->fd_lastfile; + i++, fpp++) { + if (*fpp == NULL || + (*fpp)->f_type != DTYPE_VNODE) + continue; + fp = *fpp; + fhold(fp); + FILEDESC_UNLOCK(fdp); + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + lf.l_type = F_UNLCK; + vp = fp->f_data; + (void) VOP_ADVLOCK(vp, + (caddr_t)td->td_proc-> + p_leader, + F_UNLCK, + &lf, + F_POSIX); + FILEDESC_LOCK(fdp); + fdrop(fp, td); + fpp = fdp->fd_ofiles + i; + } + } + retry: + if (fdtol->fdl_refcount == 1) { + if (fdp->fd_holdleaderscount > 0 && + (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { + /* + * close() or do_dup() has cleared a reference + * in a shared file descriptor table. + */ + fdp->fd_holdleaderswakeup = 1; + msleep(&fdp->fd_holdleaderscount, &fdp->fd_mtx, + PLOCK, "fdlhold", 0); + goto retry; + } + if (fdtol->fdl_holdcount > 0) { + /* + * Ensure that fdtol->fdl_leader + * remains valid in closef(). + */ + fdtol->fdl_wakeup = 1; + msleep(fdtol, &fdp->fd_mtx, + PLOCK, "fdlhold", 0); + goto retry; + } + } + fdtol->fdl_refcount--; + if (fdtol->fdl_refcount == 0 && + fdtol->fdl_holdcount == 0) { + fdtol->fdl_next->fdl_prev = fdtol->fdl_prev; + fdtol->fdl_prev->fdl_next = fdtol->fdl_next; + } else + fdtol = NULL; + td->td_proc->p_fdtol = NULL; + FILEDESC_UNLOCK(fdp); + if (fdtol != NULL) + FREE(fdtol, M_FILEDESC_TO_LEADER); + } FILEDESC_LOCK(fdp); if (--fdp->fd_refcnt > 0) { FILEDESC_UNLOCK(fdp); @@ -1625,6 +1743,8 @@ closef(fp, td) { struct vnode *vp; struct flock lf; + struct filedesc_to_leader *fdtol; + struct filedesc *fdp; if (fp == NULL) return (0); @@ -1636,15 +1756,51 @@ closef(fp, td) * If the descriptor was in a message, POSIX-style locks * aren't passed with the descriptor. */ - if (td != NULL && (td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0 && + if (td != NULL && fp->f_type == DTYPE_VNODE) { - lf.l_whence = SEEK_SET; - lf.l_start = 0; - lf.l_len = 0; - lf.l_type = F_UNLCK; - vp = fp->f_data; - (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, - F_UNLCK, &lf, F_POSIX); + if ((td->td_proc->p_leader->p_flag & P_ADVLOCK) != 0) { + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + lf.l_type = F_UNLCK; + vp = fp->f_data; + (void) VOP_ADVLOCK(vp, (caddr_t)td->td_proc->p_leader, + F_UNLCK, &lf, F_POSIX); + } + fdtol = td->td_proc->p_fdtol; + if (fdtol != NULL) { + /* + * Handle special case where file descriptor table + * is shared between multiple process leaders. + */ + fdp = td->td_proc->p_fd; + FILEDESC_LOCK(fdp); + for (fdtol = fdtol->fdl_next; + fdtol != td->td_proc->p_fdtol; + fdtol = fdtol->fdl_next) { + if ((fdtol->fdl_leader->p_flag & + P_ADVLOCK) == 0) + continue; + fdtol->fdl_holdcount++; + FILEDESC_UNLOCK(fdp); + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + lf.l_type = F_UNLCK; + vp = fp->f_data; + (void) VOP_ADVLOCK(vp, + (caddr_t)fdtol->fdl_leader, + F_UNLCK, &lf, F_POSIX); + FILEDESC_LOCK(fdp); + fdtol->fdl_holdcount--; + if (fdtol->fdl_holdcount == 0 && + fdtol->fdl_wakeup != 0) { + fdtol->fdl_wakeup = 0; + wakeup(fdtol); + } + } + FILEDESC_UNLOCK(fdp); + } } return (fdrop(fp, td)); } @@ -2079,6 +2235,36 @@ dupfdopen(td, fdp, indx, dfd, mode, error) /* NOTREACHED */ } + +struct filedesc_to_leader * +filedesc_to_leader_alloc(struct filedesc_to_leader *old, + struct filedesc *fdp, + struct proc *leader) +{ + struct filedesc_to_leader *fdtol; + + MALLOC(fdtol, struct filedesc_to_leader *, + sizeof(struct filedesc_to_leader), + M_FILEDESC_TO_LEADER, + M_WAITOK); + fdtol->fdl_refcount = 1; + fdtol->fdl_holdcount = 0; + fdtol->fdl_wakeup = 0; + fdtol->fdl_leader = leader; + if (old != NULL) { + FILEDESC_LOCK(fdp); + fdtol->fdl_next = old->fdl_next; + fdtol->fdl_prev = old; + old->fdl_next = fdtol; + fdtol->fdl_next->fdl_prev = fdtol; + FILEDESC_UNLOCK(fdp); + } else { + fdtol->fdl_next = fdtol; + fdtol->fdl_prev = fdtol; + } + return fdtol; +} + /* * Get file structures. */ diff --git a/sys/kern/kern_fork.c b/sys/kern/kern_fork.c index 0ba17ce..1de3dc8 100644 --- a/sys/kern/kern_fork.c +++ b/sys/kern/kern_fork.c @@ -139,13 +139,6 @@ rfork(td, uap) /* Don't allow kernel only flags. */ if ((uap->flags & RFKERNELONLY) != 0) return (EINVAL); - /* - * Don't allow sharing of file descriptor table unless - * RFTHREAD flag is supplied - */ - if ((uap->flags & (RFPROC | RFTHREAD | RFFDG | RFCFDG)) == - RFPROC) - return(EINVAL); error = fork1(td, uap->flags, 0, &p2); if (error == 0) { td->td_retval[0] = p2 ? p2->p_pid : 0; @@ -209,6 +202,7 @@ fork1(td, flags, pages, procp) int ok; static int pidchecked = 0; struct filedesc *fd; + struct filedesc_to_leader *fdtol; struct proc *p1 = td->td_proc; struct thread *td2; struct kse *ke2; @@ -419,15 +413,40 @@ again: /* * Copy filedesc. */ - if (flags & RFCFDG) + if (flags & RFCFDG) { fd = fdinit(td->td_proc->p_fd); - else if (flags & RFFDG) { + fdtol = NULL; + } else if (flags & RFFDG) { FILEDESC_LOCK(p1->p_fd); fd = fdcopy(td->td_proc->p_fd); FILEDESC_UNLOCK(p1->p_fd); - } else + fdtol = NULL; + } else { fd = fdshare(p1->p_fd); - + if (p1->p_fdtol == NULL) + p1->p_fdtol = + filedesc_to_leader_alloc(NULL, + NULL, + p1->p_leader); + if ((flags & RFTHREAD) != 0) { + /* + * Shared file descriptor table and + * shared process leaders. + */ + fdtol = p1->p_fdtol; + FILEDESC_LOCK(p1->p_fd); + fdtol->fdl_refcount++; + FILEDESC_UNLOCK(p1->p_fd); + } else { + /* + * Shared file descriptor table, and + * different process leaders + */ + fdtol = filedesc_to_leader_alloc(p1->p_fdtol, + p1->p_fd, + p2); + } + } /* * Make a proc table entry for the new process. * Start by zeroing the section of proc that is zero-initialized, @@ -506,6 +525,7 @@ again: if (p2->p_textvp) VREF(p2->p_textvp); p2->p_fd = fd; + p2->p_fdtol = fdtol; PROC_UNLOCK(p1); PROC_UNLOCK(p2); diff --git a/sys/sys/filedesc.h b/sys/sys/filedesc.h index 4cbb404..dc3ffea 100644 --- a/sys/sys/filedesc.h +++ b/sys/sys/filedesc.h @@ -75,6 +75,8 @@ struct filedesc { u_long fd_knhashmask; /* size of knhash */ struct klist *fd_knhash; /* hash table for attached knotes */ struct mtx fd_mtx; /* mtx to protect the members of struct filedesc */ + int fd_holdleaderscount; /* block fdfree() for shared close() */ + int fd_holdleaderswakeup; /* fdfree() needs wakeup */ }; /* @@ -91,6 +93,27 @@ struct filedesc0 { char fd_dfileflags[NDFILE]; }; + + +/* + * Structure to keep track of (process leader, struct fildedesc) tuples. + * Each process has a pointer to such a structure when detailed tracking + * is needed. e.g. when rfork(RFPROC | RFMEM) causes a file descriptor + * table to be shared by processes having different "p_leader" pointers + * and thus distinct POSIX style locks. + * + * fdl_refcount and fdl_holdcount are protected by struct filedesc mtx. + */ +struct filedesc_to_leader { + int fdl_refcount; /* references from struct proc */ + int fdl_holdcount; /* temporary hold during closef */ + int fdl_wakeup; /* fdfree() waits on closef() */ + struct proc *fdl_leader; /* owner of POSIX locks */ + /* Circular list */ + struct filedesc_to_leader *fdl_prev; + struct filedesc_to_leader *fdl_next; +}; + /* * Per-process open flags. */ @@ -131,6 +154,12 @@ static __inline struct file * fget_locked(struct filedesc *fdp, int fd); int getvnode(struct filedesc *fdp, int fd, struct file **fpp); void setugidsafety(struct thread *td); +struct filedesc_to_leader * +filedesc_to_leader_alloc(struct filedesc_to_leader *old, + struct filedesc *fdp, + struct proc *leader); + + static __inline struct file * fget_locked(struct filedesc *fdp, int fd) { diff --git a/sys/sys/proc.h b/sys/sys/proc.h index 3aac050..7c949c7 100644 --- a/sys/sys/proc.h +++ b/sys/sys/proc.h @@ -510,6 +510,7 @@ struct proc { TAILQ_HEAD(, thread) p_suspended; /* (td_runq) Suspended threads. */ struct ucred *p_ucred; /* (c) Process owner's identity. */ struct filedesc *p_fd; /* (b) Ptr to open files structure. */ + struct filedesc_to_leader *p_fdtol; /* (b) Ptr to tracking node */ /* Accumulated stats for all KSEs? */ struct pstats *p_stats; /* (b) Accounting/statistics (CPU). */ struct plimit *p_limit; /* (c*) Process limits. */ |