summaryrefslogtreecommitdiffstats
path: root/sys/kern/kern_descrip.c
diff options
context:
space:
mode:
authorjeff <jeff@FreeBSD.org>2009-05-14 03:24:22 +0000
committerjeff <jeff@FreeBSD.org>2009-05-14 03:24:22 +0000
commit20397e643153b90263768cb71928b488cab2c91e (patch)
tree03247850c0c9db357665199f280535e6edca7d99 /sys/kern/kern_descrip.c
parentdc1ac440de95721f8b696146577a51ef7a418f59 (diff)
downloadFreeBSD-src-20397e643153b90263768cb71928b488cab2c91e.zip
FreeBSD-src-20397e643153b90263768cb71928b488cab2c91e.tar.gz
- Implement a lockless file descriptor lookup algorithm in
fget_unlocked(). - Save old file descriptor tables created on expansion until the entire descriptor table is freed so that pointers may be followed without regard for expanders. - Mark the file zone as NOFREE so we may attempt to reference potentially freed files. - Convert several fget_locked() users to fget_unlocked(). This requires us to manage reference counts explicitly but reduces locking overhead in the common case.
Diffstat (limited to 'sys/kern/kern_descrip.c')
-rw-r--r--sys/kern/kern_descrip.c121
1 files changed, 88 insertions, 33 deletions
diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c
index 2f46307..f29b0eb 100644
--- a/sys/kern/kern_descrip.c
+++ b/sys/kern/kern_descrip.c
@@ -125,12 +125,24 @@ static void fdused(struct filedesc *fdp, int fd);
#define OFILESIZE (sizeof(struct file *) + sizeof(char))
/*
+ * Storage to hold unused ofiles that need to be reclaimed.
+ */
+struct freetable {
+ struct file **ft_table;
+ SLIST_ENTRY(freetable) ft_next;
+};
+
+/*
* Basic allocation of descriptors:
* one of the above, plus arrays for NDFILE descriptors.
*/
struct filedesc0 {
struct filedesc fd_fd;
/*
+ * ofiles which need to be reclaimed on free.
+ */
+ SLIST_HEAD(,freetable) fd_free;
+ /*
* These arrays are used when the number of open files is
* <= NDFILE, and are then pointed to by the pointers above.
*/
@@ -1268,7 +1280,10 @@ out:
static void
fdgrowtable(struct filedesc *fdp, int nfd)
{
+ struct filedesc0 *fdp0;
+ struct freetable *fo;
struct file **ntable;
+ struct file **otable;
char *nfileflags;
int nnfiles, onfiles;
NDSLOTTYPE *nmap;
@@ -1287,7 +1302,7 @@ fdgrowtable(struct filedesc *fdp, int nfd)
/* allocate a new table and (if required) new bitmaps */
FILEDESC_XUNLOCK(fdp);
- ntable = malloc(nnfiles * OFILESIZE,
+ ntable = malloc((nnfiles * OFILESIZE) + sizeof(struct freetable),
M_FILEDESC, M_ZERO | M_WAITOK);
nfileflags = (char *)&ntable[nnfiles];
if (NDSLOTS(nnfiles) > NDSLOTS(onfiles))
@@ -1311,10 +1326,20 @@ fdgrowtable(struct filedesc *fdp, int nfd)
}
bcopy(fdp->fd_ofiles, ntable, onfiles * sizeof(*ntable));
bcopy(fdp->fd_ofileflags, nfileflags, onfiles);
- if (onfiles > NDFILE)
- free(fdp->fd_ofiles, M_FILEDESC);
- fdp->fd_ofiles = ntable;
+ otable = fdp->fd_ofiles;
fdp->fd_ofileflags = nfileflags;
+ fdp->fd_ofiles = ntable;
+ /*
+ * We must preserve ofiles until the process exits because we can't
+ * be certain that no threads have references to the old table via
+ * _fget().
+ */
+ if (onfiles > NDFILE) {
+ fo = (struct freetable *)&otable[onfiles];
+ fdp0 = (struct filedesc0 *)fdp;
+ fo->ft_table = otable;
+ SLIST_INSERT_HEAD(&fdp0->fd_free, fo, ft_next);
+ }
if (NDSLOTS(nnfiles) > NDSLOTS(onfiles)) {
bcopy(fdp->fd_map, nmap, NDSLOTS(onfiles) * sizeof(*nmap));
if (NDSLOTS(onfiles) > NDSLOTS(NDFILE))
@@ -1512,6 +1537,8 @@ fdhold(struct proc *p)
static void
fddrop(struct filedesc *fdp)
{
+ struct filedesc0 *fdp0;
+ struct freetable *ft;
int i;
mtx_lock(&fdesc_mtx);
@@ -1521,6 +1548,11 @@ fddrop(struct filedesc *fdp)
return;
FILEDESC_LOCK_DESTROY(fdp);
+ fdp0 = (struct filedesc0 *)fdp;
+ while ((ft = SLIST_FIRST(&fdp0->fd_free)) != NULL) {
+ SLIST_REMOVE_HEAD(&fdp0->fd_free, ft_next);
+ free(ft->ft_table, M_FILEDESC);
+ }
free(fdp, M_FILEDESC);
}
@@ -2022,6 +2054,38 @@ finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
atomic_store_rel_ptr((volatile uintptr_t *)&fp->f_ops, (uintptr_t)ops);
}
+struct file *
+fget_unlocked(struct filedesc *fdp, int fd)
+{
+ struct file *fp;
+ u_int count;
+
+ if (fd < 0 || fd >= fdp->fd_nfiles)
+ return (NULL);
+ /*
+ * Fetch the descriptor locklessly. We avoid fdrop() races by
+ * never raising a refcount above 0. To accomplish this we have
+ * to use a cmpset loop rather than an atomic_add. The descriptor
+ * must be re-verified once we acquire a reference to be certain
+ * that the identity is still correct and we did not lose a race
+ * due to preemption.
+ */
+ for (;;) {
+ fp = fdp->fd_ofiles[fd];
+ if (fp == NULL)
+ break;
+ count = fp->f_count;
+ if (count == 0)
+ continue;
+ if (atomic_cmpset_int(&fp->f_count, count, count + 1) != 1)
+ continue;
+ if (fp == ((struct file *volatile*)fdp->fd_ofiles)[fd])
+ break;
+ fdrop(fp, curthread);
+ }
+
+ return (fp);
+}
/*
* Extract the file pointer associated with the specified descriptor for the
@@ -2030,16 +2094,12 @@ finit(struct file *fp, u_int flag, short type, void *data, struct fileops *ops)
* If the descriptor doesn't exist or doesn't match 'flags', EBADF is
* returned.
*
- * If 'hold' is set (non-zero) the file's refcount will be bumped on return.
- * It should be dropped with fdrop(). If it is not set, then the refcount
- * will not be bumped however the thread's filedesc struct will be returned
- * locked (for fgetsock).
- *
* If an error occured the non-zero error is returned and *fpp is set to
- * NULL. Otherwise *fpp is set and zero is returned.
+ * NULL. Otherwise *fpp is held and set and zero is returned. Caller is
+ * responsible for fdrop().
*/
static __inline int
-_fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
+_fget(struct thread *td, int fd, struct file **fpp, int flags)
{
struct filedesc *fdp;
struct file *fp;
@@ -2047,29 +2107,22 @@ _fget(struct thread *td, int fd, struct file **fpp, int flags, int hold)
*fpp = NULL;
if (td == NULL || (fdp = td->td_proc->p_fd) == NULL)
return (EBADF);
- FILEDESC_SLOCK(fdp);
- if ((fp = fget_locked(fdp, fd)) == NULL || fp->f_ops == &badfileops) {
- FILEDESC_SUNLOCK(fdp);
+ if ((fp = fget_unlocked(fdp, fd)) == NULL)
+ return (EBADF);
+ if (fp->f_ops == &badfileops) {
+ fdrop(fp, td);
return (EBADF);
}
-
/*
* FREAD and FWRITE failure return EBADF as per POSIX.
*
* Only one flag, or 0, may be specified.
*/
- if (flags == FREAD && (fp->f_flag & FREAD) == 0) {
- FILEDESC_SUNLOCK(fdp);
- return (EBADF);
- }
- if (flags == FWRITE && (fp->f_flag & FWRITE) == 0) {
- FILEDESC_SUNLOCK(fdp);
+ if ((flags == FREAD && (fp->f_flag & FREAD) == 0) ||
+ (flags == FWRITE && (fp->f_flag & FWRITE) == 0)) {
+ fdrop(fp, td);
return (EBADF);
}
- if (hold) {
- fhold(fp);
- FILEDESC_SUNLOCK(fdp);
- }
*fpp = fp;
return (0);
}
@@ -2078,21 +2131,21 @@ int
fget(struct thread *td, int fd, struct file **fpp)
{
- return(_fget(td, fd, fpp, 0, 1));
+ return(_fget(td, fd, fpp, 0));
}
int
fget_read(struct thread *td, int fd, struct file **fpp)
{
- return(_fget(td, fd, fpp, FREAD, 1));
+ return(_fget(td, fd, fpp, FREAD));
}
int
fget_write(struct thread *td, int fd, struct file **fpp)
{
- return(_fget(td, fd, fpp, FWRITE, 1));
+ return(_fget(td, fd, fpp, FWRITE));
}
/*
@@ -2109,7 +2162,7 @@ _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
int error;
*vpp = NULL;
- if ((error = _fget(td, fd, &fp, flags, 0)) != 0)
+ if ((error = _fget(td, fd, &fp, flags)) != 0)
return (error);
if (fp->f_vnode == NULL) {
error = EINVAL;
@@ -2117,7 +2170,8 @@ _fgetvp(struct thread *td, int fd, struct vnode **vpp, int flags)
*vpp = fp->f_vnode;
vref(*vpp);
}
- FILEDESC_SUNLOCK(td->td_proc->p_fd);
+ fdrop(fp, td);
+
return (error);
}
@@ -2164,7 +2218,7 @@ fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
*spp = NULL;
if (fflagp != NULL)
*fflagp = 0;
- if ((error = _fget(td, fd, &fp, 0, 0)) != 0)
+ if ((error = _fget(td, fd, &fp, 0)) != 0)
return (error);
if (fp->f_type != DTYPE_SOCKET) {
error = ENOTSOCK;
@@ -2176,7 +2230,8 @@ fgetsock(struct thread *td, int fd, struct socket **spp, u_int *fflagp)
soref(*spp);
SOCK_UNLOCK(*spp);
}
- FILEDESC_SUNLOCK(td->td_proc->p_fd);
+ fdrop(fp, td);
+
return (error);
}
@@ -3147,7 +3202,7 @@ filelistinit(void *dummy)
{
file_zone = uma_zcreate("Files", sizeof(struct file), NULL, NULL,
- NULL, NULL, UMA_ALIGN_PTR, 0);
+ NULL, NULL, UMA_ALIGN_PTR, UMA_ZONE_NOFREE);
mtx_init(&sigio_lock, "sigio lock", NULL, MTX_DEF);
mtx_init(&fdesc_mtx, "fdesc", NULL, MTX_DEF);
}
OpenPOWER on IntegriCloud