summaryrefslogtreecommitdiffstats
path: root/sys/kern/uipc_usrreq.c
diff options
context:
space:
mode:
authorjeff <jeff@FreeBSD.org>2007-12-30 01:42:15 +0000
committerjeff <jeff@FreeBSD.org>2007-12-30 01:42:15 +0000
commitce1863880500c459eb1395c1d6f81819e02e6608 (patch)
tree0f2354bfc200294c2629e6ecfba76e364beda579 /sys/kern/uipc_usrreq.c
parentbedce823534f9510ef9c65764069f927d359aeb8 (diff)
downloadFreeBSD-src-ce1863880500c459eb1395c1d6f81819e02e6608.zip
FreeBSD-src-ce1863880500c459eb1395c1d6f81819e02e6608.tar.gz
Remove explicit locking of struct file.
- Introduce a finit() which is used to initailize the fields of struct file in such a way that the ops vector is only valid after the data, type, and flags are valid. - Protect f_flag and f_count with atomic operations. - Remove the global list of all files and associated accounting. - Rewrite the unp garbage collection such that it no longer requires the global list of all files and instead uses a list of all unp sockets. - Mark sockets in the accept queue so we don't incorrectly gc them. Tested by: kris, pho
Diffstat (limited to 'sys/kern/uipc_usrreq.c')
-rw-r--r--sys/kern/uipc_usrreq.c412
1 files changed, 175 insertions, 237 deletions
diff --git a/sys/kern/uipc_usrreq.c b/sys/kern/uipc_usrreq.c
index 9fea71b4..1d6cc46 100644
--- a/sys/kern/uipc_usrreq.c
+++ b/sys/kern/uipc_usrreq.c
@@ -233,10 +233,11 @@ static void unp_shutdown(struct unpcb *);
static void unp_drop(struct unpcb *, int);
static void unp_gc(__unused void *, int);
static void unp_scan(struct mbuf *, void (*)(struct file *));
-static void unp_mark(struct file *);
static void unp_discard(struct file *);
static void unp_freerights(struct file **, int);
static int unp_internalize(struct mbuf **, struct thread *);
+static void unp_internalize_fp(struct file *);
+static void unp_externalize_fp(struct file *);
static struct mbuf *unp_addsockcred(struct thread *, struct mbuf *);
/*
@@ -586,9 +587,9 @@ uipc_detach(struct socket *so)
unp_drop(ref, ECONNRESET);
UNP_PCB_UNLOCK(ref);
}
+ local_unp_rights = unp_rights;
UNP_GLOBAL_WUNLOCK();
unp->unp_socket->so_pcb = NULL;
- local_unp_rights = unp_rights;
saved_unp_addr = unp->unp_addr;
unp->unp_addr = NULL;
unp->unp_refcount--;
@@ -1600,10 +1601,7 @@ unp_externalize(struct mbuf *control, struct mbuf **controlp)
panic("unp_externalize fdalloc failed");
fp = *rp++;
td->td_proc->p_fd->fd_ofiles[f] = fp;
- FILE_LOCK(fp);
- fp->f_msgcount--;
- FILE_UNLOCK(fp);
- unp_rights--;
+ unp_externalize_fp(fp);
*fdp++ = f;
}
FILEDESC_XUNLOCK(td->td_proc->p_fd);
@@ -1765,11 +1763,8 @@ unp_internalize(struct mbuf **controlp, struct thread *td)
for (i = 0; i < oldfds; i++) {
fp = fdescp->fd_ofiles[*fdp++];
*rp++ = fp;
- FILE_LOCK(fp);
- fp->f_count++;
- fp->f_msgcount++;
- FILE_UNLOCK(fp);
- unp_rights++;
+ fhold(fp);
+ unp_internalize_fp(fp);
}
FILEDESC_SUNLOCK(fdescp);
break;
@@ -1860,230 +1855,198 @@ unp_addsockcred(struct thread *td, struct mbuf *control)
return (m);
}
+static struct unpcb *
+fptounp(struct file *fp)
+{
+ struct socket *so;
+
+ if (fp->f_type != DTYPE_SOCKET)
+ return (NULL);
+ if ((so = fp->f_data) == NULL)
+ return (NULL);
+ if (so->so_proto->pr_domain != &localdomain)
+ return (NULL);
+ return sotounpcb(so);
+}
+
+static void
+unp_discard(struct file *fp)
+{
+
+ unp_externalize_fp(fp);
+ (void) closef(fp, (struct thread *)NULL);
+}
+
+static void
+unp_internalize_fp(struct file *fp)
+{
+ struct unpcb *unp;
+
+ UNP_GLOBAL_WLOCK();
+ if ((unp = fptounp(fp)) != NULL) {
+ unp->unp_file = fp;
+ unp->unp_msgcount++;
+ }
+ unp_rights++;
+ UNP_GLOBAL_WUNLOCK();
+}
+
+static void
+unp_externalize_fp(struct file *fp)
+{
+ struct unpcb *unp;
+
+ UNP_GLOBAL_WLOCK();
+ if ((unp = fptounp(fp)) != NULL)
+ unp->unp_msgcount--;
+ unp_rights--;
+ UNP_GLOBAL_WUNLOCK();
+}
+
/*
* unp_defer indicates whether additional work has been deferred for a future
* pass through unp_gc(). It is thread local and does not require explicit
* synchronization.
*/
-static int unp_defer;
+static int unp_marked;
+static int unp_unreachable;
-static int unp_taskcount;
-SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "");
+static void
+unp_accessable(struct file *fp)
+{
+ struct unpcb *unp;
+
+ unp = fptounp(fp);
+ if (fp == NULL)
+ return;
+ if (unp->unp_gcflag & UNPGC_REF)
+ return;
+ unp->unp_gcflag &= ~UNPGC_DEAD;
+ unp->unp_gcflag |= UNPGC_REF;
+ unp_marked++;
+}
+
+static void
+unp_gc_process(struct unpcb *unp)
+{
+ struct socket *soa;
+ struct socket *so;
+ struct file *fp;
+
+ /* Already processed. */
+ if (unp->unp_gcflag & UNPGC_SCANNED)
+ return;
+ fp = unp->unp_file;
+ /*
+ * Check for a socket potentially in a cycle. It must be in a
+ * queue as indicated by msgcount, and this must equal the file
+ * reference count. Note that when msgcount is 0 the file is NULL.
+ */
+ if (unp->unp_msgcount != 0 && fp->f_count != 0 &&
+ fp->f_count == unp->unp_msgcount) {
+ unp->unp_gcflag |= UNPGC_DEAD;
+ unp_unreachable++;
+ return;
+ }
+ /*
+ * Mark all sockets we reference with RIGHTS.
+ */
+ so = unp->unp_socket;
+ SOCKBUF_LOCK(&so->so_rcv);
+ unp_scan(so->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&so->so_rcv);
+ /*
+ * Mark all sockets in our accept queue.
+ */
+ ACCEPT_LOCK();
+ TAILQ_FOREACH(soa, &so->so_comp, so_list) {
+ SOCKBUF_LOCK(&soa->so_rcv);
+ unp_scan(soa->so_rcv.sb_mb, unp_accessable);
+ SOCKBUF_UNLOCK(&soa->so_rcv);
+ }
+ ACCEPT_UNLOCK();
+ unp->unp_gcflag |= UNPGC_SCANNED;
+}
static int unp_recycled;
SYSCTL_INT(_net_local, OID_AUTO, recycled, CTLFLAG_RD, &unp_recycled, 0, "");
+static int unp_taskcount;
+SYSCTL_INT(_net_local, OID_AUTO, taskcount, CTLFLAG_RD, &unp_taskcount, 0, "");
+
/*
 * Garbage collect in-flight AF_LOCAL descriptors.  Sockets whose only
 * references come from rights messages queued on other sockets can form
 * unreachable cycles that normal close() processing never frees.  This
 * task marks every socket still reachable from userland, collects the
 * remainder on a local list, flushes their queued rights, and closes
 * them.
 */
static void
unp_gc(__unused void *arg, int pending)
{
	struct unp_head *heads[] = { &unp_dhead, &unp_shead, NULL };
	struct unp_head **head;
	struct file **unref;
	struct unpcb *unp;
	int i;

	unp_taskcount++;
	UNP_GLOBAL_RLOCK();
	/*
	 * First clear all gc flags from previous runs.
	 */
	for (head = heads; *head != NULL; head++)
		LIST_FOREACH(unp, *head, unp_link)
			unp->unp_gcflag &= ~(UNPGC_REF|UNPGC_DEAD);
	/*
	 * Scan marking all reachable sockets with UNPGC_REF.  Once a socket
	 * is reachable all of the sockets it references are reachable.
	 * Stop the scan once we do a complete loop without discovering
	 * a new reachable socket.
	 */
	do {
		unp_unreachable = 0;
		unp_marked = 0;
		for (head = heads; *head != NULL; head++)
			LIST_FOREACH(unp, *head, unp_link)
				unp_gc_process(unp);
	} while (unp_marked);
	UNP_GLOBAL_RUNLOCK();
	if (unp_unreachable == 0)
		return;
	/*
	 * Allocate space for a local list of dead unpcbs.
	 */
	unref = malloc(unp_unreachable * sizeof(struct file *),
	    M_TEMP, M_WAITOK);
	/*
	 * Iterate looking for sockets which have been specifically marked
	 * as unreachable and store them locally.
	 */
	UNP_GLOBAL_RLOCK();
	for (i = 0, head = heads; *head != NULL; head++)
		LIST_FOREACH(unp, *head, unp_link)
			if (unp->unp_gcflag & UNPGC_DEAD) {
				/*
				 * Assert before use (the original stored
				 * unp_file and advanced i before checking):
				 * a DEAD pcb must have a file, and must fit
				 * in the list sized during the mark phase.
				 */
				KASSERT(unp->unp_file != NULL,
				    ("unp_gc: Invalid unpcb."));
				KASSERT(i < unp_unreachable,
				    ("unp_gc: incorrect unreachable count."));
				unref[i++] = unp->unp_file;
			}
	UNP_GLOBAL_RUNLOCK();
	/*
	 * All further operation is now done on a local list.  We first ref
	 * all sockets to avoid closing them until all are flushed.
	 */
	for (i = 0; i < unp_unreachable; i++)
		fhold(unref[i]);
	/*
	 * Now flush all sockets, free'ing rights.  This will free the
	 * struct files associated with these sockets but leave each socket
	 * with one remaining ref.
	 */
	for (i = 0; i < unp_unreachable; i++)
		sorflush(unref[i]->f_data);
	/*
	 * And finally release the sockets so they can be reclaimed.
	 */
	for (i = 0; i < unp_unreachable; i++)
		fdrop(unref[i], NULL);
	unp_recycled += unp_unreachable;
	free(unref, M_TEMP);
}
void
@@ -2143,31 +2106,6 @@ unp_scan(struct mbuf *m0, void (*op)(struct file *))
}
}
-static void
-unp_mark(struct file *fp)
-{
-
- /* XXXRW: Should probably assert file list lock here. */
-
- if (fp->f_gcflag & FMARK)
- return;
- unp_defer++;
- fp->f_gcflag |= (FMARK|FDEFER);
-}
-
-static void
-unp_discard(struct file *fp)
-{
-
- UNP_GLOBAL_WLOCK();
- FILE_LOCK(fp);
- fp->f_msgcount--;
- unp_rights--;
- FILE_UNLOCK(fp);
- UNP_GLOBAL_WUNLOCK();
- (void) closef(fp, (struct thread *)NULL);
-}
-
#ifdef DDB
static void
db_print_indent(int indent)
OpenPOWER on IntegriCloud