summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/kern/kern_event.c9
-rw-r--r--sys/kern/kern_thread.c2
-rw-r--r--sys/kern/sys_generic.c582
-rw-r--r--sys/kern/sys_pipe.c9
-rw-r--r--sys/kern/uipc_sockbuf.c3
-rw-r--r--sys/netncp/ncp_rq.c12
-rw-r--r--sys/netncp/ncp_sock.c105
-rw-r--r--sys/netncp/ncp_sock.h3
-rw-r--r--sys/netsmb/smb_trantcp.c80
-rw-r--r--sys/sys/proc.h5
-rw-r--r--sys/sys/selinfo.h16
-rw-r--r--sys/sys/socketvar.h2
-rw-r--r--sys/sys/systm.h4
13 files changed, 452 insertions, 380 deletions
diff --git a/sys/kern/kern_event.c b/sys/kern/kern_event.c
index aa446b6..4d75822 100644
--- a/sys/kern/kern_event.c
+++ b/sys/kern/kern_event.c
@@ -1400,7 +1400,8 @@ kqueue_poll(struct file *fp, int events, struct ucred *active_cred,
revents |= events & (POLLIN | POLLRDNORM);
} else {
selrecord(td, &kq->kq_sel);
- kq->kq_state |= KQ_SEL;
+ if (SEL_WAITING(&kq->kq_sel))
+ kq->kq_state |= KQ_SEL;
}
}
kqueue_release(kq, 1);
@@ -1486,8 +1487,9 @@ kqueue_close(struct file *fp, struct thread *td)
}
if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
- kq->kq_state &= ~KQ_SEL;
selwakeuppri(&kq->kq_sel, PSOCK);
+ if (!SEL_WAITING(&kq->kq_sel))
+ kq->kq_state &= ~KQ_SEL;
}
KQ_UNLOCK(kq);
@@ -1522,8 +1524,9 @@ kqueue_wakeup(struct kqueue *kq)
wakeup(kq);
}
if ((kq->kq_state & KQ_SEL) == KQ_SEL) {
- kq->kq_state &= ~KQ_SEL;
selwakeuppri(&kq->kq_sel, PSOCK);
+ if (!SEL_WAITING(&kq->kq_sel))
+ kq->kq_state &= ~KQ_SEL;
}
if (!knlist_empty(&kq->kq_sel.si_note))
kqueue_schedtask(kq);
diff --git a/sys/kern/kern_thread.c b/sys/kern/kern_thread.c
index 93ff5a7..3338005 100644
--- a/sys/kern/kern_thread.c
+++ b/sys/kern/kern_thread.c
@@ -40,6 +40,7 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/sched.h>
#include <sys/sleepqueue.h>
+#include <sys/selinfo.h>
#include <sys/turnstile.h>
#include <sys/ktr.h>
#include <sys/umtx.h>
@@ -206,6 +207,7 @@ thread_fini(void *mem, int size)
turnstile_free(td->td_turnstile);
sleepq_free(td->td_sleepqueue);
umtx_thread_fini(td);
+ seltdfini(td);
}
/*
diff --git a/sys/kern/sys_generic.c b/sys/kern/sys_generic.c
index 19e0b4d..0800c08 100644
--- a/sys/kern/sys_generic.c
+++ b/sys/kern/sys_generic.c
@@ -69,17 +69,59 @@ __FBSDID("$FreeBSD$");
#include <sys/ktrace.h>
#endif
+#include <sys/ktr.h>
+
static MALLOC_DEFINE(M_IOCTLOPS, "ioctlops", "ioctl data buffer");
static MALLOC_DEFINE(M_SELECT, "select", "select() buffer");
MALLOC_DEFINE(M_IOV, "iov", "large iov's");
static int pollscan(struct thread *, struct pollfd *, u_int);
+static int pollrescan(struct thread *);
static int selscan(struct thread *, fd_mask **, fd_mask **, int);
+static int selrescan(struct thread *, fd_mask **, fd_mask **);
+static void selfdalloc(struct thread *, void *);
+static void selfdfree(struct seltd *, struct selfd *);
static int dofileread(struct thread *, int, struct file *, struct uio *,
off_t, int);
static int dofilewrite(struct thread *, int, struct file *, struct uio *,
off_t, int);
static void doselwakeup(struct selinfo *, int);
+static void seltdinit(struct thread *);
+static int seltdwait(struct thread *, int);
+static void seltdclear(struct thread *);
+
+/*
+ * One seltd per-thread allocated on demand as needed.
+ *
+ * t - protected by st_mtx
+ * k - Only accessed by curthread or read-only
+ */
+struct seltd {
+ STAILQ_HEAD(, selfd) st_selq; /* (k) List of selfds. */
+ struct selfd *st_free1; /* (k) free fd for read set. */
+ struct selfd *st_free2; /* (k) free fd for write set. */
+ struct mtx st_mtx; /* Protects struct seltd */
+ struct cv st_wait; /* (t) Wait channel. */
+ int st_flags; /* (t) SELTD_ flags. */
+};
+
+#define SELTD_PENDING 0x0001 /* We have pending events. */
+#define SELTD_RESCAN 0x0002 /* Doing a rescan. */
+
+/*
+ * One selfd allocated per-thread per-file-descriptor.
+ * f - protected by sf_mtx
+ */
+struct selfd {
+ STAILQ_ENTRY(selfd) sf_link; /* (k) fds owned by this td. */
+ TAILQ_ENTRY(selfd) sf_threads; /* (f) fds on this selinfo. */
+ struct selinfo *sf_si; /* (f) selinfo when linked. */
+ struct mtx *sf_mtx; /* Pointer to selinfo mtx. */
+ struct seltd *sf_td; /* (k) owning seltd. */
+ void *sf_cookie; /* (k) fd or pollfd. */
+};
+
+static uma_zone_t selfd_zone;
#ifndef _SYS_SYSPROTO_H_
struct read_args {
@@ -629,14 +671,6 @@ out:
return (error);
}
-/*
- * sellock and selwait are initialized in selectinit() via SYSINIT.
- */
-struct mtx sellock;
-struct cv selwait;
-u_int nselcoll; /* Select collisions since boot */
-SYSCTL_UINT(_kern, OID_AUTO, nselcoll, CTLFLAG_RD, &nselcoll, 0, "");
-
#ifndef _SYS_SYSPROTO_H_
struct select_args {
int nd;
@@ -678,7 +712,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
fd_mask *ibits[3], *obits[3], *selbits, *sbp;
struct timeval atv, rtv, ttv;
int error, timo;
- u_int ncoll, nbufbytes, ncpbytes, nfdbits;
+ u_int nbufbytes, ncpbytes, nfdbits;
if (nd < 0)
return (EINVAL);
@@ -723,7 +757,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
sbp += ncpbytes / sizeof *sbp; \
error = copyin(name, ibits[x], ncpbytes); \
if (error != 0) \
- goto done_nosellock; \
+ goto done; \
} \
} while (0)
getbits(fd_in, 0);
@@ -737,7 +771,7 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
atv = *tvp;
if (itimerfix(&atv)) {
error = EINVAL;
- goto done_nosellock;
+ goto done;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
@@ -746,58 +780,31 @@ kern_select(struct thread *td, int nd, fd_set *fd_in, fd_set *fd_ou,
atv.tv_usec = 0;
}
timo = 0;
- TAILQ_INIT(&td->td_selq);
- mtx_lock(&sellock);
-retry:
- ncoll = nselcoll;
- thread_lock(td);
- td->td_flags |= TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
- error = selscan(td, ibits, obits, nd);
- mtx_lock(&sellock);
- if (error || td->td_retval[0])
- goto done;
- if (atv.tv_sec || atv.tv_usec) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=))
- goto done;
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = ttv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&ttv);
- }
-
- /*
- * An event of interest may occur while we do not hold
- * sellock, so check TDF_SELECT and the number of
- * collisions and rescan the file descriptors if
- * necessary.
- */
- thread_lock(td);
- if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
- thread_unlock(td);
- goto retry;
+ seltdinit(td);
+ /* Iterate until the timeout expires or descriptors become ready. */
+ for (;;) {
+ error = selscan(td, ibits, obits, nd);
+ if (error || td->td_retval[0] != 0)
+ break;
+ if (atv.tv_sec || atv.tv_usec) {
+ getmicrouptime(&rtv);
+ if (timevalcmp(&rtv, &atv, >=))
+ break;
+ ttv = atv;
+ timevalsub(&ttv, &rtv);
+ timo = ttv.tv_sec > 24 * 60 * 60 ?
+ 24 * 60 * 60 * hz : tvtohz(&ttv);
+ }
+ error = seltdwait(td, timo);
+ if (error)
+ break;
+ error = selrescan(td, ibits, obits);
+ if (error || td->td_retval[0] != 0)
+ break;
}
- thread_unlock(td);
-
- if (timo > 0)
- error = cv_timedwait_sig(&selwait, &sellock, timo);
- else
- error = cv_wait_sig(&selwait, &sellock);
-
- if (error == 0)
- goto retry;
+ seltdclear(td);
done:
- clear_selinfo_list(td);
- thread_lock(td);
- td->td_flags &= ~TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
-done_nosellock:
/* select is not restarted after signals... */
if (error == ERESTART)
error = EINTR;
@@ -820,6 +827,60 @@ done_nosellock:
return (error);
}
+/*
+ * Traverse the list of fds attached to this thread's seltd and check for
+ * completion.
+ */
+static int
+selrescan(struct thread *td, fd_mask **ibits, fd_mask **obits)
+{
+ struct seltd *stp;
+ struct selfd *sfp;
+ struct selfd *sfn;
+ struct selinfo *si;
+ struct file *fp;
+ int msk, fd;
+ int n = 0;
+ /* Note: backend also returns POLLHUP/POLLERR if appropriate. */
+ static int flag[3] = { POLLRDNORM, POLLWRNORM, POLLRDBAND };
+ struct filedesc *fdp = td->td_proc->p_fd;
+
+ stp = td->td_sel;
+ FILEDESC_SLOCK(fdp);
+ STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
+ fd = (int)(uintptr_t)sfp->sf_cookie;
+ si = sfp->sf_si;
+ selfdfree(stp, sfp);
+ /* If the selinfo wasn't cleared the event didn't fire. */
+ if (si != NULL)
+ continue;
+ if ((fp = fget_locked(fdp, fd)) == NULL) {
+ FILEDESC_SUNLOCK(fdp);
+ return (EBADF);
+ }
+ for (msk = 0; msk < 3; msk++) {
+ if (ibits[msk] == NULL)
+ continue;
+ if ((ibits[msk][fd/NFDBITS] &
+ ((fd_mask) 1 << (fd % NFDBITS))) == 0)
+ continue;
+ if (fo_poll(fp, flag[msk], td->td_ucred, td)) {
+ obits[msk][(fd)/NFDBITS] |=
+ ((fd_mask)1 << ((fd) % NFDBITS));
+ n++;
+ }
+ }
+ }
+ FILEDESC_SUNLOCK(fdp);
+ stp->st_flags = 0;
+ td->td_retval[0] = n;
+ return (0);
+}
+
+/*
+ * Perform the initial filedescriptor scan and register ourselves with
+ * each selinfo.
+ */
static int
selscan(td, ibits, obits, nfd)
struct thread *td;
@@ -848,6 +909,7 @@ selscan(td, ibits, obits, nfd)
FILEDESC_SUNLOCK(fdp);
return (EBADF);
}
+ selfdalloc(td, (void *)(uintptr_t)fd);
if (fo_poll(fp, flag[msk], td->td_ucred,
td)) {
obits[msk][(fd)/NFDBITS] |=
@@ -878,7 +940,7 @@ poll(td, uap)
struct pollfd smallbits[32];
struct timeval atv, rtv, ttv;
int error = 0, timo;
- u_int ncoll, nfds;
+ u_int nfds;
size_t ni;
nfds = uap->nfds;
@@ -894,8 +956,7 @@ poll(td, uap)
if ((nfds > lim_cur(td->td_proc, RLIMIT_NOFILE)) &&
(nfds > FD_SETSIZE)) {
PROC_UNLOCK(td->td_proc);
- error = EINVAL;
- goto done2;
+ return (EINVAL);
}
PROC_UNLOCK(td->td_proc);
ni = nfds * sizeof(struct pollfd);
@@ -905,13 +966,13 @@ poll(td, uap)
bits = smallbits;
error = copyin(uap->fds, bits, ni);
if (error)
- goto done_nosellock;
+ goto done;
if (uap->timeout != INFTIM) {
atv.tv_sec = uap->timeout / 1000;
atv.tv_usec = (uap->timeout % 1000) * 1000;
if (itimerfix(&atv)) {
error = EINVAL;
- goto done_nosellock;
+ goto done;
}
getmicrouptime(&rtv);
timevaladd(&atv, &rtv);
@@ -920,56 +981,31 @@ poll(td, uap)
atv.tv_usec = 0;
}
timo = 0;
- TAILQ_INIT(&td->td_selq);
- mtx_lock(&sellock);
-retry:
- ncoll = nselcoll;
- thread_lock(td);
- td->td_flags |= TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
- error = pollscan(td, bits, nfds);
- mtx_lock(&sellock);
- if (error || td->td_retval[0])
- goto done;
- if (atv.tv_sec || atv.tv_usec) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=))
- goto done;
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = ttv.tv_sec > 24 * 60 * 60 ?
- 24 * 60 * 60 * hz : tvtohz(&ttv);
- }
- /*
- * An event of interest may occur while we do not hold
- * sellock, so check TDF_SELECT and the number of collisions
- * and rescan the file descriptors if necessary.
- */
- thread_lock(td);
- if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
- thread_unlock(td);
- goto retry;
+ seltdinit(td);
+ /* Iterate until the timeout expires or descriptors become ready. */
+ for (;;) {
+ error = pollscan(td, bits, nfds);
+ if (error || td->td_retval[0] != 0)
+ break;
+ if (atv.tv_sec || atv.tv_usec) {
+ getmicrouptime(&rtv);
+ if (timevalcmp(&rtv, &atv, >=))
+ break;
+ ttv = atv;
+ timevalsub(&ttv, &rtv);
+ timo = ttv.tv_sec > 24 * 60 * 60 ?
+ 24 * 60 * 60 * hz : tvtohz(&ttv);
+ }
+ error = seltdwait(td, timo);
+ if (error)
+ break;
+ error = pollrescan(td);
+ if (error || td->td_retval[0] != 0)
+ break;
}
- thread_unlock(td);
-
- if (timo > 0)
- error = cv_timedwait_sig(&selwait, &sellock, timo);
- else
- error = cv_wait_sig(&selwait, &sellock);
-
- if (error == 0)
- goto retry;
+ seltdclear(td);
done:
- clear_selinfo_list(td);
- thread_lock(td);
- td->td_flags &= ~TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
-done_nosellock:
/* poll is not restarted after signals... */
if (error == ERESTART)
error = EINTR;
@@ -983,17 +1019,60 @@ done_nosellock:
out:
if (ni > sizeof(smallbits))
free(bits, M_TEMP);
-done2:
return (error);
}
static int
+pollrescan(struct thread *td)
+{
+ struct seltd *stp;
+ struct selfd *sfp;
+ struct selfd *sfn;
+ struct selinfo *si;
+ struct filedesc *fdp;
+ struct file *fp;
+ struct pollfd *fd;
+ int n;
+
+ n = 0;
+ fdp = td->td_proc->p_fd;
+ stp = td->td_sel;
+ FILEDESC_SLOCK(fdp);
+ STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn) {
+ fd = (struct pollfd *)sfp->sf_cookie;
+ si = sfp->sf_si;
+ selfdfree(stp, sfp);
+ /* If the selinfo wasn't cleared the event didn't fire. */
+ if (si != NULL)
+ continue;
+ fp = fdp->fd_ofiles[fd->fd];
+ if (fp == NULL) {
+ fd->revents = POLLNVAL;
+ n++;
+ continue;
+ }
+ /*
+ * Note: backend also returns POLLHUP and
+ * POLLERR if appropriate.
+ */
+ fd->revents = fo_poll(fp, fd->events, td->td_ucred, td);
+ if (fd->revents != 0)
+ n++;
+ }
+ FILEDESC_SUNLOCK(fdp);
+ stp->st_flags = 0;
+ td->td_retval[0] = n;
+ return (0);
+}
+
+
+static int
pollscan(td, fds, nfd)
struct thread *td;
struct pollfd *fds;
u_int nfd;
{
- register struct filedesc *fdp = td->td_proc->p_fd;
+ struct filedesc *fdp = td->td_proc->p_fd;
int i;
struct file *fp;
int n = 0;
@@ -1015,6 +1094,7 @@ pollscan(td, fds, nfd)
* Note: backend also returns POLLHUP and
* POLLERR if appropriate.
*/
+ selfdalloc(td, fds);
fds->revents = fo_poll(fp, fds->events,
td->td_ucred, td);
if (fds->revents != 0)
@@ -1048,23 +1128,90 @@ openbsd_poll(td, uap)
}
/*
- * Remove the references to the thread from all of the objects we were
- * polling.
- *
- * This code assumes that the underlying owner of the selinfo structure will
- * hold sellock before it changes it, and that it will unlink itself from our
- * list if it goes away.
+ * XXX This was created specifically to support netncp and netsmb. This
+ * allows the caller to specify a socket to wait for events on. It returns
+ * 0 if any events matched and an error otherwise. There is no way to
+ * determine which events fired.
*/
-void
-clear_selinfo_list(td)
- struct thread *td;
+int
+selsocket(struct socket *so, int events, struct timeval *tvp, struct thread *td)
{
- struct selinfo *si;
+ struct timeval atv, rtv, ttv;
+ int error, timo;
+
+ if (tvp != NULL) {
+ atv = *tvp;
+ if (itimerfix(&atv))
+ return (EINVAL);
+ getmicrouptime(&rtv);
+ timevaladd(&atv, &rtv);
+ } else {
+ atv.tv_sec = 0;
+ atv.tv_usec = 0;
+ }
- mtx_assert(&sellock, MA_OWNED);
- TAILQ_FOREACH(si, &td->td_selq, si_thrlist)
- si->si_thread = NULL;
- TAILQ_INIT(&td->td_selq);
+ timo = 0;
+ seltdinit(td);
+ /*
+ * Iterate until the timeout expires or the socket becomes ready.
+ */
+ for (;;) {
+ selfdalloc(td, NULL);
+ error = sopoll(so, events, NULL, td);
+ /* error here is actually the ready events. */
+ if (error)
+ return (0);
+ if (atv.tv_sec || atv.tv_usec) {
+ getmicrouptime(&rtv);
+ if (timevalcmp(&rtv, &atv, >=)) {
+ seltdclear(td);
+ return (EWOULDBLOCK);
+ }
+ ttv = atv;
+ timevalsub(&ttv, &rtv);
+ timo = ttv.tv_sec > 24 * 60 * 60 ?
+ 24 * 60 * 60 * hz : tvtohz(&ttv);
+ }
+ error = seltdwait(td, timo);
+ seltdclear(td);
+ if (error)
+ break;
+ }
+ /* XXX Duplicates ncp/smb behavior. */
+ if (error == ERESTART)
+ error = 0;
+ return (error);
+}
+
+/*
+ * Preallocate two selfds associated with 'cookie'. Some fo_poll routines
+ * have two select sets, one for read and another for write.
+ */
+static void
+selfdalloc(struct thread *td, void *cookie)
+{
+ struct seltd *stp;
+
+ stp = td->td_sel;
+ if (stp->st_free1 == NULL)
+ stp->st_free1 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
+ stp->st_free1->sf_td = stp;
+ stp->st_free1->sf_cookie = cookie;
+ if (stp->st_free2 == NULL)
+ stp->st_free2 = uma_zalloc(selfd_zone, M_WAITOK|M_ZERO);
+ stp->st_free2->sf_td = stp;
+ stp->st_free2->sf_cookie = cookie;
+}
+
+static void
+selfdfree(struct seltd *stp, struct selfd *sfp)
+{
+ STAILQ_REMOVE(&stp->st_selq, sfp, selfd, sf_link);
+ mtx_lock(sfp->sf_mtx);
+ if (sfp->sf_si)
+ TAILQ_REMOVE(&sfp->sf_si->si_tdlist, sfp, sf_threads);
+ mtx_unlock(sfp->sf_mtx);
+ uma_zfree(selfd_zone, sfp);
}
/*
@@ -1075,26 +1222,46 @@ selrecord(selector, sip)
struct thread *selector;
struct selinfo *sip;
{
+ struct selfd *sfp;
+ struct seltd *stp;
+ struct mtx *mtxp;
- mtx_lock(&sellock);
+ stp = selector->td_sel;
+ /*
+ * Don't record when doing a rescan.
+ */
+ if (stp->st_flags & SELTD_RESCAN)
+ return;
+ /*
+ * Grab one of the preallocated descriptors.
+ */
+ sfp = NULL;
+ if ((sfp = stp->st_free1) != NULL)
+ stp->st_free1 = NULL;
+ else if ((sfp = stp->st_free2) != NULL)
+ stp->st_free2 = NULL;
+ else
+ panic("selrecord: No free selfd on selq");
+ mtxp = mtx_pool_find(mtxpool_sleep, sip);
+ /*
+ * Initialize the sfp and queue it in the thread.
+ */
+ sfp->sf_si = sip;
+ sfp->sf_mtx = mtxp;
+ STAILQ_INSERT_TAIL(&stp->st_selq, sfp, sf_link);
/*
- * If the selinfo's thread pointer is NULL then take ownership of it.
- *
- * If the thread pointer is not NULL and it points to another
- * thread, then we have a collision.
- *
- * If the thread pointer is not NULL and points back to us then leave
- * it alone as we've already added pointed it at us and added it to
- * our list.
+ * Now that we've locked the sip, check for initialization.
*/
- if (sip->si_thread == NULL) {
- sip->si_thread = selector;
- TAILQ_INSERT_TAIL(&selector->td_selq, sip, si_thrlist);
- } else if (sip->si_thread != selector) {
- sip->si_flags |= SI_COLL;
+ mtx_lock(mtxp);
+ if (sip->si_mtx == NULL) {
+ sip->si_mtx = mtxp;
+ TAILQ_INIT(&sip->si_tdlist);
}
-
- mtx_unlock(&sellock);
+ /*
+ * Add this thread to the list of selfds listening on this selinfo.
+ */
+ TAILQ_INSERT_TAIL(&sip->si_tdlist, sfp, sf_threads);
+ mtx_unlock(sip->si_mtx);
}
/* Wake up a selecting thread. */
@@ -1122,36 +1289,115 @@ doselwakeup(sip, pri)
struct selinfo *sip;
int pri;
{
- struct thread *td;
+ struct selfd *sfp;
+ struct selfd *sfn;
+ struct seltd *stp;
- mtx_lock(&sellock);
- td = sip->si_thread;
- if ((sip->si_flags & SI_COLL) != 0) {
- nselcoll++;
- sip->si_flags &= ~SI_COLL;
- cv_broadcastpri(&selwait, pri);
- }
- if (td == NULL) {
- mtx_unlock(&sellock);
+ /* If it's not initialized there can't be any waiters. */
+ if (sip->si_mtx == NULL)
return;
+ /*
+ * Locking the selinfo locks all selfds associated with it.
+ */
+ mtx_lock(sip->si_mtx);
+ TAILQ_FOREACH_SAFE(sfp, &sip->si_tdlist, sf_threads, sfn) {
+ /*
+ * Once we remove this sfp from the list and clear the
+ * sf_si seltdclear will know to ignore this si.
+ */
+ TAILQ_REMOVE(&sip->si_tdlist, sfp, sf_threads);
+ sfp->sf_si = NULL;
+ stp = sfp->sf_td;
+ mtx_lock(&stp->st_mtx);
+ stp->st_flags |= SELTD_PENDING;
+ cv_broadcastpri(&stp->st_wait, pri);
+ mtx_unlock(&stp->st_mtx);
}
- TAILQ_REMOVE(&td->td_selq, sip, si_thrlist);
- sip->si_thread = NULL;
- thread_lock(td);
- td->td_flags &= ~TDF_SELECT;
- thread_unlock(td);
- sleepq_remove(td, &selwait);
- mtx_unlock(&sellock);
+ mtx_unlock(sip->si_mtx);
}
-static void selectinit(void *);
-SYSINIT(select, SI_SUB_LOCK, SI_ORDER_FIRST, selectinit, NULL)
+static void
+seltdinit(struct thread *td)
+{
+ struct seltd *stp;
+
+ if ((stp = td->td_sel) != NULL)
+ goto out;
+ td->td_sel = stp = malloc(sizeof(*stp), M_SELECT, M_WAITOK|M_ZERO);
+ mtx_init(&stp->st_mtx, "sellck", NULL, MTX_DEF);
+ cv_init(&stp->st_wait, "select");
+out:
+ stp->st_flags = 0;
+ STAILQ_INIT(&stp->st_selq);
+}
+
+static int
+seltdwait(struct thread *td, int timo)
+{
+ struct seltd *stp;
+ int error;
-/* ARGSUSED*/
+ stp = td->td_sel;
+ /*
+ * An event of interest may occur while we do not hold the seltd
+ * locked so check the pending flag before we sleep.
+ */
+ mtx_lock(&stp->st_mtx);
+ /*
+ * Any further calls to selrecord will be a rescan.
+ */
+ stp->st_flags |= SELTD_RESCAN;
+ if (stp->st_flags & SELTD_PENDING) {
+ mtx_unlock(&stp->st_mtx);
+ return (0);
+ }
+ if (timo > 0)
+ error = cv_timedwait_sig(&stp->st_wait, &stp->st_mtx, timo);
+ else
+ error = cv_wait_sig(&stp->st_wait, &stp->st_mtx);
+ mtx_unlock(&stp->st_mtx);
+
+ return (error);
+}
+
+void
+seltdfini(struct thread *td)
+{
+ struct seltd *stp;
+
+ stp = td->td_sel;
+ if (stp == NULL)
+ return;
+ if (stp->st_free1)
+ uma_zfree(selfd_zone, stp->st_free1);
+ if (stp->st_free2)
+ uma_zfree(selfd_zone, stp->st_free2);
+ td->td_sel = NULL;
+ free(stp, M_SELECT);
+}
+
+/*
+ * Remove the references to the thread from all of the objects we were
+ * polling.
+ */
+static void
+seltdclear(struct thread *td)
+{
+ struct seltd *stp;
+ struct selfd *sfp;
+ struct selfd *sfn;
+
+ stp = td->td_sel;
+ STAILQ_FOREACH_SAFE(sfp, &stp->st_selq, sf_link, sfn)
+ selfdfree(stp, sfp);
+ stp->st_flags = 0;
+}
+
+static void selectinit(void *);
+SYSINIT(select, SI_SUB_SYSCALLS, SI_ORDER_ANY, selectinit, NULL);
static void
-selectinit(dummy)
- void *dummy;
+selectinit(void *dummy __unused)
{
- cv_init(&selwait, "select");
- mtx_init(&sellock, "sellck", NULL, MTX_DEF);
+ selfd_zone = uma_zcreate("selfd", sizeof(struct selfd), NULL, NULL,
+ NULL, NULL, UMA_ALIGN_PTR, 0);
}
diff --git a/sys/kern/sys_pipe.c b/sys/kern/sys_pipe.c
index 2a48583..262ef0c 100644
--- a/sys/kern/sys_pipe.c
+++ b/sys/kern/sys_pipe.c
@@ -524,8 +524,9 @@ pipeselwakeup(cpipe)
PIPE_LOCK_ASSERT(cpipe, MA_OWNED);
if (cpipe->pipe_state & PIPE_SEL) {
- cpipe->pipe_state &= ~PIPE_SEL;
selwakeuppri(&cpipe->pipe_sel, PSOCK);
+ if (!SEL_WAITING(&cpipe->pipe_sel))
+ cpipe->pipe_state &= ~PIPE_SEL;
}
if ((cpipe->pipe_state & PIPE_ASYNC) && cpipe->pipe_sigio)
pgsigio(&cpipe->pipe_sigio, SIGIO, 0);
@@ -1354,12 +1355,14 @@ pipe_poll(fp, events, active_cred, td)
if (revents == 0) {
if (events & (POLLIN | POLLRDNORM)) {
selrecord(td, &rpipe->pipe_sel);
- rpipe->pipe_state |= PIPE_SEL;
+ if (SEL_WAITING(&rpipe->pipe_sel))
+ rpipe->pipe_state |= PIPE_SEL;
}
if (events & (POLLOUT | POLLWRNORM)) {
selrecord(td, &wpipe->pipe_sel);
- wpipe->pipe_state |= PIPE_SEL;
+ if (SEL_WAITING(&wpipe->pipe_sel))
+ wpipe->pipe_state |= PIPE_SEL;
}
}
#ifdef MAC
diff --git a/sys/kern/uipc_sockbuf.c b/sys/kern/uipc_sockbuf.c
index e1fc341..aa1d5ce 100644
--- a/sys/kern/uipc_sockbuf.c
+++ b/sys/kern/uipc_sockbuf.c
@@ -176,7 +176,8 @@ sowakeup(struct socket *so, struct sockbuf *sb)
SOCKBUF_LOCK_ASSERT(sb);
selwakeuppri(&sb->sb_sel, PSOCK);
- sb->sb_flags &= ~SB_SEL;
+ if (!SEL_WAITING(&sb->sb_sel))
+ sb->sb_flags &= ~SB_SEL;
if (sb->sb_flags & SB_WAIT) {
sb->sb_flags &= ~SB_WAIT;
wakeup(&sb->sb_cc);
diff --git a/sys/netncp/ncp_rq.c b/sys/netncp/ncp_rq.c
index b637c9a..38e3e54 100644
--- a/sys/netncp/ncp_rq.c
+++ b/sys/netncp/ncp_rq.c
@@ -43,6 +43,8 @@ __FBSDID("$FreeBSD$");
#include <sys/mbuf.h>
#include <sys/poll.h>
#include <sys/proc.h>
+#include <sys/socket.h>
+#include <sys/socketvar.h>
#include <sys/uio.h>
#include <netncp/ncp.h>
@@ -274,7 +276,9 @@ ncp_request_int(struct ncp_rq *rqp)
/*
* Flush out replies on previous reqs
*/
- while (ncp_poll(so, POLLIN) != 0) {
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ while (selsocket(so, POLLIN, &tv, td) == 0) {
if (ncp_sock_recv(so, &m, &len) != 0)
break;
m_freem(m);
@@ -319,7 +323,7 @@ ncp_request_int(struct ncp_rq *rqp)
}
tv.tv_sec = conn->li.timeout;
tv.tv_usec = 0;
- error = ncp_sock_rselect(so, td, &tv, POLLIN);
+ error = selsocket(so, POLLIN, &tv, td);
if (error == EWOULDBLOCK ) /* timeout expired */
continue;
error = ncp_chkintr(conn, td);
@@ -335,7 +339,9 @@ ncp_request_int(struct ncp_rq *rqp)
dosend = 1; /* resend rq if error */
for (;;) {
error = 0;
- if (ncp_poll(so, POLLIN) == 0)
+ tv.tv_sec = 0;
+ tv.tv_usec = 0;
+ if (selsocket(so, POLLIN, &tv, td) != 0)
break;
/* if (so->so_rcv.sb_cc == 0) {
break;
diff --git a/sys/netncp/ncp_sock.c b/sys/netncp/ncp_sock.c
index 0fe320f..426f40c 100644
--- a/sys/netncp/ncp_sock.c
+++ b/sys/netncp/ncp_sock.c
@@ -65,7 +65,6 @@ __FBSDID("$FreeBSD$");
#define ipx_setnullhost(x) ((x).x_host.s_host[0] = 0); \
((x).x_host.s_host[1] = 0); ((x).x_host.s_host[2] = 0);
-/*int ncp_poll(struct socket *so, int events);*/
/*static int ncp_getsockname(struct socket *so, caddr_t asa, int *alen);*/
static int ncp_soconnect(struct socket *so, struct sockaddr *target,
struct thread *td);
@@ -181,110 +180,6 @@ ncp_sock_send(struct socket *so, struct mbuf *top, struct ncp_rq *rqp)
return error;
}
-int
-ncp_poll(struct socket *so, int events)
-{
- struct thread *td = curthread;
- int revents;
-
- /* Fake up enough state to look like we are in poll(2). */
- mtx_lock(&sellock);
- thread_lock(td);
- td->td_flags |= TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
- TAILQ_INIT(&td->td_selq);
-
- revents = sopoll(so, events, NULL, td);
-
- /* Tear down the fake poll(2) state. */
- mtx_lock(&sellock);
- clear_selinfo_list(td);
- thread_lock(td);
- td->td_flags &= ~TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
- return (revents);
-}
-
-int
-ncp_sock_rselect(struct socket *so, struct thread *td, struct timeval *tv,
- int events)
-{
- struct timeval atv, rtv, ttv;
- int ncoll, timo, error, revents;
-
- if (tv) {
- atv = *tv;
- if (itimerfix(&atv)) {
- error = EINVAL;
- goto done_noproclock;
- }
- getmicrouptime(&rtv);
- timevaladd(&atv, &rtv);
- }
- timo = 0;
- mtx_lock(&sellock);
-
-retry:
- ncoll = nselcoll;
- thread_lock(td);
- td->td_flags |= TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
- TAILQ_INIT(&td->td_selq);
- revents = sopoll(so, events, NULL, td);
- mtx_lock(&sellock);
- if (revents) {
- error = 0;
- goto done;
- }
- if (tv) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=)) {
- error = EWOULDBLOCK;
- goto done;
- }
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = tvtohz(&ttv);
- }
- /*
- * An event of our interest may occur during locking a thread.
- * In order to avoid missing the event that occurred during locking
- * the process, test TDF_SELECT and rescan file descriptors if
- * necessary.
- */
- thread_lock(td);
- if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
- thread_unlock(td);
- goto retry;
- }
- thread_unlock(td);
-
- if (timo > 0)
- error = cv_timedwait(&selwait, &sellock, timo);
- else {
- cv_wait(&selwait, &sellock);
- error = 0;
- }
-
-done:
- clear_selinfo_list(td);
-
- thread_lock(td);
- td->td_flags &= ~TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
-done_noproclock:
- if (error == ERESTART)
- error = 0;
- return (error);
-}
-
/*
* Connect to specified server via IPX
*/
diff --git a/sys/netncp/ncp_sock.h b/sys/netncp/ncp_sock.h
index 7833760..a3998a4 100644
--- a/sys/netncp/ncp_sock.h
+++ b/sys/netncp/ncp_sock.h
@@ -45,9 +45,6 @@ int ncp_sock_connect(struct ncp_conn *ncp);
int ncp_sock_recv(struct socket *so, struct mbuf **mp, int *rlen);
int ncp_sock_send(struct socket *so, struct mbuf *data, struct ncp_rq *rqp);
int ncp_sock_disconnect(struct ncp_conn *conn);
-int ncp_poll(struct socket *so, int events);
-int ncp_sock_rselect(struct socket *so, struct thread *td, struct timeval *tv,
- int events);
int ncp_sock_checksum(struct ncp_conn *conn, int enable);
void ncp_check_rq(struct ncp_conn *conn);
diff --git a/sys/netsmb/smb_trantcp.c b/sys/netsmb/smb_trantcp.c
index 7bdf211..5e34943 100644
--- a/sys/netsmb/smb_trantcp.c
+++ b/sys/netsmb/smb_trantcp.c
@@ -95,84 +95,6 @@ nb_setsockopt_int(struct socket *so, int level, int name, int val)
}
static int
-nbssn_rselect(struct nbpcb *nbp, struct timeval *tv, int events,
- struct thread *td)
-{
- struct timeval atv, rtv, ttv;
- int ncoll, timo, error, revents;
-
- if (tv) {
- atv = *tv;
- if (itimerfix(&atv)) {
- error = EINVAL;
- goto done_noproclock;
- }
- getmicrouptime(&rtv);
- timevaladd(&atv, &rtv);
- }
- timo = 0;
- mtx_lock(&sellock);
-retry:
-
- ncoll = nselcoll;
- thread_lock(td);
- td->td_flags |= TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
- /* XXX: Should be done when the thread is initialized. */
- TAILQ_INIT(&td->td_selq);
- revents = sopoll(nbp->nbp_tso, events, NULL, td);
- mtx_lock(&sellock);
- if (revents) {
- error = 0;
- goto done;
- }
- if (tv) {
- getmicrouptime(&rtv);
- if (timevalcmp(&rtv, &atv, >=)) {
- error = EWOULDBLOCK;
- goto done;
- }
- ttv = atv;
- timevalsub(&ttv, &rtv);
- timo = tvtohz(&ttv);
- }
- /*
- * An event of our interest may occur during locking a process.
- * In order to avoid missing the event that occurred during locking
- * the process, test P_SELECT and rescan file descriptors if
- * necessary.
- */
- thread_lock(td);
- if ((td->td_flags & TDF_SELECT) == 0 || nselcoll != ncoll) {
- thread_unlock(td);
- goto retry;
- }
- thread_unlock(td);
-
- if (timo > 0)
- error = cv_timedwait(&selwait, &sellock, timo);
- else {
- cv_wait(&selwait, &sellock);
- error = 0;
- }
-
-done:
- clear_selinfo_list(td);
-
- thread_lock(td);
- td->td_flags &= ~TDF_SELECT;
- thread_unlock(td);
- mtx_unlock(&sellock);
-
-done_noproclock:
- if (error == ERESTART)
- return 0;
- return error;
-}
-
-static int
nb_intr(struct nbpcb *nbp, struct proc *p)
{
return 0;
@@ -302,7 +224,7 @@ nbssn_rq_request(struct nbpcb *nbp, struct thread *td)
if (error)
return error;
TIMESPEC_TO_TIMEVAL(&tv, &nbp->nbp_timo);
- error = nbssn_rselect(nbp, &tv, POLLIN, td);
+ error = selsocket(nbp->nbp_tso, POLLIN, &tv, td);
if (error == EWOULDBLOCK) { /* Timeout */
NBDEBUG("initial request timeout\n");
return ETIMEDOUT;
diff --git a/sys/sys/proc.h b/sys/sys/proc.h
index 7ebb8c2..9c77dcc 100644
--- a/sys/sys/proc.h
+++ b/sys/sys/proc.h
@@ -142,7 +142,6 @@ struct pargs {
* m - Giant
* n - not locked, lazy
* o - ktrace lock
- * p - select lock (sellock)
* q - td_contested lock
* r - p_peers lock
* t - thread lock
@@ -210,7 +209,7 @@ struct thread {
TAILQ_ENTRY(thread) td_slpq; /* (t) Sleep queue. */
TAILQ_ENTRY(thread) td_lockq; /* (t) Lock queue. */
- TAILQ_HEAD(, selinfo) td_selq; /* (p) List of selinfos. */
+ struct seltd *td_sel; /* Select queue/channel. */
struct sleepqueue *td_sleepqueue; /* (k) Associated sleep queue. */
struct turnstile *td_turnstile; /* (k) Associated turnstile. */
struct umtx_q *td_umtxq; /* (c?) Link for when we're blocked. */
@@ -322,7 +321,7 @@ do { \
#define TDF_SINTR 0x00000008 /* Sleep is interruptible. */
#define TDF_TIMEOUT 0x00000010 /* Timing out during sleep. */
#define TDF_IDLETD 0x00000020 /* This is a per-CPU idle thread. */
-#define TDF_SELECT 0x00000040 /* Selecting; wakeup/waiting danger. */
+#define TDF_UNUSEDx40 0x00000040 /* --available-- */
#define TDF_SLEEPABORT 0x00000080 /* sleepq_abort was called. */
#define TDF_KTH_SUSP 0x00000100 /* kthread is suspended */
#define TDF_UBORROWING 0x00000200 /* Thread is borrowing user pri. */
diff --git a/sys/sys/selinfo.h b/sys/sys/selinfo.h
index 946da8c..2d2f848 100644
--- a/sys/sys/selinfo.h
+++ b/sys/sys/selinfo.h
@@ -35,26 +35,26 @@
#include <sys/event.h> /* for struct klist */
+struct selfd;
+TAILQ_HEAD(selfdlist, selfd);
+
/*
* Used to maintain information about processes that wish to be
* notified when I/O becomes possible.
*/
struct selinfo {
- TAILQ_ENTRY(selinfo) si_thrlist; /* list hung off of thread */
- struct thread *si_thread; /* thread waiting */
- struct knlist si_note; /* kernel note list */
- short si_flags; /* see below */
+ struct selfdlist si_tdlist; /* List of sleeping threads. */
+ struct knlist si_note; /* kernel note list */
+ struct mtx *si_mtx; /* Lock for tdlist. */
};
-#define SI_COLL 0x0001 /* collision occurred */
-#define SEL_WAITING(si) \
- ((si)->si_thread != NULL || ((si)->si_flags & SI_COLL) != 0)
+#define SEL_WAITING(si) (!TAILQ_EMPTY(&(si)->si_tdlist))
#ifdef _KERNEL
-void clear_selinfo_list(struct thread *td);
void selrecord(struct thread *selector, struct selinfo *sip);
void selwakeup(struct selinfo *sip);
void selwakeuppri(struct selinfo *sip, int pri);
+void seltdfini(struct thread *td);
#endif
#endif /* !_SYS_SELINFO_H_ */
diff --git a/sys/sys/socketvar.h b/sys/sys/socketvar.h
index 83ef094..bc8df7b 100644
--- a/sys/sys/socketvar.h
+++ b/sys/sys/socketvar.h
@@ -546,6 +546,8 @@ int sosetopt(struct socket *so, struct sockopt *sopt);
int soshutdown(struct socket *so, int how);
void sotoxsocket(struct socket *so, struct xsocket *xso);
void sowakeup(struct socket *so, struct sockbuf *sb);
+int selsocket(struct socket *so, int events, struct timeval *tv,
+ struct thread *td);
#ifdef SOCKBUF_DEBUG
void sblastrecordchk(struct sockbuf *, const char *, int);
diff --git a/sys/sys/systm.h b/sys/sys/systm.h
index 6809696..91189e3 100644
--- a/sys/sys/systm.h
+++ b/sys/sys/systm.h
@@ -56,10 +56,6 @@ extern int kstack_pages; /* number of kernel stack pages */
extern int nswap; /* size of swap space */
-extern u_int nselcoll; /* select collisions since boot */
-extern struct mtx sellock; /* select lock variable */
-extern struct cv selwait; /* select conditional variable */
-
extern long physmem; /* physical memory */
extern long realmem; /* 'real' memory */
OpenPOWER on IntegriCloud