Diffstat:
-rw-r--r--  sys/compat/freebsd32/freebsd32_misc.c   34
-rw-r--r--  sys/kern/uipc_syscalls.c               543
-rw-r--r--  sys/sys/sf_base.h                        3
-rw-r--r--  sys/sys/sf_sync.h                       23
4 files changed, 587 insertions(+), 16 deletions(-)
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index c8681c6..719a057 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -1644,18 +1644,28 @@ struct sf_hdtr32 {
int trl_cnt;
};
+struct sf_hdtr_kq32 {
+ int kq_fd;
+ uint32_t kq_flags;
+ uint32_t kq_udata; /* 32-bit void ptr */
+ uint32_t kq_ident; /* 32-bit uintptr_t */
+};
+
static int
freebsd32_do_sendfile(struct thread *td,
struct freebsd32_sendfile_args *uap, int compat)
{
struct sf_hdtr32 hdtr32;
struct sf_hdtr hdtr;
+ struct sf_hdtr_kq32 hdtr_kq32;
+ struct sf_hdtr_kq hdtr_kq;
struct uio *hdr_uio, *trl_uio;
struct iovec32 *iov32;
off_t offset;
int error;
off_t sbytes;
struct sendfile_sync *sfs;
+ int do_kqueue = 0;
offset = PAIR32TO64(off_t, uap->offset);
if (offset < 0)
@@ -1687,10 +1697,32 @@ freebsd32_do_sendfile(struct thread *td,
if (error)
goto out;
}
+
+ /*
+ * If SF_KQUEUE is set, also copy in the kqueue completion
+ * data that follows the normal sf_hdtr, and set do_kqueue=1.
+ */
+ if (uap->flags & SF_KQUEUE) {
+ error = copyin(((char *) uap->hdtr) + sizeof(hdtr32),
+ &hdtr_kq32,
+ sizeof(hdtr_kq32));
+ if (error != 0)
+ goto out;
+
+ /* 32->64 bit fields */
+ CP(hdtr_kq32, hdtr_kq, kq_fd);
+ CP(hdtr_kq32, hdtr_kq, kq_flags);
+ PTRIN_CP(hdtr_kq32, hdtr_kq, kq_udata);
+ CP(hdtr_kq32, hdtr_kq, kq_ident);
+ do_kqueue = 1;
+ }
}
+
+ /* Call sendfile */
+ /* XXX stack depth! */
error = _do_sendfile(td, uap->fd, uap->s, uap->flags, compat,
- offset, uap->nbytes, &sbytes, hdr_uio, trl_uio);
+ offset, uap->nbytes, &sbytes, hdr_uio, trl_uio, &hdtr_kq);
if (uap->sbytes != NULL)
copyout(&sbytes, uap->sbytes, sizeof(off_t));
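Both copyin paths in this change assume the caller places the kqueue block immediately after the (native or 32-bit) sf_hdtr in user memory. The following is a hypothetical userland sketch of that layout, assuming struct sf_hdtr_kq and the SF_KQUEUE flag are exported through <sys/socket.h> elsewhere in this patch series; the wrapper struct and function names here are illustrative only.

#include <sys/types.h>
#include <sys/socket.h>
#include <sys/uio.h>
#include <sys/event.h>
#include <string.h>
#include <err.h>

/* Hypothetical: the kernel reads sf_hdtr_kq from the bytes
 * immediately following the sf_hdtr passed to sendfile(2). */
struct sendfile_kq_arg {
	struct sf_hdtr hdtr;	/* usual header/trailer block */
	struct sf_hdtr_kq kq;	/* read by the kernel iff SF_KQUEUE is set */
};

static void
sendfile_with_kq(int src_fd, int sock, int kq)
{
	struct sendfile_kq_arg a;
	off_t sbytes;

	memset(&a, 0, sizeof(a));
	a.kq.kq_fd = kq;		/* kqueue that gets the completion event */
	a.kq.kq_flags = EV_ONESHOT;	/* only EV_CLEAR/EV_DISPATCH/EV_ONESHOT allowed */
	a.kq.kq_ident = (uintptr_t)&a;	/* ident to match in the returned kevent */
	a.kq.kq_udata = NULL;		/* cookie handed back in ev.udata */

	if (sendfile(src_fd, sock, 0, 0, &a.hdtr, &sbytes, SF_KQUEUE) != 0)
		err(1, "sendfile");
}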
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 6dbdd52..9420dfd 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -123,6 +123,10 @@ static int getpeername1(struct thread *td, struct getpeername_args *uap,
counter_u64_t sfstat[sizeof(struct sfstat) / sizeof(uint64_t)];
+static int filt_sfsync_attach(struct knote *kn);
+static void filt_sfsync_detach(struct knote *kn);
+static int filt_sfsync(struct knote *kn, long hint);
+
/*
* sendfile(2)-related variables and associated sysctls
*/
@@ -132,8 +136,28 @@ static int sfreadahead = 1;
SYSCTL_INT(_kern_ipc_sendfile, OID_AUTO, readahead, CTLFLAG_RW,
&sfreadahead, 0, "Number of sendfile(2) read-ahead MAXBSIZE blocks");
+#ifdef SFSYNC_DEBUG
+static int sf_sync_debug = 0;
+SYSCTL_INT(_debug, OID_AUTO, sf_sync_debug, CTLFLAG_RW,
+ &sf_sync_debug, 0, "Output debugging during sf_sync lifecycle");
+#define SFSYNC_DPRINTF(s, ...) \
+ do { \
+ if (sf_sync_debug) \
+ printf((s), ##__VA_ARGS__); \
+ } while (0)
+#else
+#define SFSYNC_DPRINTF(s, ...)
+#endif
+
static uma_zone_t zone_sfsync;
+static struct filterops sendfile_filtops = {
+ .f_isfd = 0,
+ .f_attach = filt_sfsync_attach,
+ .f_detach = filt_sfsync_detach,
+ .f_event = filt_sfsync,
+};
+
static void
sfstat_init(const void *unused)
{
@@ -152,6 +176,7 @@ sf_sync_init(const void *unused)
NULL, NULL,
UMA_ALIGN_CACHE,
0);
+ kqueue_add_filteropts(EVFILT_SENDFILE, &sendfile_filtops);
}
SYSINIT(sf_sync, SI_SUB_MBUF, SI_ORDER_FIRST, sf_sync_init, NULL);
@@ -1860,6 +1885,118 @@ getsockaddr(namp, uaddr, len)
return (error);
}
+static int
+filt_sfsync_attach(struct knote *kn)
+{
+ struct sendfile_sync *sfs = (struct sendfile_sync *) kn->kn_sdata;
+ struct knlist *knl = &sfs->klist;
+
+ SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs);
+
+ /*
+ * Validate that we actually received this via the kernel API.
+ */
+ if ((kn->kn_flags & EV_FLAG1) == 0)
+ return (EPERM);
+
+ kn->kn_ptr.p_v = sfs;
+ kn->kn_flags &= ~EV_FLAG1;
+
+ knl->kl_lock(knl->kl_lockarg);
+ /*
+ * If we're in the "freeing" state,
+ * don't allow the add. That way we don't
+ * end up racing with some other thread that
+ * is trying to finish some setup.
+ */
+ if (sfs->state == SF_STATE_FREEING) {
+ knl->kl_unlock(knl->kl_lockarg);
+ return (EINVAL);
+ }
+ knlist_add(&sfs->klist, kn, 1);
+ knl->kl_unlock(knl->kl_lockarg);
+
+ return (0);
+}
+
+/*
+ * Called when a knote is being detached.
+ */
+static void
+filt_sfsync_detach(struct knote *kn)
+{
+ struct knlist *knl;
+ struct sendfile_sync *sfs;
+ int do_free = 0;
+
+ sfs = kn->kn_ptr.p_v;
+ knl = &sfs->klist;
+
+ SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs);
+
+ knl->kl_lock(knl->kl_lockarg);
+ if (!knlist_empty(knl))
+ knlist_remove(knl, kn, 1);
+
+ /*
+ * If the list is empty _AND_ the refcount is 0
+ * _AND_ we've finished the setup phase and now
+ * we're in the running phase, we can free the
+ * underlying sendfile_sync.
+ *
+ * But we shouldn't do it before finishing the
+ * underlying divorce from the knote.
+ *
+ * So, we have the sfsync lock held; transition
+ * it to "freeing", then unlock, then free
+ * normally.
+ */
+ if (knlist_empty(knl)) {
+ if (sfs->state == SF_STATE_COMPLETED && sfs->count == 0) {
+ SFSYNC_DPRINTF("%s: (%llu) sfs=%p; completed, "
+ "count==0, empty list: time to free!\n",
+ __func__,
+ (unsigned long long) curthread->td_tid,
+ sfs);
+ sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
+ do_free = 1;
+ }
+ }
+ knl->kl_unlock(knl->kl_lockarg);
+
+ /*
+ * Only call free if we're the one who has transitioned things
+ * to free. Otherwise we could race with another thread that
+ * is currently tearing things down.
+ */
+ if (do_free == 1) {
+ SFSYNC_DPRINTF("%s: (%llu) sfs=%p, %s:%d\n",
+ __func__,
+ (unsigned long long) curthread->td_tid,
+ sfs,
+ __FILE__,
+ __LINE__);
+ sf_sync_free(sfs);
+ }
+}
+
+static int
+filt_sfsync(struct knote *kn, long hint)
+{
+ struct sendfile_sync *sfs = (struct sendfile_sync *) kn->kn_ptr.p_v;
+ int ret;
+
+ SFSYNC_DPRINTF("%s: kn=%p, sfs=%p\n", __func__, kn, sfs);
+
+ /*
+ * XXX add a lock assertion here!
+ */
+ ret = (sfs->count == 0 && sfs->state == SF_STATE_COMPLETED);
+
+ return (ret);
+}
+
+
/*
* Detach mapped page and release resources back to the system.
*/
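Note the EV_FLAG1 check in filt_sfsync_attach() above: only the in-kernel registration path (sf_sync_kqueue_setup(), later in this diff) supplies that flag, and EV_FLAG1 is a system flag that is masked off kevents submitted from userland, so registering an EVFILT_SENDFILE knote directly fails. A hypothetical illustration:

#include <sys/event.h>
#include <err.h>

/* Hypothetical: direct userland registration is rejected. */
static void
try_direct_register(int kq)
{
	struct kevent kev;

	EV_SET(&kev, 1234, EVFILT_SENDFILE, EV_ADD, 0, 0, NULL);
	if (kevent(kq, &kev, 1, NULL, 0, NULL) == -1)
		warn("kevent");	/* expect EPERM from filt_sfsync_attach() */
}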
@@ -1885,21 +2022,97 @@ sf_buf_mext(struct mbuf *mb, void *addr, void *args)
sfs = addr;
sf_sync_deref(sfs);
}
+ /*
+ * sfs may be invalid at this point, don't use it!
+ */
return (EXT_FREE_OK);
}
+/*
+ * Called to remove a reference to a sf_sync object.
+ *
+ * This is generally done during the mbuf free path to signify
+ * that one of the mbufs in the transaction has been completed.
+ *
+ * If we're doing SF_SYNC and the refcount is zero then we'll wake
+ * up any waiters.
+ *
+ * If we're doing SF_KQUEUE and the refcount is zero then we'll
+ * fire off the knote.
+ */
void
sf_sync_deref(struct sendfile_sync *sfs)
{
+ int do_free = 0;
if (sfs == NULL)
return;
mtx_lock(&sfs->mtx);
KASSERT(sfs->count > 0, ("Sendfile sync botchup count == 0"));
- if (--sfs->count == 0)
- cv_signal(&sfs->cv);
+ sfs->count--;
+
+ /*
+ * Only fire off the wakeup / kqueue notification if
+ * we are in the running state.
+ */
+ if (sfs->count == 0 && sfs->state == SF_STATE_COMPLETED) {
+ if (sfs->flags & SF_SYNC)
+ cv_signal(&sfs->cv);
+
+ if (sfs->flags & SF_KQUEUE) {
+ SFSYNC_DPRINTF("%s: (%llu) sfs=%p: knote!\n",
+ __func__,
+ (unsigned long long) curthread->td_tid,
+ sfs);
+ KNOTE_LOCKED(&sfs->klist, 1);
+ }
+
+ /*
+ * If we're not waiting around for a sync,
+ * check if the knote list is empty.
+ * If it is, we transition to free.
+ *
+ * XXX I think it's about time I added some state
+ * or flag that says whether we're supposed to be
+ * waiting around until we've done a signal.
+ *
+ * XXX I.e., the reason I don't free it here
+ * is that the caller will free the last reference,
+ * not us. That should be codified in some flag
+ * that indicates "self-free" rather than checking
+ * for SF_SYNC all the time.
+ */
+ if ((sfs->flags & SF_SYNC) == 0 && knlist_empty(&sfs->klist)) {
+ SFSYNC_DPRINTF("%s: (%llu) sfs=%p; completed, "
+ "count==0, empty list: time to free!\n",
+ __func__,
+ (unsigned long long) curthread->td_tid,
+ sfs);
+ sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
+ do_free = 1;
+ }
+
+ }
mtx_unlock(&sfs->mtx);
+
+ /*
+ * Attempt to do a free here.
+ *
+ * We do this outside of the lock because it may destroy the
+ * lock in question as it frees things. We can optimise this
+ * later.
+ *
+ * XXX yes, we should make it a requirement to hold the
+ * lock across sf_sync_free().
+ */
+ if (do_free == 1) {
+ SFSYNC_DPRINTF("%s: (%llu) sfs=%p\n",
+ __func__,
+ (unsigned long long) curthread->td_tid,
+ sfs);
+ sf_sync_free(sfs);
+ }
}
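The count dropped here pairs with sf_sync_ref(), declared in sys/sys/sf_sync.h below; a sketch of the assumed pairing (a reading of this diff, not verbatim kernel code):

/*
 * Assumed reference pairing:
 *
 *   sf_sync_ref(sfs);      one reference per in-flight sendfile mbuf
 *   ... the mbuf is transmitted and freed ...
 *   sf_buf_mext()          -> sf_sync_deref(sfs); the final deref wakes
 *                          the SF_SYNC sleeper and/or fires the knote,
 *                          and may transition the sf_sync to FREEING.
 */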
/*
@@ -1917,6 +2130,10 @@ sf_sync_alloc(uint32_t flags)
mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF);
cv_init(&sfs->cv, "sendfile");
sfs->flags = flags;
+ sfs->state = SF_STATE_SETUP;
+ knlist_init_mtx(&sfs->klist, &sfs->mtx);
+
+ SFSYNC_DPRINTF("%s: sfs=%p, flags=0x%08x\n", __func__, sfs, sfs->flags);
return (sfs);
}
@@ -1946,13 +2163,49 @@ sf_sync_syscall_wait(struct sendfile_sync *sfs)
if (sfs == NULL)
return;
- mtx_lock(&sfs->mtx);
+ KASSERT(mtx_owned(&sfs->mtx), ("%s: sfs=%p: not locked but should be!",
+ __func__,
+ sfs));
+
+ /*
+ * If we're not requested to wait during the syscall,
+ * don't bother waiting.
+ */
+ if ((sfs->flags & SF_SYNC) == 0)
+ goto out;
+
+ /*
+ * This is a bit suboptimal and confusing, so bear with me.
+ *
+ * Ideally sf_sync_syscall_wait() will wait until
+ * all pending mbuf transmit operations are done.
+ * This means that when sendfile becomes async, it'll
+ * run in the background and will transition from
+ * RUNNING to COMPLETED when it's finished acquiring
+ * new things to send. Then, when the mbufs finish
+ * sending, COMPLETED + sfs->count == 0 is enough to
+ * know that no further work is being done.
+ *
+ * So, we will sleep on both RUNNING and COMPLETED.
+ * It's up to the (in progress) async sendfile loop
+ * to transition the sf_sync from RUNNING to
+ * COMPLETED so the wakeup above will actually
+ * do the cv_signal() call.
+ */
+ if (sfs->state != SF_STATE_COMPLETED && sfs->state != SF_STATE_RUNNING)
+ goto out;
+
if (sfs->count != 0)
cv_wait(&sfs->cv, &sfs->mtx);
KASSERT(sfs->count == 0, ("sendfile sync still busy"));
- mtx_unlock(&sfs->mtx);
+
+out:
+ return;
}
+/*
+ * Free an sf_sync if it's appropriate to.
+ */
void
sf_sync_free(struct sendfile_sync *sfs)
{
@@ -1960,18 +2213,158 @@ sf_sync_free(struct sendfile_sync *sfs)
if (sfs == NULL)
return;
+ SFSYNC_DPRINTF("%s: (%lld) sfs=%p; called; state=%d, flags=0x%08x "
+ "count=%d\n",
+ __func__,
+ (long long) curthread->td_tid,
+ sfs,
+ sfs->state,
+ sfs->flags,
+ sfs->count);
+
+ mtx_lock(&sfs->mtx);
+
/*
- * XXX we should ensure that nothing else has this
- * locked before freeing.
+ * We keep the sf_sync around if the state is active,
+ * we are doing kqueue notification and we have active
+ * knotes.
+ *
+ * If the caller wants to free us right this second it
+ * should transition this to the freeing state.
+ *
+ * So, complain loudly if they break this rule.
*/
- mtx_lock(&sfs->mtx);
+ if (sfs->state != SF_STATE_FREEING) {
+ printf("%s: (%llu) sfs=%p; not freeing; let's wait!\n",
+ __func__,
+ (unsigned long long) curthread->td_tid,
+ sfs);
+ mtx_unlock(&sfs->mtx);
+ return;
+ }
+
KASSERT(sfs->count == 0, ("sendfile sync still busy"));
cv_destroy(&sfs->cv);
+ /*
+ * This doesn't call knlist_detach() on each knote; it just frees
+ * the entire list.
+ */
+ knlist_delete(&sfs->klist, curthread, 1);
mtx_destroy(&sfs->mtx);
+ SFSYNC_DPRINTF("%s: (%llu) sfs=%p; freeing\n",
+ __func__,
+ (unsigned long long) curthread->td_tid,
+ sfs);
uma_zfree(zone_sfsync, sfs);
}
/*
+ * Setup a sf_sync to post a kqueue notification when things are complete.
+ */
+int
+sf_sync_kqueue_setup(struct sendfile_sync *sfs, struct sf_hdtr_kq *sfkq)
+{
+ struct kevent kev;
+ int error;
+
+ sfs->flags |= SF_KQUEUE;
+
+ /* Check the flags are valid */
+ if ((sfkq->kq_flags & ~(EV_CLEAR | EV_DISPATCH | EV_ONESHOT)) != 0)
+ return (EINVAL);
+
+ SFSYNC_DPRINTF("%s: sfs=%p: kqfd=%d, flags=0x%08x, ident=%p, udata=%p\n",
+ __func__,
+ sfs,
+ sfkq->kq_fd,
+ sfkq->kq_flags,
+ (void *) sfkq->kq_ident,
+ (void *) sfkq->kq_udata);
+
+ /* Setup and register a knote on the given kqfd. */
+ kev.ident = (uintptr_t) sfkq->kq_ident;
+ kev.filter = EVFILT_SENDFILE;
+ kev.flags = EV_ADD | EV_ENABLE | EV_FLAG1 | sfkq->kq_flags;
+ kev.data = (intptr_t) sfs;
+ kev.udata = sfkq->kq_udata;
+
+ error = kqfd_register(sfkq->kq_fd, &kev, curthread, 1);
+ if (error != 0) {
+ SFSYNC_DPRINTF("%s: returned %d\n", __func__, error);
+ }
+ return (error);
+}
+
+void
+sf_sync_set_state(struct sendfile_sync *sfs, sendfile_sync_state_t state,
+ int islocked)
+{
+ sendfile_sync_state_t old_state;
+
+ if (!islocked)
+ mtx_lock(&sfs->mtx);
+
+ /*
+ * Update our current state.
+ */
+ old_state = sfs->state;
+ sfs->state = state;
+ SFSYNC_DPRINTF("%s: (%llu) sfs=%p; going from %d to %d\n",
+ __func__,
+ (unsigned long long) curthread->td_tid,
+ sfs,
+ old_state,
+ state);
+
+ /*
+ * If we're transitioning from RUNNING to COMPLETED and the count is
+ * zero, then post the knote. The caller may have completed the
+ * send before we updated the state to COMPLETED and we need to make
+ * sure this is communicated.
+ */
+ if (old_state == SF_STATE_RUNNING
+ && state == SF_STATE_COMPLETED
+ && sfs->count == 0
+ && (sfs->flags & SF_KQUEUE)) {
+ SFSYNC_DPRINTF("%s: (%llu) sfs=%p: triggering knote!\n",
+ __func__,
+ (unsigned long long) curthread->td_tid,
+ sfs);
+ KNOTE_LOCKED(&sfs->klist, 1);
+ }
+
+ if (!islocked)
+ mtx_unlock(&sfs->mtx);
+}
+
+/*
+ * Set the retval/errno for the given transaction.
+ *
+ * This will eventually/ideally be used when the KNOTE is fired off
+ * to signify the completion of this transaction.
+ *
+ * The sfsync lock should be held before entering this function.
+ */
+void
+sf_sync_set_retval(struct sendfile_sync *sfs, off_t retval, int xerrno)
+{
+
+ KASSERT(mtx_owned(&sfs->mtx), ("%s: sfs=%p: not locked but should be!",
+ __func__,
+ sfs));
+
+ SFSYNC_DPRINTF("%s: (%llu) sfs=%p: errno=%d, retval=%jd\n",
+ __func__,
+ (unsigned long long) curthread->td_tid,
+ sfs,
+ xerrno,
+ (intmax_t) retval);
+
+ sfs->retval = retval;
+ sfs->xerrno = xerrno;
+}
+
+/*
* sendfile(2)
*
* int sendfile(int fd, int s, off_t offset, size_t nbytes,
@@ -1992,15 +2385,21 @@ sys_sendfile(struct thread *td, struct sendfile_args *uap)
int
_do_sendfile(struct thread *td, int src_fd, int sock_fd, int flags,
int compat, off_t offset, size_t nbytes, off_t *sbytes,
- struct uio *hdr_uio, struct uio *trl_uio)
+ struct uio *hdr_uio,
+ struct uio *trl_uio, struct sf_hdtr_kq *hdtr_kq)
{
cap_rights_t rights;
struct sendfile_sync *sfs = NULL;
struct file *fp;
int error;
+ int do_kqueue = 0;
+ int do_free = 0;
AUDIT_ARG_FD(src_fd);
+ if (hdtr_kq != NULL)
+ do_kqueue = 1;
+
/*
* sendfile(2) can start at any offset within a file so we require
* CAP_READ+CAP_SEEK = CAP_PREAD.
@@ -2011,20 +2410,121 @@ _do_sendfile(struct thread *td, int src_fd, int sock_fd, int flags,
}
/*
+ * If SF_KQUEUE is set but no kqueue data was copied in,
+ * error out.
+ */
+ if ((flags & SF_KQUEUE) && do_kqueue == 0) {
+ SFSYNC_DPRINTF("%s: SF_KQUEUE but no kqueue data!\n", __func__);
+ error = EINVAL;
+ fdrop(fp, td);
+ goto out;
+ }
+
+ /*
* If we need to wait for completion, initialise the sfsync
* state here.
*/
- if (flags & SF_SYNC)
- sfs = sf_sync_alloc(flags & SF_SYNC);
+ if (flags & (SF_SYNC | SF_KQUEUE))
+ sfs = sf_sync_alloc(flags & (SF_SYNC | SF_KQUEUE));
+
+ if (flags & SF_KQUEUE) {
+ error = sf_sync_kqueue_setup(sfs, hdtr_kq);
+ if (error) {
+ SFSYNC_DPRINTF("%s: (%llu) error; sfs=%p\n",
+ __func__,
+ (unsigned long long) curthread->td_tid,
+ sfs);
+ sf_sync_set_state(sfs, SF_STATE_FREEING, 0);
+ sf_sync_free(sfs);
+ fdrop(fp, td);
+ goto out;
+ }
+ }
+ /*
+ * Do the sendfile call.
+ *
+ * If this fails, it'll free the mbuf chain which will free up the
+ * sendfile_sync references.
+ */
error = fo_sendfile(fp, sock_fd, hdr_uio, trl_uio, offset,
nbytes, sbytes, flags, compat ? SFK_COMPAT : 0, sfs, td);
/*
- * If appropriate, do the wait and free here.
+ * If the sendfile call succeeded, transition the sf_sync state
+ * to RUNNING, then COMPLETED.
+ *
+ * If the sendfile call failed, then the sendfile call may have
+ * actually sent some data first - so we check to see whether
+ * any data was sent. If some data was queued (ie, count > 0)
+ * then we can't call free; we have to wait until the partial
+ * transaction completes before we continue along.
+ *
+ * This has the side effect of firing off the knote
+ * if the refcount has hit zero by the time we get here.
*/
if (sfs != NULL) {
+ mtx_lock(&sfs->mtx);
+ if (error == 0 || sfs->count > 0) {
+ /*
+ * When it's time to do async sendfile, the transition
+ * to RUNNING signifies that we're actually actively
+ * adding and completing mbufs. When the last disk
+ * buffer is read (ie, when we're not doing any
+ * further read IO and all subsequent stuff is mbuf
+ * transmissions) we'll transition to COMPLETED
+ * and when the final mbuf is freed, the completion
+ * will be signaled.
+ */
+ sf_sync_set_state(sfs, SF_STATE_RUNNING, 1);
+
+ /*
+ * Set the retval before we signal completed.
+ * If we do it the other way around then transitioning to
+ * COMPLETED may post the knote before the return status
+ * is set!
+ *
+ * XXX for now, errno is always 0, as we don't post
+ * knotes if sendfile failed. Maybe that'll change later.
+ */
+ sf_sync_set_retval(sfs, *sbytes, error);
+
+ /*
+ * And now transition to completed, which will kick off
+ * the knote if required.
+ */
+ sf_sync_set_state(sfs, SF_STATE_COMPLETED, 1);
+ } else {
+ /*
+ * Error isn't zero and sfs->count is zero, so
+ * nothing else is left to wake things up.
+ * Thus, free here.
+ */
+ sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
+ do_free = 1;
+ }
+
+ /*
+ * Next - wait if appropriate.
+ */
sf_sync_syscall_wait(sfs);
+
+ /*
+ * If we're not doing kqueue notifications, we can
+ * transition this immediately to the freeing state.
+ */
+ if ((sfs->flags & SF_KQUEUE) == 0) {
+ sf_sync_set_state(sfs, SF_STATE_FREEING, 1);
+ do_free = 1;
+ }
+
+ mtx_unlock(&sfs->mtx);
+ }
+
+ /*
+ * If do_free is set, free here.
+ *
+ * If we're only doing sleep notification (no kqueue),
+ * we also free here; it's the only chance we have.
+ */
+ if (sfs != NULL && do_free == 1) {
sf_sync_free(sfs);
}
@@ -2036,16 +2536,20 @@ _do_sendfile(struct thread *td, int src_fd, int sock_fd, int flags,
fdrop(fp, td);
out:
+ /* Return error */
return (error);
}
+
static int
do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
{
struct sf_hdtr hdtr;
+ struct sf_hdtr_kq hdtr_kq;
struct uio *hdr_uio, *trl_uio;
int error;
off_t sbytes;
+ int do_kqueue = 0;
/*
* File offset must be positive. If it goes beyond EOF
@@ -2070,10 +2574,25 @@ do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
if (error != 0)
goto out;
}
+
+ /*
+ * If SF_KQUEUE is set, also copy in the kqueue completion
+ * data that follows the normal sf_hdtr, and set
+ * do_kqueue=1.
+ */
+ if (uap->flags & SF_KQUEUE) {
+ error = copyin(((char *) uap->hdtr) + sizeof(hdtr),
+ &hdtr_kq,
+ sizeof(hdtr_kq));
+ if (error != 0)
+ goto out;
+ do_kqueue = 1;
+ }
}
+ /* Call sendfile */
error = _do_sendfile(td, uap->fd, uap->s, uap->flags, compat,
- uap->offset, uap->nbytes, &sbytes, hdr_uio, trl_uio);
+ uap->offset, uap->nbytes, &sbytes, hdr_uio, trl_uio, &hdtr_kq);
if (uap->sbytes != NULL) {
copyout(&sbytes, uap->sbytes, sizeof(off_t));
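On the consuming side, a caller that issued sendfile(2) with SF_KQUEUE would collect the completion with an ordinary kevent(2) call; filt_sfsync() reports the event once count == 0 and the state is COMPLETED. A hypothetical sketch follows; note that the retval/xerrno recorded by sf_sync_set_retval() are not yet propagated into the kevent itself.

#include <sys/types.h>
#include <sys/event.h>
#include <err.h>

/* Hypothetical completion wait (not part of this commit). */
static void
wait_for_sendfile(int kq, uintptr_t ident)
{
	struct kevent ev;

	/* Blocks until the EVFILT_SENDFILE knote fires. */
	if (kevent(kq, NULL, 0, &ev, 1, NULL) != 1)
		err(1, "kevent");
	if (ev.filter == EVFILT_SENDFILE && ev.ident == ident) {
		/* Transfer complete; ev.udata carries the kq_udata cookie. */
	}
}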
diff --git a/sys/sys/sf_base.h b/sys/sys/sf_base.h
index 1c03ef4..7c8d49c 100644
--- a/sys/sys/sf_base.h
+++ b/sys/sys/sf_base.h
@@ -31,6 +31,7 @@
extern int _do_sendfile(struct thread *, int src_fd, int sock_fd, int flags,
int compat, off_t offset, size_t nbytes, off_t *sbytes,
- struct uio *hdr_uio, struct uio *trl_uio);
+ struct uio *hdr_uio, struct uio *trl_uio,
+ struct sf_hdtr_kq *hdtr_kq);
#endif /* _SYS_SF_BASE_H_ */
diff --git a/sys/sys/sf_sync.h b/sys/sys/sf_sync.h
index c66f4d9..04dee38 100644
--- a/sys/sys/sf_sync.h
+++ b/sys/sys/sf_sync.h
@@ -29,17 +29,36 @@
#ifndef _SYS_SF_SYNC_H_
#define _SYS_SF_SYNC_H_
+typedef enum {
+ SF_STATE_NONE,
+ SF_STATE_SETUP,
+ SF_STATE_RUNNING,
+ SF_STATE_COMPLETED,
+ SF_STATE_FREEING
+} sendfile_sync_state_t;
+
struct sendfile_sync {
- uint32_t flags;
struct mtx mtx;
struct cv cv;
- unsigned count;
+ struct knlist klist;
+ uint32_t flags;
+ uint32_t count;
+ int32_t xerrno; /* Completion errno, if retval < 0 */
+ off_t retval; /* Completion retval (eg written bytes) */
+ sendfile_sync_state_t state;
};
+/* XXX pollution */
+struct sf_hdtr_kq;
+
extern struct sendfile_sync * sf_sync_alloc(uint32_t flags);
extern void sf_sync_syscall_wait(struct sendfile_sync *);
extern void sf_sync_free(struct sendfile_sync *);
+extern void sf_sync_try_free(struct sendfile_sync *);
extern void sf_sync_ref(struct sendfile_sync *);
extern void sf_sync_deref(struct sendfile_sync *);
+extern int sf_sync_kqueue_setup(struct sendfile_sync *, struct sf_hdtr_kq *);
+extern void sf_sync_set_state(struct sendfile_sync *, sendfile_sync_state_t, int);
+extern void sf_sync_set_retval(struct sendfile_sync *, off_t, int);
#endif /* !_SYS_SF_SYNC_H_ */
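Taken together, the new enum encodes the following lifecycle, as driven by _do_sendfile() and the filter callbacks above (a summary reading of this diff, not authoritative documentation):

/*
 * sf_sync lifecycle, as implemented in this change:
 *
 *   sf_sync_alloc()                     -> SF_STATE_SETUP
 *   fo_sendfile() succeeded/queued data -> SF_STATE_RUNNING
 *   no further I/O will be queued       -> SF_STATE_COMPLETED
 *       (count == 0 here wakes SF_SYNC sleepers, fires SF_KQUEUE knotes)
 *   count == 0 and knote list empty     -> SF_STATE_FREEING
 *   sf_sync_free()                      destroys mtx/cv/klist, zone-frees
 */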