diff options
author | adrian <adrian@FreeBSD.org> | 2013-12-01 03:53:21 +0000 |
---|---|---|
committer | adrian <adrian@FreeBSD.org> | 2013-12-01 03:53:21 +0000 |
commit | 86274dd213a33a0a9012f50b3209adc1d5a20bed (patch) | |
tree | e3a2f725c4c9350bd7a0727c06a7497a8c4c68e3 | |
parent | 36d4a7a0b7b17cce6f348427fa01f9ed8d78bba0 (diff) | |
download | FreeBSD-src-86274dd213a33a0a9012f50b3209adc1d5a20bed.zip FreeBSD-src-86274dd213a33a0a9012f50b3209adc1d5a20bed.tar.gz |
Migrate the sendfile_sync structure into a public(ish) API in preparation
for extending and reusing it.
The sendfile_sync wrapper is mostly just a "mbuf transaction" wrapper,
used to indicate that the backing store for a group of mbufs has completed.
It's only being used by sendfile for now and it's only implementing a
sleep/wakeup rendezvous. However, there are other potential signaling
paths (kqueue) and other potential uses (socket zero-copy write) where the
same mechanism would also be useful.
So, with that in mind:
* extract the sendfile_sync code out into sf_sync_*() methods
* teach the sf_sync_alloc method about the current config flag -
it will eventually know about kqueue.
* move the sendfile_sync code out of do_sendfile() - the only thing
it now knows about is the sfs pointer. The guts of the sync
rendezvous (setup, rendezvous/wait, free) is now done in the
syscall wrapper.
* .. and teach the 32-bit compat sendfile call the same.
This should be a no-op. It's primarily preparation work for teaching
the sendfile_sync about kqueue notification.
Tested:
* Peter Holm's sendfile stress / regression scripts
Sponsored by: Netflix, Inc.
-rw-r--r-- | sys/compat/freebsd32/freebsd32_misc.c | 20 | ||||
-rw-r--r-- | sys/kern/kern_descrip.c | 4 | ||||
-rw-r--r-- | sys/kern/uipc_syscalls.c | 151 | ||||
-rw-r--r-- | sys/sys/file.h | 10 | ||||
-rw-r--r-- | sys/sys/sf_sync.h | 45 |
5 files changed, 189 insertions, 41 deletions
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 0703fd3..3b26745 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -83,6 +83,9 @@ __FBSDID("$FreeBSD$"); #include <sys/msg.h> #include <sys/sem.h> #include <sys/shm.h> +#include <sys/condvar.h> +#include <sys/sf_buf.h> +#include <sys/sf_sync.h> #ifdef INET #include <netinet/in.h> @@ -1653,12 +1656,14 @@ freebsd32_do_sendfile(struct thread *td, off_t offset; int error; off_t sbytes; + struct sendfile_sync *sfs; offset = PAIR32TO64(off_t, uap->offset); if (offset < 0) return (EINVAL); hdr_uio = trl_uio = NULL; + sfs = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr32, sizeof(hdtr32)); @@ -1692,8 +1697,21 @@ freebsd32_do_sendfile(struct thread *td, goto out; } + /* + * If we need to wait for completion, initialise the sfsync + * state here. + */ + if (uap->flags & SF_SYNC) + sfs = sf_sync_alloc(uap->flags & SF_SYNC); + error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, offset, - uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td); + uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, + sfs, td); + if (sfs != NULL) { + sf_sync_syscall_wait(sfs); + sf_sync_free(sfs); + } + fdrop(fp, td); if (uap->sbytes != NULL) copyout(&sbytes, uap->sbytes, sizeof(off_t)); diff --git a/sys/kern/kern_descrip.c b/sys/kern/kern_descrip.c index 433a308..c0d9008 100644 --- a/sys/kern/kern_descrip.c +++ b/sys/kern/kern_descrip.c @@ -3949,7 +3949,7 @@ badfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, static int badfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, - int kflags, struct thread *td) + int kflags, struct sendfile_sync *sfs, struct thread *td) { return (EBADF); @@ -3988,7 +3988,7 @@ invfo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, int invfo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, - int kflags, struct thread *td) + int kflags, struct sendfile_sync *sfs, struct thread *td) { return (EINVAL); diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c index 134df9d..2e41232 100644 --- a/sys/kern/uipc_syscalls.c +++ b/sys/kern/uipc_syscalls.c @@ -64,6 +64,7 @@ __FBSDID("$FreeBSD$"); #include <sys/protosw.h> #include <sys/rwlock.h> #include <sys/sf_buf.h> +#include <sys/sf_sync.h> #include <sys/sysent.h> #include <sys/socket.h> #include <sys/socketvar.h> @@ -1844,12 +1845,6 @@ getsockaddr(namp, uaddr, len) return (error); } -struct sendfile_sync { - struct mtx mtx; - struct cv cv; - unsigned count; -}; - /* * Detach mapped page and release resources back to the system. */ @@ -1871,15 +1866,94 @@ sf_buf_mext(struct mbuf *mb, void *addr, void *args) if (m->wire_count == 0 && m->object == NULL) vm_page_free(m); vm_page_unlock(m); - if (addr == NULL) - return (EXT_FREE_OK); - sfs = addr; + if (addr != NULL) { + sfs = addr; + sf_sync_deref(sfs); + } + return (EXT_FREE_OK); +} + +void +sf_sync_deref(struct sendfile_sync *sfs) +{ + + if (sfs == NULL) + return; + mtx_lock(&sfs->mtx); KASSERT(sfs->count> 0, ("Sendfile sync botchup count == 0")); if (--sfs->count == 0) cv_signal(&sfs->cv); mtx_unlock(&sfs->mtx); - return (EXT_FREE_OK); +} + +/* + * Allocate a sendfile_sync state structure. + * + * For now this only knows about the "sleep" sync, but later it will + * grow various other personalities. + */ +struct sendfile_sync * +sf_sync_alloc(uint32_t flags) +{ + struct sendfile_sync *sfs; + + sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); + mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); + cv_init(&sfs->cv, "sendfile"); + sfs->flags = flags; + + return (sfs); +} + +/* + * Take a reference to a sfsync instance. + * + * This has to map 1:1 to free calls coming in via sf_buf_mext(), + * so typically this will be referenced once for each mbuf allocated. + */ +void +sf_sync_ref(struct sendfile_sync *sfs) +{ + + if (sfs == NULL) + return; + + mtx_lock(&sfs->mtx); + sfs->count++; + mtx_unlock(&sfs->mtx); +} + +void +sf_sync_syscall_wait(struct sendfile_sync *sfs) +{ + + if (sfs == NULL) + return; + + mtx_lock(&sfs->mtx); + if (sfs->count != 0) + cv_wait(&sfs->cv, &sfs->mtx); + KASSERT(sfs->count == 0, ("sendfile sync still busy")); + mtx_unlock(&sfs->mtx); +} + +void +sf_sync_free(struct sendfile_sync *sfs) +{ + + if (sfs == NULL) + return; + + /* + * XXX we should ensure that nothing else has this + * locked before freeing. + */ + mtx_lock(&sfs->mtx); + KASSERT(sfs->count == 0, ("sendfile sync still busy")); + cv_destroy(&sfs->cv); + mtx_destroy(&sfs->mtx); + free(sfs, M_TEMP); } /* @@ -1909,6 +1983,7 @@ do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) cap_rights_t rights; int error; off_t sbytes; + struct sendfile_sync *sfs; /* * File offset must be positive. If it goes beyond EOF @@ -1918,6 +1993,7 @@ do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) return (EINVAL); hdr_uio = trl_uio = NULL; + sfs = NULL; if (uap->hdtr != NULL) { error = copyin(uap->hdtr, &hdtr, sizeof(hdtr)); @@ -1947,9 +2023,31 @@ do_sendfile(struct thread *td, struct sendfile_args *uap, int compat) goto out; } + /* + * If we need to wait for completion, initialise the sfsync + * state here. + */ + if (uap->flags & SF_SYNC) + sfs = sf_sync_alloc(uap->flags & SF_SYNC); + error = fo_sendfile(fp, uap->s, hdr_uio, trl_uio, uap->offset, - uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, td); + uap->nbytes, &sbytes, uap->flags, compat ? SFK_COMPAT : 0, sfs, td); + + /* + * If appropriate, do the wait and free here. + */ + if (sfs != NULL) { + sf_sync_syscall_wait(sfs); + sf_sync_free(sfs); + } + + /* + * XXX Should we wait until the send has completed before freeing the source + * file handle? It's the previous behaviour, sure, but is it required? + * We've wired down the page references after all. + */ fdrop(fp, td); + if (uap->sbytes != NULL) { copyout(&sbytes, uap->sbytes, sizeof(off_t)); } @@ -2177,7 +2275,7 @@ kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp, int vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, - int kflags, struct thread *td) + int kflags, struct sendfile_sync *sfs, struct thread *td) { struct file *sock_fp; struct vnode *vp; @@ -2187,7 +2285,6 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct sf_buf *sf; struct vm_page *pg; struct shmfd *shmfd; - struct sendfile_sync *sfs; struct vattr va; off_t off, xfsize, fsbytes, sbytes, rem, obj_size; int error, bsize, nd, hdrlen, mnw; @@ -2197,7 +2294,6 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, obj = NULL; so = NULL; m = NULL; - sfs = NULL; fsbytes = sbytes = 0; hdrlen = mnw = 0; rem = nbytes; @@ -2222,12 +2318,6 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, if (flags & SF_MNOWAIT) mnw = 1; - if (flags & SF_SYNC) { - sfs = malloc(sizeof *sfs, M_TEMP, M_WAITOK | M_ZERO); - mtx_init(&sfs->mtx, "sendfile", NULL, MTX_DEF); - cv_init(&sfs->cv, "sendfile"); - } - #ifdef MAC error = mac_socket_check_send(td->td_ucred, so); if (error != 0) @@ -2472,11 +2562,12 @@ retry_space: loopbytes += xfsize; off += xfsize; - if (sfs != NULL) { - mtx_lock(&sfs->mtx); - sfs->count++; - mtx_unlock(&sfs->mtx); - } + /* + * XXX eventually this should be a sfsync + * method call! + */ + if (sfs != NULL) + sf_sync_ref(sfs); } if (vp != NULL) @@ -2558,16 +2649,6 @@ out: if (m) m_freem(m); - if (sfs != NULL) { - mtx_lock(&sfs->mtx); - if (sfs->count != 0) - cv_wait(&sfs->cv, &sfs->mtx); - KASSERT(sfs->count == 0, ("sendfile sync still busy")); - cv_destroy(&sfs->cv); - mtx_destroy(&sfs->mtx); - free(sfs, M_TEMP); - } - if (error == ERESTART) error = EINTR; diff --git a/sys/sys/file.h b/sys/sys/file.h index 7b373f0..b7d358b 100644 --- a/sys/sys/file.h +++ b/sys/sys/file.h @@ -88,6 +88,9 @@ foffset_get(struct file *fp) return (foffset_lock(fp, FOF_NOLOCK)); } +/* XXX pollution? */ +struct sendfile_sync; + typedef int fo_rdwr_t(struct file *fp, struct uio *uio, struct ucred *active_cred, int flags, struct thread *td); @@ -107,7 +110,8 @@ typedef int fo_chown_t(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, struct thread *td); typedef int fo_sendfile_t(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, - off_t *sent, int flags, int kflags, struct thread *td); + off_t *sent, int flags, int kflags, + struct sendfile_sync *sfs, struct thread *td); typedef int fo_seek_t(struct file *fp, off_t offset, int whence, struct thread *td); typedef int fo_flags_t; @@ -368,11 +372,11 @@ fo_chown(struct file *fp, uid_t uid, gid_t gid, struct ucred *active_cred, static __inline int fo_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio, struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags, - int kflags, struct thread *td) + int kflags, struct sendfile_sync *sfs, struct thread *td) { return ((*fp->f_ops->fo_sendfile)(fp, sockfd, hdr_uio, trl_uio, offset, - nbytes, sent, flags, kflags, td)); + nbytes, sent, flags, kflags, sfs, td)); } static __inline int diff --git a/sys/sys/sf_sync.h b/sys/sys/sf_sync.h new file mode 100644 index 0000000..c66f4d9 --- /dev/null +++ b/sys/sys/sf_sync.h @@ -0,0 +1,45 @@ +/*- + * Copyright (c) 2013 Adrian Chadd <adrian@FreeBSD.org> + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _SYS_SF_SYNC_H_ +#define _SYS_SF_SYNC_H_ + +struct sendfile_sync { + uint32_t flags; + struct mtx mtx; + struct cv cv; + unsigned count; +}; + +extern struct sendfile_sync * sf_sync_alloc(uint32_t flags); +extern void sf_sync_syscall_wait(struct sendfile_sync *); +extern void sf_sync_free(struct sendfile_sync *); +extern void sf_sync_ref(struct sendfile_sync *); +extern void sf_sync_deref(struct sendfile_sync *); + +#endif /* !_SYS_SF_BUF_H_ */ |