summaryrefslogtreecommitdiffstats
path: root/sys/kern/uipc_syscalls.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/kern/uipc_syscalls.c')
-rw-r--r--sys/kern/uipc_syscalls.c383
1 files changed, 254 insertions, 129 deletions
diff --git a/sys/kern/uipc_syscalls.c b/sys/kern/uipc_syscalls.c
index 8229390..75aab6e 100644
--- a/sys/kern/uipc_syscalls.c
+++ b/sys/kern/uipc_syscalls.c
@@ -45,6 +45,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/systm.h>
#include <sys/capability.h>
+#include <sys/condvar.h>
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
@@ -57,6 +58,7 @@ __FBSDID("$FreeBSD$");
#include <sys/file.h>
#include <sys/filio.h>
#include <sys/jail.h>
+#include <sys/mman.h>
#include <sys/mount.h>
#include <sys/mbuf.h>
#include <sys/protosw.h>
@@ -86,7 +88,7 @@ __FBSDID("$FreeBSD$");
#include <vm/vm_param.h>
#include <vm/vm_object.h>
#include <vm/vm_page.h>
-#include <vm/vm_pageout.h>
+#include <vm/vm_pager.h>
#include <vm/vm_kern.h>
#include <vm/vm_extern.h>
@@ -1850,8 +1852,6 @@ getsockaddr(namp, uaddr, len)
return (error);
}
-#include <sys/condvar.h>
-
struct sendfile_sync {
struct mtx mtx;
struct cv cv;
@@ -1917,6 +1917,10 @@ do_sendfile(struct thread *td, struct sendfile_args *uap, int compat)
cap_rights_t rights;
int error;
+ /*
+ * File offset must be positive. If it goes beyond EOF
+ * we send only the header/trailer and no payload data.
+ */
if (uap->offset < 0)
return (EINVAL);
@@ -1978,79 +1982,242 @@ freebsd4_sendfile(struct thread *td, struct freebsd4_sendfile_args *uap)
}
#endif /* COMPAT_FREEBSD4 */
-int
-vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
- struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
- int kflags, struct thread *td)
+static int
+sendfile_readpage(vm_object_t obj, struct vnode *vp, int nd,
+ off_t off, int xfsize, int bsize, struct thread *td, vm_page_t *res)
{
- struct vnode *vp = fp->f_vnode;
- struct file *sock_fp;
- struct vm_object *obj = NULL;
- struct socket *so = NULL;
- struct mbuf *m = NULL;
- struct sf_buf *sf;
- struct vm_page *pg;
- struct vattr va;
- struct sendfile_sync *sfs = NULL;
- cap_rights_t rights;
- off_t off, xfsize, fsbytes = 0, sbytes = 0, rem = 0;
- int bsize, error, hdrlen = 0, mnw = 0;
+ vm_page_t m;
+ vm_pindex_t pindex;
+ ssize_t resid;
+ int error, readahead, rv;
- vn_lock(vp, LK_SHARED | LK_RETRY);
- if (vp->v_type == VREG) {
- bsize = vp->v_mount->mnt_stat.f_iosize;
- if (nbytes == 0) {
- error = VOP_GETATTR(vp, &va, td->td_ucred);
- if (error != 0) {
- VOP_UNLOCK(vp, 0);
- obj = NULL;
- goto out;
+ pindex = OFF_TO_IDX(off);
+ VM_OBJECT_WLOCK(obj);
+ m = vm_page_grab(obj, pindex, (vp != NULL ? VM_ALLOC_NOBUSY |
+ VM_ALLOC_IGN_SBUSY : 0) | VM_ALLOC_WIRED | VM_ALLOC_NORMAL);
+
+ /*
+ * Check if page is valid for what we need, otherwise initiate I/O.
+ *
+ * The non-zero nd argument prevents disk I/O, instead we
+ * return the caller what he specified in nd. In particular,
+ * if we already turned some pages into mbufs, nd == EAGAIN
+ * and the main function send them the pages before we come
+ * here again and block.
+ */
+ if (m->valid != 0 && vm_page_is_valid(m, off & PAGE_MASK, xfsize)) {
+ if (vp == NULL)
+ vm_page_xunbusy(m);
+ VM_OBJECT_WUNLOCK(obj);
+ *res = m;
+ return (0);
+ } else if (nd != 0) {
+ if (vp == NULL)
+ vm_page_xunbusy(m);
+ error = nd;
+ goto free_page;
+ }
+
+ /*
+ * Get the page from backing store.
+ */
+ error = 0;
+ if (vp != NULL) {
+ VM_OBJECT_WUNLOCK(obj);
+ readahead = sfreadahead * MAXBSIZE;
+
+ /*
+ * Use vn_rdwr() instead of the pager interface for
+ * the vnode, to allow the read-ahead.
+ *
+ * XXXMAC: Because we don't have fp->f_cred here, we
+ * pass in NOCRED. This is probably wrong, but is
+ * consistent with our original implementation.
+ */
+ error = vn_rdwr(UIO_READ, vp, NULL, readahead, trunc_page(off),
+ UIO_NOCOPY, IO_NODELOCKED | IO_VMIO | ((readahead /
+ bsize) << IO_SEQSHIFT), td->td_ucred, NOCRED, &resid, td);
+ SFSTAT_INC(sf_iocnt);
+ VM_OBJECT_WLOCK(obj);
+ } else {
+ if (vm_pager_has_page(obj, pindex, NULL, NULL)) {
+ rv = vm_pager_get_pages(obj, &m, 1, 0);
+ SFSTAT_INC(sf_iocnt);
+ m = vm_page_lookup(obj, pindex);
+ if (m == NULL)
+ error = EIO;
+ else if (rv != VM_PAGER_OK) {
+ vm_page_lock(m);
+ vm_page_free(m);
+ vm_page_unlock(m);
+ m = NULL;
+ error = EIO;
}
- rem = va.va_size;
- } else
- rem = nbytes;
+ } else {
+ pmap_zero_page(m);
+ m->valid = VM_PAGE_BITS_ALL;
+ m->dirty = 0;
+ }
+ if (m != NULL)
+ vm_page_xunbusy(m);
+ }
+ if (error == 0) {
+ *res = m;
+ } else if (m != NULL) {
+free_page:
+ vm_page_lock(m);
+ vm_page_unwire(m, 0);
+
+ /*
+ * See if anyone else might know about this page. If
+ * not and it is not valid, then free it.
+ */
+ if (m->wire_count == 0 && m->valid == 0 && !vm_page_busied(m))
+ vm_page_free(m);
+ vm_page_unlock(m);
+ }
+ VM_OBJECT_WUNLOCK(obj);
+ KASSERT(error != 0 || (m->wire_count > 0 && m->valid ==
+ VM_PAGE_BITS_ALL),
+ ("wrong page state m %p", m));
+ return (error);
+}
+
+static int
+sendfile_getobj(struct thread *td, struct file *fp, vm_object_t *obj_res,
+ struct vnode **vp_res, struct shmfd **shmfd_res, off_t *obj_size,
+ int *bsize)
+{
+ struct vattr va;
+ vm_object_t obj;
+ struct vnode *vp;
+ struct shmfd *shmfd;
+ int error;
+
+ vp = *vp_res = NULL;
+ obj = NULL;
+ shmfd = *shmfd_res = NULL;
+ *bsize = 0;
+
+ /*
+ * The file descriptor must be a regular file and have a
+ * backing VM object.
+ */
+ if (fp->f_type == DTYPE_VNODE) {
+ vp = fp->f_vnode;
+ vn_lock(vp, LK_SHARED | LK_RETRY);
+ if (vp->v_type != VREG) {
+ error = EINVAL;
+ goto out;
+ }
+ *bsize = vp->v_mount->mnt_stat.f_iosize;
+ error = VOP_GETATTR(vp, &va, td->td_ucred);
+ if (error != 0)
+ goto out;
+ *obj_size = va.va_size;
obj = vp->v_object;
- if (obj != NULL) {
- /*
- * Temporarily increase the backing VM
- * object's reference count so that a forced
- * reclamation of its vnode does not
- * immediately destroy it.
- */
- VM_OBJECT_WLOCK(obj);
- if ((obj->flags & OBJ_DEAD) == 0) {
- vm_object_reference_locked(obj);
- VM_OBJECT_WUNLOCK(obj);
- } else {
- VM_OBJECT_WUNLOCK(obj);
- obj = NULL;
- }
+ if (obj == NULL) {
+ error = EINVAL;
+ goto out;
}
- } else
- bsize = 0; /* silence gcc */
- VOP_UNLOCK(vp, 0);
- if (obj == NULL) {
+ } else if (fp->f_type == DTYPE_SHM) {
+ shmfd = fp->f_data;
+ obj = shmfd->shm_object;
+ *obj_size = shmfd->shm_size;
+ } else {
error = EINVAL;
goto out;
}
+ VM_OBJECT_WLOCK(obj);
+ if ((obj->flags & OBJ_DEAD) != 0) {
+ VM_OBJECT_WUNLOCK(obj);
+ error = EBADF;
+ goto out;
+ }
+
+ /*
+ * Temporarily increase the backing VM object's reference
+ * count so that a forced reclamation of its vnode does not
+ * immediately destroy it.
+ */
+ vm_object_reference_locked(obj);
+ VM_OBJECT_WUNLOCK(obj);
+ *obj_res = obj;
+ *vp_res = vp;
+ *shmfd_res = shmfd;
+
+out:
+ if (vp != NULL)
+ VOP_UNLOCK(vp, 0);
+ return (error);
+}
+
+static int
+kern_sendfile_getsock(struct thread *td, int s, struct file **sock_fp,
+ struct socket **so)
+{
+ cap_rights_t rights;
+ int error;
+
+ *sock_fp = NULL;
+ *so = NULL;
+
/*
* The socket must be a stream socket and connected.
- * Remember if it a blocking or non-blocking socket.
*/
- error = getsock_cap(td->td_proc->p_fd, sockfd,
- cap_rights_init(&rights, CAP_SEND), &sock_fp, NULL);
+ error = getsock_cap(td->td_proc->p_fd, s, cap_rights_init(&rights,
+ CAP_SEND), sock_fp, NULL);
+ if (error != 0)
+ return (error);
+ *so = (*sock_fp)->f_data;
+ if ((*so)->so_type != SOCK_STREAM)
+ return (EINVAL);
+ if (((*so)->so_state & SS_ISCONNECTED) == 0)
+ return (ENOTCONN);
+ return (0);
+}
+
+int
+vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
+ struct uio *trl_uio, off_t offset, size_t nbytes, off_t *sent, int flags,
+ int kflags, struct thread *td)
+{
+ struct file *sock_fp;
+ struct vnode *vp;
+ struct vm_object *obj;
+ struct socket *so;
+ struct mbuf *m;
+ struct sf_buf *sf;
+ struct vm_page *pg;
+ struct shmfd *shmfd;
+ struct sendfile_sync *sfs;
+ struct vattr va;
+ off_t off, xfsize, fsbytes, sbytes, rem, obj_size;
+ int error, bsize, nd, hdrlen, mnw;
+ bool inflight_called;
+
+ pg = NULL;
+ obj = NULL;
+ so = NULL;
+ m = NULL;
+ sfs = NULL;
+ fsbytes = sbytes = 0;
+ hdrlen = mnw = 0;
+ rem = nbytes;
+ obj_size = 0;
+ inflight_called = false;
+
+ error = sendfile_getobj(td, fp, &obj, &vp, &shmfd, &obj_size, &bsize);
+ if (error != 0)
+ return (error);
+ if (rem == 0)
+ rem = obj_size;
+
+ error = kern_sendfile_getsock(td, sockfd, &sock_fp, &so);
if (error != 0)
goto out;
- so = sock_fp->f_data;
- if (so->so_type != SOCK_STREAM) {
- error = EINVAL;
- goto out;
- }
- if ((so->so_state & SS_ISCONNECTED) == 0) {
- error = ENOTCONN;
- goto out;
- }
+
/*
* Do not wait on memory allocations but return ENOMEM for
* caller to retry later.
@@ -2123,7 +2290,7 @@ vn_sendfile(struct file *fp, int sockfd, struct uio *hdr_uio,
int done;
if ((nbytes != 0 && nbytes == fsbytes) ||
- (nbytes == 0 && va.va_size == fsbytes))
+ (nbytes == 0 && obj_size == fsbytes))
break;
mtail = NULL;
@@ -2197,13 +2364,16 @@ retry_space:
*/
space -= hdrlen;
- error = vn_lock(vp, LK_SHARED);
- if (error != 0)
- goto done;
- error = VOP_GETATTR(vp, &va, td->td_ucred);
- if (error != 0 || off >= va.va_size) {
- VOP_UNLOCK(vp, 0);
- goto done;
+ if (vp != NULL) {
+ error = vn_lock(vp, LK_SHARED);
+ if (error != 0)
+ goto done;
+ error = VOP_GETATTR(vp, &va, td->td_ucred);
+ if (error != 0 || off >= va.va_size) {
+ VOP_UNLOCK(vp, 0);
+ goto done;
+ }
+ obj_size = va.va_size;
}
/*
@@ -2211,7 +2381,6 @@ retry_space:
* dumped into socket buffer.
*/
while (space > loopbytes) {
- vm_pindex_t pindex;
vm_offset_t pgoff;
struct mbuf *m0;
@@ -2221,11 +2390,10 @@ retry_space:
* or the passed in nbytes.
*/
pgoff = (vm_offset_t)(off & PAGE_MASK);
- if (nbytes)
- rem = (nbytes - fsbytes - loopbytes);
- else
- rem = va.va_size -
- offset - fsbytes - loopbytes;
+ rem = obj_size - offset;
+ if (nbytes != 0)
+ rem = omin(rem, nbytes);
+ rem -= fsbytes + loopbytes;
xfsize = omin(PAGE_SIZE - pgoff, rem);
xfsize = omin(space - loopbytes, xfsize);
if (xfsize <= 0) {
@@ -2237,59 +2405,15 @@ retry_space:
* Attempt to look up the page. Allocate
* if not found or wait and loop if busy.
*/
- pindex = OFF_TO_IDX(off);
- VM_OBJECT_WLOCK(obj);
- pg = vm_page_grab(obj, pindex, VM_ALLOC_NOBUSY |
- VM_ALLOC_IGN_SBUSY | VM_ALLOC_NORMAL |
- VM_ALLOC_WIRED);
-
- /*
- * Check if page is valid for what we need,
- * otherwise initiate I/O.
- * If we already turned some pages into mbufs,
- * send them off before we come here again and
- * block.
- */
- if (pg->valid && vm_page_is_valid(pg, pgoff, xfsize))
- VM_OBJECT_WUNLOCK(obj);
- else if (m != NULL)
- error = EAGAIN; /* send what we already got */
- else if (flags & SF_NODISKIO)
- error = EBUSY;
- else {
- ssize_t resid;
- int readahead = sfreadahead * MAXBSIZE;
-
- VM_OBJECT_WUNLOCK(obj);
-
- /*
- * Get the page from backing store.
- * XXXMAC: Because we don't have fp->f_cred
- * here, we pass in NOCRED. This is probably
- * wrong, but is consistent with our original
- * implementation.
- */
- error = vn_rdwr(UIO_READ, vp, NULL, readahead,
- trunc_page(off), UIO_NOCOPY, IO_NODELOCKED |
- IO_VMIO | ((readahead / bsize) << IO_SEQSHIFT),
- td->td_ucred, NOCRED, &resid, td);
- SFSTAT_INC(sf_iocnt);
- if (error != 0)
- VM_OBJECT_WLOCK(obj);
- }
+ if (m != NULL)
+ nd = EAGAIN; /* send what we already got */
+ else if ((flags & SF_NODISKIO) != 0)
+ nd = EBUSY;
+ else
+ nd = 0;
+ error = sendfile_readpage(obj, vp, nd, off,
+ xfsize, bsize, td, &pg);
if (error != 0) {
- vm_page_lock(pg);
- vm_page_unwire(pg, 0);
- /*
- * See if anyone else might know about
- * this page. If not and it is not valid,
- * then free it.
- */
- if (pg->wire_count == 0 && pg->valid == 0 &&
- !vm_page_busied(pg))
- vm_page_free(pg);
- vm_page_unlock(pg);
- VM_OBJECT_WUNLOCK(obj);
if (error == EAGAIN)
error = 0; /* not a real error */
break;
@@ -2359,7 +2483,8 @@ retry_space:
}
}
- VOP_UNLOCK(vp, 0);
+ if (vp != NULL)
+ VOP_UNLOCK(vp, 0);
/* Add the buffer chain to the socket buffer. */
if (m != NULL) {
OpenPOWER on IntegriCloud