summaryrefslogtreecommitdiffstats
path: root/sys
diff options
context:
space:
mode:
author: mdf <mdf@FreeBSD.org> 2011-04-18 16:32:22 +0000
committer: mdf <mdf@FreeBSD.org> 2011-04-18 16:32:22 +0000
commit: 9c9a32d97b41ab9d0cae56c7e428ad6d5cd1302f (patch)
tree: 99ab3d40a9311d51c78c3b3e6b880d6ba7d2560c /sys
parent: 0bbb5b8e1ab919b4d265f1857ccd42679a2cb39c (diff)
download: FreeBSD-src-9c9a32d97b41ab9d0cae56c7e428ad6d5cd1302f.zip
download: FreeBSD-src-9c9a32d97b41ab9d0cae56c7e428ad6d5cd1302f.tar.gz
Add the posix_fallocate(2) syscall. The default implementation in
vop_stdallocate() is filesystem agnostic and will run as slow as a
read/write loop in userspace; however, it serves to correctly implement
the functionality for filesystems that do not implement a VOP_ALLOCATE.

Note that __FreeBSD_version was already bumped today to 900036 for any
ports which would like to use this function.

Also reserve space in the syscall table for posix_fadvise(2).

Reviewed by: -arch (previous version)
Diffstat (limited to 'sys')
-rw-r--r-- sys/compat/freebsd32/freebsd32_misc.c 12
-rw-r--r-- sys/compat/freebsd32/syscalls.master 4
-rw-r--r-- sys/kern/syscalls.master 3
-rw-r--r-- sys/kern/vfs_default.c 131
-rw-r--r-- sys/kern/vfs_syscalls.c 80
-rw-r--r-- sys/kern/vnode_if.src 10
-rw-r--r-- sys/sys/fcntl.h 5
-rw-r--r-- sys/sys/vnode.h 1
8 files changed, 245 insertions, 1 deletions
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index 5772c0e..23985d3 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -2790,3 +2790,15 @@ freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap)
bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname));
return (copyout(&stat32, uap->stat, version));
}
+
+/*
+ * 32-bit compatibility entry point for posix_fallocate(2).
+ *
+ * The 64-bit offset and length arrive as separate low and high 32-bit
+ * halves.  Reassemble each pair into an off_t, fill in a native argument
+ * structure and hand it to posix_fallocate(), whose result is returned
+ * unchanged.
+ */
+int
+freebsd32_posix_fallocate(struct thread *td,
+    struct freebsd32_posix_fallocate_args *uap)
+{
+	struct posix_fallocate_args native;
+	off_t hi;
+
+	native.fd = uap->fd;
+
+	/* High half goes into bits 32..63, low half into bits 0..31. */
+	hi = (off_t)uap->offsethi << 32;
+	native.offset = (uap->offsetlo | hi);
+
+	hi = (off_t)uap->lenhi << 32;
+	native.len = (uap->lenlo | hi);
+
+	return (posix_fallocate(td, &native));
+}
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
index da42133..d524f3c 100644
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -986,3 +986,7 @@
529 AUE_NULL NOPROTO { int rctl_remove_rule(const void *inbufp, \
size_t inbuflen, void *outbufp, \
size_t outbuflen); }
+530 AUE_NULL STD { int freebsd32_posix_fallocate(int fd,\
+ uint32_t offsetlo, uint32_t offsethi,\
+ uint32_t lenlo, uint32_t lenhi); }
+531 AUE_NULL UNIMPL posix_fadvise
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index e209731..af958c9 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -944,5 +944,8 @@
529 AUE_NULL STD { int rctl_remove_rule(const void *inbufp, \
size_t inbuflen, void *outbufp, \
size_t outbuflen); }
+530 AUE_NULL STD { int posix_fallocate(int fd, \
+ off_t offset, off_t len); }
+531 AUE_NULL UNIMPL posix_fadvise
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 195e735..6fd4b97 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -99,6 +99,7 @@ struct vop_vector default_vnodeops = {
.vop_advlock = vop_stdadvlock,
.vop_advlockasync = vop_stdadvlockasync,
.vop_advlockpurge = vop_stdadvlockpurge,
+ .vop_allocate = vop_stdallocate,
.vop_bmap = vop_stdbmap,
.vop_close = VOP_NULL,
.vop_fsync = VOP_NULL,
@@ -855,6 +856,136 @@ out:
return (error);
}
+int /*
+ * Default VOP_ALLOCATE implementation, installed in default_vnodeops.
+ *
+ * Covers ap->a_len bytes starting at ap->a_offset of ap->a_vp: the file is
+ * first grown with VOP_SETATTR if the range ends past its current size,
+ * then the range is walked in chunks of at most one I/O block.  Each chunk
+ * is read back (or zero-filled when it starts at or beyond the recorded
+ * file size) and written out again at the same offset.
+ *
+ * Returns 0 on success, otherwise the first error reported by one of the
+ * VOP or locking calls made here.  On success the vnode is still locked;
+ * on error it is unlocked before returning if it was still locked.
+ */
+vop_stdallocate(struct vop_allocate_args *ap)
+{
+#ifdef __notyet__
+ struct statfs sfs;
+#endif
+ struct iovec aiov;
+ struct vattr vattr, *vap;
+ struct uio auio;
+ off_t len, cur, offset;
+ uint8_t *buf;
+ struct thread *td;
+ struct vnode *vp;
+ size_t iosize;
+ int error, locked;
+
+ buf = NULL;
+ error = 0;
+ locked = 1; /* cleared while the lock is dropped to yield; stays 0 if relocking fails */
+ td = curthread;
+ vap = &vattr;
+ vp = ap->a_vp;
+ len = ap->a_len;
+ offset = ap->a_offset;
+
+ error = VOP_GETATTR(vp, vap, td->td_ucred); /* supplies va_blocksize and va_size used below */
+ if (error != 0)
+ goto out;
+ iosize = vap->va_blocksize;
+ if (iosize == 0) /* no block size reported: fall back to BLKDEV_IOSIZE */
+ iosize = BLKDEV_IOSIZE;
+ if (iosize > MAXPHYS) /* cap the chunk size (and buffer) at MAXPHYS */
+ iosize = MAXPHYS;
+ buf = malloc(iosize, M_TEMP, M_WAITOK); /* holds one chunk at a time */
+
+#ifdef __notyet__
+ /*
+ * Check if the filesystem sets f_maxfilesize; if not use
+ * VOP_SETATTR to perform the check.
+ */
+ error = VFS_STATFS(vp->v_mount, &sfs, td);
+ if (error != 0)
+ goto out;
+ if (sfs.f_maxfilesize) {
+ if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
+ offset + len > sfs.f_maxfilesize) {
+ error = EFBIG;
+ goto out;
+ }
+ } else
+#endif
+ if (offset + len > vap->va_size) { /* range ends past the current size: grow the file first */
+ VATTR_NULL(vap);
+ vap->va_size = offset + len;
+ error = VOP_SETATTR(vp, vap, td->td_ucred);
+ if (error != 0)
+ goto out;
+ }
+
+ while (len > 0) {
+ if (should_yield()) { /* drop the vnode lock, yield, then relock and refresh vap */
+ VOP_UNLOCK(vp, 0);
+ locked = 0;
+ kern_yield(-1);
+ error = vn_lock(vp, LK_EXCLUSIVE);
+ if (error != 0)
+ break; /* locked is still 0, so the exit path skips VOP_UNLOCK */
+ locked = 1;
+ error = VOP_GETATTR(vp, vap, td->td_ucred);
+ if (error != 0)
+ break;
+ }
+
+ /*
+ * Read and write back anything below the nominal file
+ * size. There's currently no way outside the filesystem
+ * to know whether this area is sparse or not.
+ */
+ cur = iosize; /* start from a full block ... */
+ if ((offset % iosize) != 0) /* ... stop at the next iosize boundary if unaligned ... */
+ cur -= (offset % iosize);
+ if (cur > len) /* ... and never run past the requested range */
+ cur = len;
+ if (offset < vap->va_size) { /* below the recorded size: read the existing bytes */
+ aiov.iov_base = buf;
+ aiov.iov_len = cur;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = offset;
+ auio.uio_resid = cur;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_td = td;
+ error = VOP_READ(vp, &auio, 0, td->td_ucred);
+ if (error != 0)
+ break;
+ if (auio.uio_resid > 0) { /* short read: zero the tail of buf that was not filled */
+ bzero(buf + cur - auio.uio_resid,
+ auio.uio_resid);
+ }
+ } else { /* at or beyond the recorded size: write zeros */
+ bzero(buf, cur);
+ }
+
+ aiov.iov_base = buf;
+ aiov.iov_len = cur;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = offset;
+ auio.uio_resid = cur;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_td = td;
+
+ error = VOP_WRITE(vp, &auio, 0, td->td_ucred); /* write the chunk back at the same offset */
+ if (error != 0)
+ break;
+
+ len -= cur;
+ offset += cur;
+ }
+
+ out:
+ KASSERT(locked || error != 0, ("How'd I get unlocked with no error?"));
+ if (locked && error != 0) /* failing while still holding the lock: release it */
+ VOP_UNLOCK(vp, 0);
+ free(buf, M_TEMP);
+ return (error);
+}
+
/*
* vfs default ops
* used to fill the vfs function table to get reasonable default return values.
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 4fc198e..26a21e3 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -4671,3 +4671,83 @@ out:
VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
+
+/*
+ * Implementation of posix_fallocate(2): ask the filesystem to allocate
+ * storage for the byte range [offset, offset + len) of the file open on fd.
+ *
+ * Returns 0 on success, otherwise an errno value:
+ *	ESPIPE	fd refers to a pipe or FIFO
+ *	ENODEV	fd is not backed by a vnode, or the vnode is not a
+ *		regular file
+ *	EBADF	fd is not open for writing
+ *	EINVAL	offset is negative or len is not positive
+ *	EFBIG	offset + len would exceed OFF_MAX
+ * or the error reported by fget(), vn_start_write(), vn_lock(), the MAC
+ * write check or VOP_ALLOCATE().
+ */
+static int
+kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
+{
+	struct file *fp;
+	struct mount *mp;
+	struct vnode *vp;
+	int error, vfslocked, vnlocked;
+
+	fp = NULL;
+	mp = NULL;
+	vfslocked = 0;
+	vnlocked = 0;
+	error = fget(td, fd, &fp);
+	if (error != 0)
+		goto out;
+
+	switch (fp->f_type) {
+	case DTYPE_VNODE:
+		break;
+	case DTYPE_PIPE:
+	case DTYPE_FIFO:
+		error = ESPIPE;
+		goto out;
+	default:
+		error = ENODEV;
+		goto out;
+	}
+	if ((fp->f_flag & FWRITE) == 0) {
+		error = EBADF;
+		goto out;
+	}
+	vp = fp->f_vnode;
+	if (vp->v_type != VREG) {
+		error = ENODEV;
+		goto out;
+	}
+	if (offset < 0 || len <= 0) {
+		error = EINVAL;
+		goto out;
+	}
+	/* Check for wrap. */
+	if (offset > OFF_MAX - len) {
+		error = EFBIG;
+		goto out;
+	}
+
+	bwillwrite();
+	vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+	error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
+	if (error != 0) {
+		/*
+		 * A failed vn_start_write() has not accounted a write on
+		 * the mount but may still have stored it in mp.  Clear mp
+		 * so the common exit path does not balance a write that
+		 * never started.
+		 */
+		mp = NULL;
+		goto out;
+	}
+	error = vn_lock(vp, LK_EXCLUSIVE);
+	if (error != 0)
+		goto out;
+	vnlocked = 1;
+#ifdef MAC
+	error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
+	if (error != 0)
+		goto out;
+#endif
+	error = VOP_ALLOCATE(vp, offset, len);
+	/* VOP_ALLOCATE hands the vnode back unlocked when it fails. */
+	if (error != 0)
+		vnlocked = 0;
+ out:
+	if (vnlocked)
+		VOP_UNLOCK(vp, 0);
+	/* mp is NULL unless vn_start_write() succeeded. */
+	vn_finished_write(mp);
+	VFS_UNLOCK_GIANT(vfslocked);
+	if (fp != NULL)
+		fdrop(fp, td);
+	return (error);
+}
+
+/*
+ * posix_fallocate(2) system call handler: unpack the descriptor, offset
+ * and length from the argument structure and pass them on to
+ * kern_posix_fallocate(), returning its result.
+ */
+int
+posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
+{
+	int error;
+
+	error = kern_posix_fallocate(td, uap->fd, uap->offset, uap->len);
+	return (error);
+}
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 304e009..fe838ec 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -608,6 +608,7 @@ vop_vptofh {
IN struct fid *fhp;
};
+
%% vptocnp vp L L L
%% vptocnp vpp - U -
@@ -618,3 +619,12 @@ vop_vptocnp {
INOUT char *buf;
INOUT int *buflen;
};
+
+
+%% allocate vp E E U
+
+vop_allocate {
+ IN struct vnode *vp;
+ IN off_t offset;
+ IN off_t len;
+};
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index 6f6e348..6f48ee7 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -278,7 +278,7 @@ struct oflock {
#endif
/*
- * XXX missing posix_fadvise() and posix_fallocate(), and POSIX_FADV_* macros.
+ * XXX missing posix_fadvise() and POSIX_FADV_* macros.
*/
#ifndef _KERNEL
@@ -289,6 +289,9 @@ int fcntl(int, int, ...);
#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200809
int openat(int, const char *, int, ...);
#endif
+#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200112
+int posix_fallocate(int, off_t, off_t);
+#endif
#if __BSD_VISIBLE
int flock(int, int);
#endif
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index e7ff2f4..bfe94fb 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -689,6 +689,7 @@ int vop_stdaccessx(struct vop_accessx_args *ap);
int vop_stdadvlock(struct vop_advlock_args *ap);
int vop_stdadvlockasync(struct vop_advlockasync_args *ap);
int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
+int vop_stdallocate(struct vop_allocate_args *ap);
int vop_stdpathconf(struct vop_pathconf_args *);
int vop_stdpoll(struct vop_poll_args *);
int vop_stdvptocnp(struct vop_vptocnp_args *ap);
OpenPOWER on IntegriCloud