summary refs log tree commit diff stats
diff options
context:
space:
mode:
author mdf <mdf@FreeBSD.org> 2011-04-18 16:32:22 +0000
committer mdf <mdf@FreeBSD.org> 2011-04-18 16:32:22 +0000
commit 9c9a32d97b41ab9d0cae56c7e428ad6d5cd1302f (patch)
tree 99ab3d40a9311d51c78c3b3e6b880d6ba7d2560c
parent 0bbb5b8e1ab919b4d265f1857ccd42679a2cb39c (diff)
download FreeBSD-src-9c9a32d97b41ab9d0cae56c7e428ad6d5cd1302f.zip
FreeBSD-src-9c9a32d97b41ab9d0cae56c7e428ad6d5cd1302f.tar.gz
Add the posix_fallocate(2) syscall. The default implementation in
vop_stdallocate() is filesystem agnostic and will run as slow as a read/write loop in userspace; however, it serves to correctly implement the functionality for filesystems that do not implement a VOP_ALLOCATE. Note that __FreeBSD_version was already bumped today to 900036 for any ports which would like to use this function. Also reserve space in the syscall table for posix_fadvise(2). Reviewed by: -arch (previous version)
-rw-r--r-- lib/libc/sys/Makefile.inc 2
-rw-r--r-- lib/libc/sys/Symbol.map 1
-rw-r--r-- lib/libc/sys/posix_fallocate.2 146
-rw-r--r-- sys/compat/freebsd32/freebsd32_misc.c 12
-rw-r--r-- sys/compat/freebsd32/syscalls.master 4
-rw-r--r-- sys/kern/syscalls.master 3
-rw-r--r-- sys/kern/vfs_default.c 131
-rw-r--r-- sys/kern/vfs_syscalls.c 80
-rw-r--r-- sys/kern/vnode_if.src 10
-rw-r--r-- sys/sys/fcntl.h 5
-rw-r--r-- sys/sys/vnode.h 1
11 files changed, 393 insertions, 2 deletions
diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc
index 152a14a..008180a 100644
--- a/lib/libc/sys/Makefile.inc
+++ b/lib/libc/sys/Makefile.inc
@@ -96,7 +96,7 @@ MAN+= abort2.2 accept.2 access.2 acct.2 adjtime.2 \
mq_setattr.2 \
msgctl.2 msgget.2 msgrcv.2 msgsnd.2 \
msync.2 munmap.2 nanosleep.2 nfssvc.2 ntp_adjtime.2 open.2 \
- pathconf.2 pipe.2 poll.2 posix_openpt.2 profil.2 \
+ pathconf.2 pipe.2 poll.2 posix_fallocate.2 posix_openpt.2 profil.2 \
pselect.2 ptrace.2 quotactl.2 \
read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \
rtprio.2
diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map
index 4cb18c6..cd31f24 100644
--- a/lib/libc/sys/Symbol.map
+++ b/lib/libc/sys/Symbol.map
@@ -364,6 +364,7 @@ FBSD_1.2 {
cap_enter;
cap_getmode;
getloginclass;
+ posix_fallocate;
rctl_get_racct;
rctl_get_rules;
rctl_get_limits;
diff --git a/lib/libc/sys/posix_fallocate.2 b/lib/libc/sys/posix_fallocate.2
new file mode 100644
index 0000000..f7cbd49
--- /dev/null
+++ b/lib/libc/sys/posix_fallocate.2
@@ -0,0 +1,146 @@
+.\" Copyright (c) 1980, 1991, 1993
+.\" The Regents of the University of California. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\" 4. Neither the name of the University nor the names of its contributors
+.\" may be used to endorse or promote products derived from this software
+.\" without specific prior written permission.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" @(#)open.2 8.2 (Berkeley) 11/16/93
+.\" $FreeBSD$
+.\"
+.Dd April 13, 2011
+.Dt POSIX_FALLOCATE 2
+.Os
+.Sh NAME
+.Nm posix_fallocate
+.Nd pre-allocate storage for a range in a file
+.Sh LIBRARY
+.Lb libc
+.Sh SYNOPSIS
+.In fcntl.h
+.Ft int
+.Fn posix_fallocate "int fd" "off_t offset" "off_t len"
+.Sh DESCRIPTION
+Required storage for the range
+.Fa offset
+to
+.Fa offset +
+.Fa len
+in the file referenced by
+.Fa fd
+is guaranteed to be allocated upon successful return.
+That is, if
+.Fn posix_fallocate
+returns successfully, subsequent writes to the specified file data
+will not fail due to lack of free space on the file system storage
+media.
+Any existing file data in the specified range is unmodified.
+If
+.Fa offset +
+.Fa len
+is beyond the current file size, then
+.Fn posix_fallocate
+will adjust the file size to
+.Fa offset +
+.Fa len .
+Otherwise, the file size will not be changed.
+.Pp
+Space allocated by
+.Fn posix_fallocate
+will be freed by a successful call to
+.Xr creat 2
+or
+.Xr open 2
+that truncates the size of the file.
+Space allocated via
+.Fn posix_fallocate
+may be freed by a successful call to
+.Xr ftruncate 2
+that reduces the file size to a size smaller than
+.Fa offset +
+.Fa len .
+.Pp
+.Sh RETURN VALUES
+If successful,
+.Fn posix_fallocate
+returns zero.
+It returns -1 on failure, and sets
+.Va errno
+to indicate the error.
+.Sh ERRORS
+Possible failure conditions:
+.Bl -tag -width Er
+.It Bq Er EBADF
+The
+.Fa fd
+argument is not a valid file descriptor.
+.It Bq Er EBADF
+The
+.Fa fd
+argument references a file that was opened without write permission.
+.It Bq Er EFBIG
+The value of
+.Fa offset +
+.Fa len
+is greater than the maximum file size.
+.It Bq Er EINTR
+A signal was caught during execution.
+.It Bq Er EINVAL
+The
+.Fa len
+argument was less than or equal to zero or the
+.Fa offset
+argument was less than zero.
+.It Bq Er EIO
+An I/O error occurred while reading from or writing to a file system.
+.It Bq Er ENODEV
+The
+.Fa fd
+argument does not refer to a regular file.
+.It Bq Er ENOSPC
+There is insufficient free space remaining on the file system storage
+media.
+.It Bq Er ESPIPE
+The
+.Fa fd
+argument is associated with a pipe or FIFO.
+.El
+.Sh SEE ALSO
+.Xr creat 2 ,
+.Xr ftruncate 2 ,
+.Xr open 2 ,
+.Xr unlink 2
+.Sh STANDARDS
+The
+.Fn posix_fallocate
+system call conforms to
+.St -p1003.1-2004 .
+.Sh HISTORY
+The
+.Fn posix_fallocate
+function appeared in
+.Fx 9.0 .
+.Sh AUTHORS
+.Fn posix_fallocate
+and this manual page were initially written by
+.An Matthew Fleming Aq mdf@FreeBSD.org .
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c
index 5772c0e..23985d3 100644
--- a/sys/compat/freebsd32/freebsd32_misc.c
+++ b/sys/compat/freebsd32/freebsd32_misc.c
@@ -2790,3 +2790,15 @@ freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap)
bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname));
return (copyout(&stat32, uap->stat, version));
}
+
+int
+freebsd32_posix_fallocate(struct thread *td,
+ struct freebsd32_posix_fallocate_args *uap)
+{
+ struct posix_fallocate_args ap;
+
+ ap.fd = uap->fd;
+ ap.offset = (uap->offsetlo | ((off_t)uap->offsethi << 32));
+ ap.len = (uap->lenlo | ((off_t)uap->lenhi << 32));
+ return (posix_fallocate(td, &ap));
+}
diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master
index da42133..d524f3c 100644
--- a/sys/compat/freebsd32/syscalls.master
+++ b/sys/compat/freebsd32/syscalls.master
@@ -986,3 +986,7 @@
529 AUE_NULL NOPROTO { int rctl_remove_rule(const void *inbufp, \
size_t inbuflen, void *outbufp, \
size_t outbuflen); }
+530 AUE_NULL STD { int freebsd32_posix_fallocate(int fd,\
+ uint32_t offsetlo, uint32_t offsethi,\
+ uint32_t lenlo, uint32_t lenhi); }
+531 AUE_NULL UNIMPL posix_fadvise
diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master
index e209731..af958c9 100644
--- a/sys/kern/syscalls.master
+++ b/sys/kern/syscalls.master
@@ -944,5 +944,8 @@
529 AUE_NULL STD { int rctl_remove_rule(const void *inbufp, \
size_t inbuflen, void *outbufp, \
size_t outbuflen); }
+530 AUE_NULL STD { int posix_fallocate(int fd, \
+ off_t offset, off_t len); }
+531 AUE_NULL UNIMPL posix_fadvise
; Please copy any additions and changes to the following compatability tables:
; sys/compat/freebsd32/syscalls.master
diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c
index 195e735..6fd4b97 100644
--- a/sys/kern/vfs_default.c
+++ b/sys/kern/vfs_default.c
@@ -99,6 +99,7 @@ struct vop_vector default_vnodeops = {
.vop_advlock = vop_stdadvlock,
.vop_advlockasync = vop_stdadvlockasync,
.vop_advlockpurge = vop_stdadvlockpurge,
+ .vop_allocate = vop_stdallocate,
.vop_bmap = vop_stdbmap,
.vop_close = VOP_NULL,
.vop_fsync = VOP_NULL,
@@ -855,6 +856,136 @@ out:
return (error);
}
+int
+vop_stdallocate(struct vop_allocate_args *ap)
+{
+#ifdef __notyet__
+ struct statfs sfs;
+#endif
+ struct iovec aiov;
+ struct vattr vattr, *vap;
+ struct uio auio;
+ off_t len, cur, offset;
+ uint8_t *buf;
+ struct thread *td;
+ struct vnode *vp;
+ size_t iosize;
+ int error, locked;
+
+ buf = NULL;
+ error = 0;
+ locked = 1;
+ td = curthread;
+ vap = &vattr;
+ vp = ap->a_vp;
+ len = ap->a_len;
+ offset = ap->a_offset;
+
+ error = VOP_GETATTR(vp, vap, td->td_ucred);
+ if (error != 0)
+ goto out;
+ iosize = vap->va_blocksize;
+ if (iosize == 0)
+ iosize = BLKDEV_IOSIZE;
+ if (iosize > MAXPHYS)
+ iosize = MAXPHYS;
+ buf = malloc(iosize, M_TEMP, M_WAITOK);
+
+#ifdef __notyet__
+ /*
+ * Check if the filesystem sets f_maxfilesize; if not use
+ * VOP_SETATTR to perform the check.
+ */
+ error = VFS_STATFS(vp->v_mount, &sfs, td);
+ if (error != 0)
+ goto out;
+ if (sfs.f_maxfilesize) {
+ if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize ||
+ offset + len > sfs.f_maxfilesize) {
+ error = EFBIG;
+ goto out;
+ }
+ } else
+#endif
+ if (offset + len > vap->va_size) {
+ VATTR_NULL(vap);
+ vap->va_size = offset + len;
+ error = VOP_SETATTR(vp, vap, td->td_ucred);
+ if (error != 0)
+ goto out;
+ }
+
+ while (len > 0) {
+ if (should_yield()) {
+ VOP_UNLOCK(vp, 0);
+ locked = 0;
+ kern_yield(-1);
+ error = vn_lock(vp, LK_EXCLUSIVE);
+ if (error != 0)
+ break;
+ locked = 1;
+ error = VOP_GETATTR(vp, vap, td->td_ucred);
+ if (error != 0)
+ break;
+ }
+
+ /*
+ * Read and write back anything below the nominal file
+ * size. There's currently no way outside the filesystem
+ * to know whether this area is sparse or not.
+ */
+ cur = iosize;
+ if ((offset % iosize) != 0)
+ cur -= (offset % iosize);
+ if (cur > len)
+ cur = len;
+ if (offset < vap->va_size) {
+ aiov.iov_base = buf;
+ aiov.iov_len = cur;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = offset;
+ auio.uio_resid = cur;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_READ;
+ auio.uio_td = td;
+ error = VOP_READ(vp, &auio, 0, td->td_ucred);
+ if (error != 0)
+ break;
+ if (auio.uio_resid > 0) {
+ bzero(buf + cur - auio.uio_resid,
+ auio.uio_resid);
+ }
+ } else {
+ bzero(buf, cur);
+ }
+
+ aiov.iov_base = buf;
+ aiov.iov_len = cur;
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = offset;
+ auio.uio_resid = cur;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_td = td;
+
+ error = VOP_WRITE(vp, &auio, 0, td->td_ucred);
+ if (error != 0)
+ break;
+
+ len -= cur;
+ offset += cur;
+ }
+
+ out:
+ KASSERT(locked || error != 0, ("How'd I get unlocked with no error?"));
+ if (locked && error != 0)
+ VOP_UNLOCK(vp, 0);
+ free(buf, M_TEMP);
+ return (error);
+}
+
/*
* vfs default ops
* used to fill the vfs function table to get reasonable default return values.
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c
index 4fc198e..26a21e3 100644
--- a/sys/kern/vfs_syscalls.c
+++ b/sys/kern/vfs_syscalls.c
@@ -4671,3 +4671,83 @@ out:
VFS_UNLOCK_GIANT(vfslocked);
return (error);
}
+
+static int
+kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len)
+{
+ struct file *fp;
+ struct mount *mp;
+ struct vnode *vp;
+ int error, vfslocked, vnlocked;
+
+ fp = NULL;
+ mp = NULL;
+ vfslocked = 0;
+ vnlocked = 0;
+ error = fget(td, fd, &fp);
+ if (error != 0)
+ goto out;
+
+ switch (fp->f_type) {
+ case DTYPE_VNODE:
+ break;
+ case DTYPE_PIPE:
+ case DTYPE_FIFO:
+ error = ESPIPE;
+ goto out;
+ default:
+ error = ENODEV;
+ goto out;
+ }
+ if ((fp->f_flag & FWRITE) == 0) {
+ error = EBADF;
+ goto out;
+ }
+ vp = fp->f_vnode;
+ if (vp->v_type != VREG) {
+ error = ENODEV;
+ goto out;
+ }
+ if (offset < 0 || len <= 0) {
+ error = EINVAL;
+ goto out;
+ }
+ /* Check for wrap. */
+ if (offset > OFF_MAX - len) {
+ error = EFBIG;
+ goto out;
+ }
+
+ bwillwrite();
+ vfslocked = VFS_LOCK_GIANT(vp->v_mount);
+ error = vn_start_write(vp, &mp, V_WAIT | PCATCH);
+ if (error != 0)
+ goto out;
+ error = vn_lock(vp, LK_EXCLUSIVE);
+ if (error != 0)
+ goto out;
+ vnlocked = 1;
+#ifdef MAC
+ error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp);
+ if (error != 0)
+ goto out;
+#endif
+ error = VOP_ALLOCATE(vp, offset, len);
+ if (error != 0)
+ vnlocked = 0;
+ out:
+ if (vnlocked)
+ VOP_UNLOCK(vp, 0);
+ vn_finished_write(mp);
+ VFS_UNLOCK_GIANT(vfslocked);
+ if (fp != NULL)
+ fdrop(fp, td);
+ return (error);
+}
+
+int
+posix_fallocate(struct thread *td, struct posix_fallocate_args *uap)
+{
+
+ return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len));
+}
diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src
index 304e009..fe838ec 100644
--- a/sys/kern/vnode_if.src
+++ b/sys/kern/vnode_if.src
@@ -608,6 +608,7 @@ vop_vptofh {
IN struct fid *fhp;
};
+
%% vptocnp vp L L L
%% vptocnp vpp - U -
@@ -618,3 +619,12 @@ vop_vptocnp {
INOUT char *buf;
INOUT int *buflen;
};
+
+
+%% allocate vp E E U
+
+vop_allocate {
+ IN struct vnode *vp;
+ IN off_t offset;
+ IN off_t len;
+};
diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h
index 6f6e348..6f48ee7 100644
--- a/sys/sys/fcntl.h
+++ b/sys/sys/fcntl.h
@@ -278,7 +278,7 @@ struct oflock {
#endif
/*
- * XXX missing posix_fadvise() and posix_fallocate(), and POSIX_FADV_* macros.
+ * XXX missing posix_fadvise() and POSIX_FADV_* macros.
*/
#ifndef _KERNEL
@@ -289,6 +289,9 @@ int fcntl(int, int, ...);
#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200809
int openat(int, const char *, int, ...);
#endif
+#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200112
+int posix_fallocate(int, off_t, off_t);
+#endif
#if __BSD_VISIBLE
int flock(int, int);
#endif
diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h
index e7ff2f4..bfe94fb 100644
--- a/sys/sys/vnode.h
+++ b/sys/sys/vnode.h
@@ -689,6 +689,7 @@ int vop_stdaccessx(struct vop_accessx_args *ap);
int vop_stdadvlock(struct vop_advlock_args *ap);
int vop_stdadvlockasync(struct vop_advlockasync_args *ap);
int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap);
+int vop_stdallocate(struct vop_allocate_args *ap);
int vop_stdpathconf(struct vop_pathconf_args *);
int vop_stdpoll(struct vop_poll_args *);
int vop_stdvptocnp(struct vop_vptocnp_args *ap);
OpenPOWER on IntegriCloud