diff options
-rw-r--r-- | lib/libc/sys/Makefile.inc | 2 | ||||
-rw-r--r-- | lib/libc/sys/Symbol.map | 1 | ||||
-rw-r--r-- | lib/libc/sys/posix_fallocate.2 | 146 | ||||
-rw-r--r-- | sys/compat/freebsd32/freebsd32_misc.c | 12 | ||||
-rw-r--r-- | sys/compat/freebsd32/syscalls.master | 4 | ||||
-rw-r--r-- | sys/kern/syscalls.master | 3 | ||||
-rw-r--r-- | sys/kern/vfs_default.c | 131 | ||||
-rw-r--r-- | sys/kern/vfs_syscalls.c | 80 | ||||
-rw-r--r-- | sys/kern/vnode_if.src | 10 | ||||
-rw-r--r-- | sys/sys/fcntl.h | 5 | ||||
-rw-r--r-- | sys/sys/vnode.h | 1 |
11 files changed, 393 insertions, 2 deletions
diff --git a/lib/libc/sys/Makefile.inc b/lib/libc/sys/Makefile.inc index 152a14a..008180a 100644 --- a/lib/libc/sys/Makefile.inc +++ b/lib/libc/sys/Makefile.inc @@ -96,7 +96,7 @@ MAN+= abort2.2 accept.2 access.2 acct.2 adjtime.2 \ mq_setattr.2 \ msgctl.2 msgget.2 msgrcv.2 msgsnd.2 \ msync.2 munmap.2 nanosleep.2 nfssvc.2 ntp_adjtime.2 open.2 \ - pathconf.2 pipe.2 poll.2 posix_openpt.2 profil.2 \ + pathconf.2 pipe.2 poll.2 posix_fallocate.2 posix_openpt.2 profil.2 \ pselect.2 ptrace.2 quotactl.2 \ read.2 readlink.2 reboot.2 recv.2 rename.2 revoke.2 rfork.2 rmdir.2 \ rtprio.2 diff --git a/lib/libc/sys/Symbol.map b/lib/libc/sys/Symbol.map index 4cb18c6..cd31f24 100644 --- a/lib/libc/sys/Symbol.map +++ b/lib/libc/sys/Symbol.map @@ -364,6 +364,7 @@ FBSD_1.2 { cap_enter; cap_getmode; getloginclass; + posix_fallocate; rctl_get_racct; rctl_get_rules; rctl_get_limits; diff --git a/lib/libc/sys/posix_fallocate.2 b/lib/libc/sys/posix_fallocate.2 new file mode 100644 index 0000000..f7cbd49 --- /dev/null +++ b/lib/libc/sys/posix_fallocate.2 @@ -0,0 +1,146 @@ +.\" Copyright (c) 1980, 1991, 1993 +.\" The Regents of the University of California. All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" @(#)open.2 8.2 (Berkeley) 11/16/93 +.\" $FreeBSD$ +.\" +.Dd April 13, 2011 +.Dt POSIX_FALLOCATE 2 +.Os +.Sh NAME +.Nm posix_fallocate +.Nd pre-allocate storage for a range in a file +.Sh LIBRARY +.Lb libc +.Sh SYNOPSIS +.In fcntl.h +.Ft int +.Fn posix_fallocate "int fd" "off_t offset" "off_t len" +.Sh DESCRIPTION +Required storage for the range +.Fa offset +to +.Fa offset + +.Fa len +in the file referenced by +.Fa fd +is guarateed to be allocated upon successful return. +That is, if +.Fn posix_fallocate +returns successfully, subsequent writes to the specified file data +will not fail due to lack of free space on the file system storage +media. +Any existing file data in the specified range is unmodified. +If +.Fa offset + +.Fa len +is beyond the current file size, then +.Fn posix_fallocate +will adjust the file size to +.Fa offset + +.Fa len . +Otherwise, the file size will not be changed. +.Pp +Space allocated by +.Fn posix_fallocate +will be freed by a successful call to +.Xr creat 2 +or +.Xr open 2 +that truncates the size of the file. +Space allocated via +.Fn posix_fallocate +may be freed by a successful call to +.Xr ftruncate 2 +that reduces the file size to a size smaller than +.Fa offset + +.Fa len . +.Pp +.Sh RETURN VALUES +If successful, +.Fn posix_fallocate +returns zero. +It returns -1 on failure, and sets +.Va errno +to indicate the error. +.Sh ERRORS +Possible failure conditions: +.Bl -tag -width Er +.It Bq Er EBADF +The +.Fa fd +argument is not a valid file descriptor. +.It Bq Er EBADF +The +.Fa fd +argument references a file that was opened without write permission. +.It Bq Er EFBIG +The value of +.Fa offset + +.Fa len +is greater than the maximum file size. +.It Bq Er EINTR +A signal was caught during execution. +.It Bq Er EINVAL +The +.Fa len +argument was zero or the +.Fa offset +argument was less than zero. +.It Bq Er EIO +An I/O error occurred while reading from or writing to a file system. +.It Bq Er ENODEV +The +.Fa fd +argument does not refer to a regular file. +.It Bq Er ENOSPC +There is insufficient free space remaining on the file system storage +media. +.It Bq Er ESPIPE +The +.Fa fd +argument is associated with a pipe or FIFO. +.El +.Sh SEE ALSO +.Xr creat 2 , +.Xr ftruncate 2 , +.Xr open 2 , +.Xr unlink 2 +.Sh STANDARDS +The +.Fn posix_fallocate +system call conforms to +.St -p1003.1-2004 . +.Sh HISTORY +The +.Fn posix_fallocate +function appeared in +.Fx 9.0 . +.Sh AUTHORS +.Fn posix_fallocate +and this manual page were initially written by +.An Matthew Fleming Aq mdf@FreeBSD.org . diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 5772c0e..23985d3 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -2790,3 +2790,15 @@ freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap) bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname)); return (copyout(&stat32, uap->stat, version)); } + +int +freebsd32_posix_fallocate(struct thread *td, + struct freebsd32_posix_fallocate_args *uap) +{ + struct posix_fallocate_args ap; + + ap.fd = uap->fd; + ap.offset = (uap->offsetlo | ((off_t)uap->offsethi << 32)); + ap.len = (uap->lenlo | ((off_t)uap->lenhi << 32)); + return (posix_fallocate(td, &ap)); +} diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master index da42133..d524f3c 100644 --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -986,3 +986,7 @@ 529 AUE_NULL NOPROTO { int rctl_remove_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } +530 AUE_NULL STD { int freebsd32_posix_fallocate(int fd,\ + uint32_t offsetlo, uint32_t offsethi,\ + uint32_t lenlo, uint32_t lenhi); } +531 AUE_NULL UNIMPL posix_fadvise diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index e209731..af958c9 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -944,5 +944,8 @@ 529 AUE_NULL STD { int rctl_remove_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } +530 AUE_NULL STD { int posix_fallocate(int fd, \ + off_t offset, off_t len); } +531 AUE_NULL UNIMPL posix_fadvise ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 195e735..6fd4b97 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -99,6 +99,7 @@ struct vop_vector default_vnodeops = { .vop_advlock = vop_stdadvlock, .vop_advlockasync = vop_stdadvlockasync, .vop_advlockpurge = vop_stdadvlockpurge, + .vop_allocate = vop_stdallocate, .vop_bmap = vop_stdbmap, .vop_close = VOP_NULL, .vop_fsync = VOP_NULL, @@ -855,6 +856,136 @@ out: return (error); } +int +vop_stdallocate(struct vop_allocate_args *ap) +{ +#ifdef __notyet__ + struct statfs sfs; +#endif + struct iovec aiov; + struct vattr vattr, *vap; + struct uio auio; + off_t len, cur, offset; + uint8_t *buf; + struct thread *td; + struct vnode *vp; + size_t iosize; + int error, locked; + + buf = NULL; + error = 0; + locked = 1; + td = curthread; + vap = &vattr; + vp = ap->a_vp; + len = ap->a_len; + offset = ap->a_offset; + + error = VOP_GETATTR(vp, vap, td->td_ucred); + if (error != 0) + goto out; + iosize = vap->va_blocksize; + if (iosize == 0) + iosize = BLKDEV_IOSIZE; + if (iosize > MAXPHYS) + iosize = MAXPHYS; + buf = malloc(iosize, M_TEMP, M_WAITOK); + +#ifdef __notyet__ + /* + * Check if the filesystem sets f_maxfilesize; if not use + * VOP_SETATTR to perform the check. + */ + error = VFS_STATFS(vp->v_mount, &sfs, td); + if (error != 0) + goto out; + if (sfs.f_maxfilesize) { + if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize || + offset + len > sfs.f_maxfilesize) { + error = EFBIG; + goto out; + } + } else +#endif + if (offset + len > vap->va_size) { + VATTR_NULL(vap); + vap->va_size = offset + len; + error = VOP_SETATTR(vp, vap, td->td_ucred); + if (error != 0) + goto out; + } + + while (len > 0) { + if (should_yield()) { + VOP_UNLOCK(vp, 0); + locked = 0; + kern_yield(-1); + error = vn_lock(vp, LK_EXCLUSIVE); + if (error != 0) + break; + locked = 1; + error = VOP_GETATTR(vp, vap, td->td_ucred); + if (error != 0) + break; + } + + /* + * Read and write back anything below the nominal file + * size. There's currently no way outside the filesystem + * to know whether this area is sparse or not. + */ + cur = iosize; + if ((offset % iosize) != 0) + cur -= (offset % iosize); + if (cur > len) + cur = len; + if (offset < vap->va_size) { + aiov.iov_base = buf; + aiov.iov_len = cur; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = offset; + auio.uio_resid = cur; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_td = td; + error = VOP_READ(vp, &auio, 0, td->td_ucred); + if (error != 0) + break; + if (auio.uio_resid > 0) { + bzero(buf + cur - auio.uio_resid, + auio.uio_resid); + } + } else { + bzero(buf, cur); + } + + aiov.iov_base = buf; + aiov.iov_len = cur; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = offset; + auio.uio_resid = cur; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_td = td; + + error = VOP_WRITE(vp, &auio, 0, td->td_ucred); + if (error != 0) + break; + + len -= cur; + offset += cur; + } + + out: + KASSERT(locked || error != 0, ("How'd I get unlocked with no error?")); + if (locked && error != 0) + VOP_UNLOCK(vp, 0); + free(buf, M_TEMP); + return (error); +} + /* * vfs default ops * used to fill the vfs function table to get reasonable default return values. diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 4fc198e..26a21e3 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -4671,3 +4671,83 @@ out: VFS_UNLOCK_GIANT(vfslocked); return (error); } + +static int +kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) +{ + struct file *fp; + struct mount *mp; + struct vnode *vp; + int error, vfslocked, vnlocked; + + fp = NULL; + mp = NULL; + vfslocked = 0; + vnlocked = 0; + error = fget(td, fd, &fp); + if (error != 0) + goto out; + + switch (fp->f_type) { + case DTYPE_VNODE: + break; + case DTYPE_PIPE: + case DTYPE_FIFO: + error = ESPIPE; + goto out; + default: + error = ENODEV; + goto out; + } + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + goto out; + } + vp = fp->f_vnode; + if (vp->v_type != VREG) { + error = ENODEV; + goto out; + } + if (offset < 0 || len <= 0) { + error = EINVAL; + goto out; + } + /* Check for wrap. */ + if (offset > OFF_MAX - len) { + error = EFBIG; + goto out; + } + + bwillwrite(); + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + error = vn_start_write(vp, &mp, V_WAIT | PCATCH); + if (error != 0) + goto out; + error = vn_lock(vp, LK_EXCLUSIVE); + if (error != 0) + goto out; + vnlocked = 1; +#ifdef MAC + error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); + if (error != 0) + goto out; +#endif + error = VOP_ALLOCATE(vp, offset, len); + if (error != 0) + vnlocked = 0; + out: + if (vnlocked) + VOP_UNLOCK(vp, 0); + vn_finished_write(mp); + VFS_UNLOCK_GIANT(vfslocked); + if (fp != NULL) + fdrop(fp, td); + return (error); +} + +int +posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) +{ + + return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len)); +} diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 304e009..fe838ec 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -608,6 +608,7 @@ vop_vptofh { IN struct fid *fhp; }; + %% vptocnp vp L L L %% vptocnp vpp - U - @@ -618,3 +619,12 @@ vop_vptocnp { INOUT char *buf; INOUT int *buflen; }; + + +%% allocate vp E E U + +vop_allocate { + IN struct vnode *vp; + IN off_t offset; + IN off_t len; +}; diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 6f6e348..6f48ee7 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -278,7 +278,7 @@ struct oflock { #endif /* - * XXX missing posix_fadvise() and posix_fallocate(), and POSIX_FADV_* macros. + * XXX missing posix_fadvise() and POSIX_FADV_* macros. */ #ifndef _KERNEL @@ -289,6 +289,9 @@ int fcntl(int, int, ...); #if __BSD_VISIBLE || __POSIX_VISIBLE >= 200809 int openat(int, const char *, int, ...); #endif +#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200112 +int posix_fallocate(int, off_t, off_t); +#endif #if __BSD_VISIBLE int flock(int, int); #endif diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index e7ff2f4..bfe94fb 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -689,6 +689,7 @@ int vop_stdaccessx(struct vop_accessx_args *ap); int vop_stdadvlock(struct vop_advlock_args *ap); int vop_stdadvlockasync(struct vop_advlockasync_args *ap); int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap); +int vop_stdallocate(struct vop_allocate_args *ap); int vop_stdpathconf(struct vop_pathconf_args *); int vop_stdpoll(struct vop_poll_args *); int vop_stdvptocnp(struct vop_vptocnp_args *ap); |