diff options
author | mdf <mdf@FreeBSD.org> | 2011-04-18 16:32:22 +0000 |
---|---|---|
committer | mdf <mdf@FreeBSD.org> | 2011-04-18 16:32:22 +0000 |
commit | 9c9a32d97b41ab9d0cae56c7e428ad6d5cd1302f (patch) | |
tree | 99ab3d40a9311d51c78c3b3e6b880d6ba7d2560c /sys | |
parent | 0bbb5b8e1ab919b4d265f1857ccd42679a2cb39c (diff) | |
download | FreeBSD-src-9c9a32d97b41ab9d0cae56c7e428ad6d5cd1302f.zip FreeBSD-src-9c9a32d97b41ab9d0cae56c7e428ad6d5cd1302f.tar.gz |
Add the posix_fallocate(2) syscall. The default implementation,
vop_stdallocate(), is filesystem-agnostic and runs about as slowly as a
read/write loop in userspace; however, it correctly implements the
functionality for filesystems that do not provide their own
VOP_ALLOCATE.
Note that __FreeBSD_version was already bumped to 900036 today, for any
ports that would like to use this function.
Also reserve space in the syscall table for posix_fadvise(2).
Reviewed by: -arch (previous version)
Diffstat (limited to 'sys')
-rw-r--r-- | sys/compat/freebsd32/freebsd32_misc.c | 12 | ||||
-rw-r--r-- | sys/compat/freebsd32/syscalls.master | 4 | ||||
-rw-r--r-- | sys/kern/syscalls.master | 3 | ||||
-rw-r--r-- | sys/kern/vfs_default.c | 131 | ||||
-rw-r--r-- | sys/kern/vfs_syscalls.c | 80 | ||||
-rw-r--r-- | sys/kern/vnode_if.src | 10 | ||||
-rw-r--r-- | sys/sys/fcntl.h | 5 | ||||
-rw-r--r-- | sys/sys/vnode.h | 1 |
8 files changed, 245 insertions, 1 deletion
diff --git a/sys/compat/freebsd32/freebsd32_misc.c b/sys/compat/freebsd32/freebsd32_misc.c index 5772c0e..23985d3 100644 --- a/sys/compat/freebsd32/freebsd32_misc.c +++ b/sys/compat/freebsd32/freebsd32_misc.c @@ -2790,3 +2790,15 @@ freebsd32_kldstat(struct thread *td, struct freebsd32_kldstat_args *uap) bcopy(&stat.pathname[0], &stat32.pathname[0], sizeof(stat.pathname)); return (copyout(&stat32, uap->stat, version)); } + +int +freebsd32_posix_fallocate(struct thread *td, + struct freebsd32_posix_fallocate_args *uap) +{ + struct posix_fallocate_args ap; + + ap.fd = uap->fd; + ap.offset = (uap->offsetlo | ((off_t)uap->offsethi << 32)); + ap.len = (uap->lenlo | ((off_t)uap->lenhi << 32)); + return (posix_fallocate(td, &ap)); +} diff --git a/sys/compat/freebsd32/syscalls.master b/sys/compat/freebsd32/syscalls.master index da42133..d524f3c 100644 --- a/sys/compat/freebsd32/syscalls.master +++ b/sys/compat/freebsd32/syscalls.master @@ -986,3 +986,7 @@ 529 AUE_NULL NOPROTO { int rctl_remove_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } +530 AUE_NULL STD { int freebsd32_posix_fallocate(int fd,\ + uint32_t offsetlo, uint32_t offsethi,\ + uint32_t lenlo, uint32_t lenhi); } +531 AUE_NULL UNIMPL posix_fadvise diff --git a/sys/kern/syscalls.master b/sys/kern/syscalls.master index e209731..af958c9 100644 --- a/sys/kern/syscalls.master +++ b/sys/kern/syscalls.master @@ -944,5 +944,8 @@ 529 AUE_NULL STD { int rctl_remove_rule(const void *inbufp, \ size_t inbuflen, void *outbufp, \ size_t outbuflen); } +530 AUE_NULL STD { int posix_fallocate(int fd, \ + off_t offset, off_t len); } +531 AUE_NULL UNIMPL posix_fadvise ; Please copy any additions and changes to the following compatability tables: ; sys/compat/freebsd32/syscalls.master diff --git a/sys/kern/vfs_default.c b/sys/kern/vfs_default.c index 195e735..6fd4b97 100644 --- a/sys/kern/vfs_default.c +++ b/sys/kern/vfs_default.c @@ -99,6 +99,7 @@ struct vop_vector default_vnodeops = { .vop_advlock 
= vop_stdadvlock, .vop_advlockasync = vop_stdadvlockasync, .vop_advlockpurge = vop_stdadvlockpurge, + .vop_allocate = vop_stdallocate, .vop_bmap = vop_stdbmap, .vop_close = VOP_NULL, .vop_fsync = VOP_NULL, @@ -855,6 +856,136 @@ out: return (error); } +int +vop_stdallocate(struct vop_allocate_args *ap) +{ +#ifdef __notyet__ + struct statfs sfs; +#endif + struct iovec aiov; + struct vattr vattr, *vap; + struct uio auio; + off_t len, cur, offset; + uint8_t *buf; + struct thread *td; + struct vnode *vp; + size_t iosize; + int error, locked; + + buf = NULL; + error = 0; + locked = 1; + td = curthread; + vap = &vattr; + vp = ap->a_vp; + len = ap->a_len; + offset = ap->a_offset; + + error = VOP_GETATTR(vp, vap, td->td_ucred); + if (error != 0) + goto out; + iosize = vap->va_blocksize; + if (iosize == 0) + iosize = BLKDEV_IOSIZE; + if (iosize > MAXPHYS) + iosize = MAXPHYS; + buf = malloc(iosize, M_TEMP, M_WAITOK); + +#ifdef __notyet__ + /* + * Check if the filesystem sets f_maxfilesize; if not use + * VOP_SETATTR to perform the check. + */ + error = VFS_STATFS(vp->v_mount, &sfs, td); + if (error != 0) + goto out; + if (sfs.f_maxfilesize) { + if (offset > sfs.f_maxfilesize || len > sfs.f_maxfilesize || + offset + len > sfs.f_maxfilesize) { + error = EFBIG; + goto out; + } + } else +#endif + if (offset + len > vap->va_size) { + VATTR_NULL(vap); + vap->va_size = offset + len; + error = VOP_SETATTR(vp, vap, td->td_ucred); + if (error != 0) + goto out; + } + + while (len > 0) { + if (should_yield()) { + VOP_UNLOCK(vp, 0); + locked = 0; + kern_yield(-1); + error = vn_lock(vp, LK_EXCLUSIVE); + if (error != 0) + break; + locked = 1; + error = VOP_GETATTR(vp, vap, td->td_ucred); + if (error != 0) + break; + } + + /* + * Read and write back anything below the nominal file + * size. There's currently no way outside the filesystem + * to know whether this area is sparse or not. 
+ */ + cur = iosize; + if ((offset % iosize) != 0) + cur -= (offset % iosize); + if (cur > len) + cur = len; + if (offset < vap->va_size) { + aiov.iov_base = buf; + aiov.iov_len = cur; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = offset; + auio.uio_resid = cur; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_READ; + auio.uio_td = td; + error = VOP_READ(vp, &auio, 0, td->td_ucred); + if (error != 0) + break; + if (auio.uio_resid > 0) { + bzero(buf + cur - auio.uio_resid, + auio.uio_resid); + } + } else { + bzero(buf, cur); + } + + aiov.iov_base = buf; + aiov.iov_len = cur; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = offset; + auio.uio_resid = cur; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_rw = UIO_WRITE; + auio.uio_td = td; + + error = VOP_WRITE(vp, &auio, 0, td->td_ucred); + if (error != 0) + break; + + len -= cur; + offset += cur; + } + + out: + KASSERT(locked || error != 0, ("How'd I get unlocked with no error?")); + if (locked && error != 0) + VOP_UNLOCK(vp, 0); + free(buf, M_TEMP); + return (error); +} + /* * vfs default ops * used to fill the vfs function table to get reasonable default return values. 
diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 4fc198e..26a21e3 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -4671,3 +4671,83 @@ out: VFS_UNLOCK_GIANT(vfslocked); return (error); } + +static int +kern_posix_fallocate(struct thread *td, int fd, off_t offset, off_t len) +{ + struct file *fp; + struct mount *mp; + struct vnode *vp; + int error, vfslocked, vnlocked; + + fp = NULL; + mp = NULL; + vfslocked = 0; + vnlocked = 0; + error = fget(td, fd, &fp); + if (error != 0) + goto out; + + switch (fp->f_type) { + case DTYPE_VNODE: + break; + case DTYPE_PIPE: + case DTYPE_FIFO: + error = ESPIPE; + goto out; + default: + error = ENODEV; + goto out; + } + if ((fp->f_flag & FWRITE) == 0) { + error = EBADF; + goto out; + } + vp = fp->f_vnode; + if (vp->v_type != VREG) { + error = ENODEV; + goto out; + } + if (offset < 0 || len <= 0) { + error = EINVAL; + goto out; + } + /* Check for wrap. */ + if (offset > OFF_MAX - len) { + error = EFBIG; + goto out; + } + + bwillwrite(); + vfslocked = VFS_LOCK_GIANT(vp->v_mount); + error = vn_start_write(vp, &mp, V_WAIT | PCATCH); + if (error != 0) + goto out; + error = vn_lock(vp, LK_EXCLUSIVE); + if (error != 0) + goto out; + vnlocked = 1; +#ifdef MAC + error = mac_vnode_check_write(td->td_ucred, fp->f_cred, vp); + if (error != 0) + goto out; +#endif + error = VOP_ALLOCATE(vp, offset, len); + if (error != 0) + vnlocked = 0; + out: + if (vnlocked) + VOP_UNLOCK(vp, 0); + vn_finished_write(mp); + VFS_UNLOCK_GIANT(vfslocked); + if (fp != NULL) + fdrop(fp, td); + return (error); +} + +int +posix_fallocate(struct thread *td, struct posix_fallocate_args *uap) +{ + + return (kern_posix_fallocate(td, uap->fd, uap->offset, uap->len)); +} diff --git a/sys/kern/vnode_if.src b/sys/kern/vnode_if.src index 304e009..fe838ec 100644 --- a/sys/kern/vnode_if.src +++ b/sys/kern/vnode_if.src @@ -608,6 +608,7 @@ vop_vptofh { IN struct fid *fhp; }; + %% vptocnp vp L L L %% vptocnp vpp - U - @@ -618,3 +619,12 @@ 
vop_vptocnp { INOUT char *buf; INOUT int *buflen; }; + + +%% allocate vp E E U + +vop_allocate { + IN struct vnode *vp; + IN off_t offset; + IN off_t len; +}; diff --git a/sys/sys/fcntl.h b/sys/sys/fcntl.h index 6f6e348..6f48ee7 100644 --- a/sys/sys/fcntl.h +++ b/sys/sys/fcntl.h @@ -278,7 +278,7 @@ struct oflock { #endif /* - * XXX missing posix_fadvise() and posix_fallocate(), and POSIX_FADV_* macros. + * XXX missing posix_fadvise() and POSIX_FADV_* macros. */ #ifndef _KERNEL @@ -289,6 +289,9 @@ int fcntl(int, int, ...); #if __BSD_VISIBLE || __POSIX_VISIBLE >= 200809 int openat(int, const char *, int, ...); #endif +#if __BSD_VISIBLE || __POSIX_VISIBLE >= 200112 +int posix_fallocate(int, off_t, off_t); +#endif #if __BSD_VISIBLE int flock(int, int); #endif diff --git a/sys/sys/vnode.h b/sys/sys/vnode.h index e7ff2f4..bfe94fb 100644 --- a/sys/sys/vnode.h +++ b/sys/sys/vnode.h @@ -689,6 +689,7 @@ int vop_stdaccessx(struct vop_accessx_args *ap); int vop_stdadvlock(struct vop_advlock_args *ap); int vop_stdadvlockasync(struct vop_advlockasync_args *ap); int vop_stdadvlockpurge(struct vop_advlockpurge_args *ap); +int vop_stdallocate(struct vop_allocate_args *ap); int vop_stdpathconf(struct vop_pathconf_args *); int vop_stdpoll(struct vop_poll_args *); int vop_stdvptocnp(struct vop_vptocnp_args *ap); |