diff options
Diffstat (limited to 'sys/kern/vfs_extattr.c')
-rw-r--r-- | sys/kern/vfs_extattr.c | 4862 |
1 files changed, 4862 insertions, 0 deletions
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c new file mode 100644 index 0000000..1244e54 --- /dev/null +++ b/sys/kern/vfs_extattr.c @@ -0,0 +1,4862 @@ +/* + * Copyright (c) 1989, 1993 + * The Regents of the University of California. All rights reserved. + * (c) UNIX System Laboratories, Inc. + * All or some portions of this file are derived from material licensed + * to the University of California by American Telephone and Telegraph + * Co. or Unix System Laboratories, Inc. and are reproduced herein with + * the permission of UNIX System Laboratories, Inc. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * 3. All advertising materials mentioning features or use of this software + * must display the following acknowledgement: + * This product includes software developed by the University of + * California, Berkeley and its contributors. + * 4. Neither the name of the University nor the names of its contributors + * may be used to endorse or promote products derived from this software + * without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * @(#)vfs_syscalls.c 8.13 (Berkeley) 4/15/94 + * $FreeBSD$ + */ + +/* For 4.3 integer FS ID compatibility */ +#include "opt_compat.h" +#include "opt_ffs.h" + +#include <sys/param.h> +#include <sys/systm.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/sysent.h> +#include <sys/malloc.h> +#include <sys/mount.h> +#include <sys/mutex.h> +#include <sys/sysproto.h> +#include <sys/namei.h> +#include <sys/filedesc.h> +#include <sys/kernel.h> +#include <sys/fcntl.h> +#include <sys/file.h> +#include <sys/linker.h> +#include <sys/stat.h> +#include <sys/sx.h> +#include <sys/unistd.h> +#include <sys/vnode.h> +#include <sys/proc.h> +#include <sys/dirent.h> +#include <sys/extattr.h> +#include <sys/jail.h> +#include <sys/sysctl.h> + +#include <machine/limits.h> +#include <machine/stdarg.h> + +#include <vm/vm.h> +#include <vm/vm_object.h> +#include <vm/vm_page.h> +#include <vm/uma.h> + +static int change_dir(struct nameidata *ndp, struct thread *td); +static void checkdirs(struct vnode *olddp, struct vnode *newdp); +static int chroot_refuse_vdir_fds(struct filedesc *fdp); +static int getutimes(const struct timeval *, struct timespec *); +static int setfown(struct thread *td, struct vnode *, uid_t, gid_t); +static int setfmode(struct thread *td, struct vnode *, int); +static int setfflags(struct thread *td, struct vnode *, int); +static int setutimes(struct thread *td, struct vnode *, + const struct timespec *, int); +static int 
vn_access(struct vnode *vp, int user_flags, struct ucred *cred, + struct thread *td); +static int vfs_nmount(struct thread *td, int, struct uio *); + +static int usermount = 0; /* if 1, non-root can mount fs. */ + +int (*union_dircheckp)(struct thread *td, struct vnode **, struct file *); + +SYSCTL_INT(_vfs, OID_AUTO, usermount, CTLFLAG_RW, &usermount, 0, ""); + +/* + * Virtual File System System Calls + */ + +#ifndef _SYS_SYSPROTO_H_ +struct nmount_args { + struct iovec *iovp; + unsigned int iovcnt; + int flags; +}; +#endif +/* ARGSUSED */ +int +nmount(td, uap) + struct thread *td; + struct nmount_args /* { + syscallarg(struct iovec *) iovp; + syscallarg(unsigned int) iovcnt; + syscallarg(int) flags; + } */ *uap; +{ + struct uio auio; + struct iovec *iov, *needfree; + struct iovec aiov[UIO_SMALLIOV]; + unsigned int i; + int error; + u_int iovlen, iovcnt; + + iovcnt = SCARG(uap, iovcnt); + iovlen = iovcnt * sizeof (struct iovec); + /* + * Check that we have an even number of iovec's + * and that we have at least two options. + */ + if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV)) + return (EINVAL); + + if (iovcnt > UIO_SMALLIOV) { + MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); + needfree = iov; + } else { + iov = aiov; + needfree = NULL; + } + auio.uio_iov = iov; + auio.uio_iovcnt = iovcnt; + auio.uio_segflg = UIO_USERSPACE; + if ((error = copyin(uap->iovp, iov, iovlen))) + goto finish; + + for (i = 0; i < iovcnt; i++) { + if (iov->iov_len > MMAXOPTIONLEN) { + error = EINVAL; + goto finish; + } + iov++; + } + error = vfs_nmount(td, SCARG(uap, flags), &auio); +finish: + if (needfree != NULL) + free(needfree, M_TEMP); + return (error); +} + +/* + * Release all resources related to the + * mount options. 
+ */ +void +vfs_freeopts(struct vfsoptlist *opts) +{ + struct vfsopt *opt; + + while (!TAILQ_EMPTY(opts)) { + opt = TAILQ_FIRST(opts); + TAILQ_REMOVE(opts, opt, link); + free(opt->name, M_MOUNT); + free(opt->value, M_MOUNT); + free(opt, M_MOUNT); + } + free(opts, M_MOUNT); +} + +int +kernel_mount(iovp, iovcnt, flags) + struct iovec *iovp; + unsigned int iovcnt; + int flags; +{ + struct uio auio; + int error; + + /* + * Check that we have an even number of iovec's + * and that we have at least two options. + */ + if ((iovcnt & 1) || (iovcnt < 4)) + return (EINVAL); + + auio.uio_iov = iovp; + auio.uio_iovcnt = iovcnt; + auio.uio_segflg = UIO_SYSSPACE; + + error = vfs_nmount(curthread, flags, &auio); + return (error); +} + +int +kernel_vmount(int flags, ...) +{ + struct iovec *iovp; + struct uio auio; + va_list ap; + unsigned int iovcnt, iovlen, len; + const char *cp; + char *buf, *pos; + size_t n; + int error, i; + + len = 0; + va_start(ap, flags); + for (iovcnt = 0; (cp = va_arg(ap, const char *)) != NULL; iovcnt++) + len += strlen(cp) + 1; + va_end(ap); + + if (iovcnt < 4 || iovcnt & 1) + return (EINVAL); + + iovlen = iovcnt * sizeof (struct iovec); + MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK); + MALLOC(buf, char *, len, M_MOUNT, M_WAITOK); + pos = buf; + va_start(ap, flags); + for (i = 0; i < iovcnt; i++) { + cp = va_arg(ap, const char *); + copystr(cp, pos, len - (pos - buf), &n); + iovp[i].iov_base = pos; + iovp[i].iov_len = n; + pos += n; + } + va_end(ap); + + auio.uio_iov = iovp; + auio.uio_iovcnt = iovcnt; + auio.uio_segflg = UIO_SYSSPACE; + + error = vfs_nmount(curthread, flags, &auio); + FREE(iovp, M_MOUNT); + FREE(buf, M_MOUNT); + return (error); +} + +/* + * vfs_nmount(): actually attempt a filesystem mount. + */ +static int +vfs_nmount(td, fsflags, fsoptions) + struct thread *td; + int fsflags; /* Flags common to all filesystems. */ + struct uio *fsoptions; /* Options local to the filesystem. 
*/ +{ + linker_file_t lf; + struct vnode *vp; + struct mount *mp; + struct vfsconf *vfsp; + struct vfsoptlist *optlist; + char *fstype, *fspath; + int error, flag = 0, kern_flag = 0; + int fstypelen, fspathlen; + struct vattr va; + struct nameidata nd; + + error = vfs_buildopts(fsoptions, &optlist); + if (error) + return (error); + + /* + * We need these two options before the others, + * and they are mandatory for any filesystem. + * Ensure they are NUL terminated as well. + */ + fstypelen = 0; + error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); + if (error || fstype[fstypelen - 1] != '\0') { + error = EINVAL; + goto bad; + } + fspathlen = 0; + error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); + if (error || fspath[fspathlen - 1] != '\0') { + error = EINVAL; + goto bad; + } + + /* + * Be ultra-paranoid about making sure the type and fspath + * variables will fit in our mp buffers, including the + * terminating NUL. + */ + if (fstypelen >= MFSNAMELEN - 1 || fspathlen >= MNAMELEN - 1) { + error = ENAMETOOLONG; + goto bad; + } + + if (usermount == 0) { + error = suser(td); + if (error) + goto bad; + } + /* + * Do not allow NFS export by non-root users. + */ + if (fsflags & MNT_EXPORTED) { + error = suser(td); + if (error) + goto bad; + } + /* + * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users. + */ + if (suser(td)) + fsflags |= MNT_NOSUID | MNT_NODEV; + /* + * Get vnode to be covered + */ + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td); + if ((error = namei(&nd)) != 0) + goto bad; + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + if (fsflags & MNT_UPDATE) { + if ((vp->v_flag & VROOT) == 0) { + vput(vp); + error = EINVAL; + goto bad; + } + mp = vp->v_mount; + flag = mp->mnt_flag; + kern_flag = mp->mnt_kern_flag; + /* + * We only allow the filesystem to be reloaded if it + * is currently mounted read-only. 
+ */ + if ((fsflags & MNT_RELOAD) && + ((mp->mnt_flag & MNT_RDONLY) == 0)) { + vput(vp); + error = EOPNOTSUPP; /* Needs translation */ + goto bad; + } + /* + * Only root, or the user that did the original mount is + * permitted to update it. + */ + if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) { + error = suser(td); + if (error) { + vput(vp); + goto bad; + } + } + if (vfs_busy(mp, LK_NOWAIT, 0, td)) { + vput(vp); + error = EBUSY; + goto bad; + } + mtx_lock(&vp->v_interlock); + if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) { + mtx_unlock(&vp->v_interlock); + vfs_unbusy(mp, td); + vput(vp); + error = EBUSY; + goto bad; + } + vp->v_flag |= VMOUNT; + mtx_unlock(&vp->v_interlock); + mp->mnt_flag |= fsflags & + (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT); + VOP_UNLOCK(vp, 0, td); + goto update; + } + /* + * If the user is not root, ensure that they own the directory + * onto which we are attempting to mount. + */ + error = VOP_GETATTR(vp, &va, td->td_ucred, td); + if (error) { + vput(vp); + goto bad; + } + if (va.va_uid != td->td_ucred->cr_uid) { + error = suser(td); + if (error) { + vput(vp); + goto bad; + } + } + if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) { + vput(vp); + goto bad; + } + if (vp->v_type != VDIR) { + vput(vp); + error = ENOTDIR; + goto bad; + } + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstype)) + break; + if (vfsp == NULL) { + /* Only load modules for root (very important!). */ + error = suser(td); + if (error) { + vput(vp); + goto bad; + } + error = securelevel_gt(td->td_ucred, 0); + if (error) { + vput(vp); + goto bad; + } + error = linker_load_file(fstype, &lf); + if (error || lf == NULL) { + vput(vp); + if (lf == NULL) + error = ENODEV; + goto bad; + } + lf->userrefs++; + /* Look up again to see if the VFS was loaded. 
*/ + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstype)) + break; + if (vfsp == NULL) { + lf->userrefs--; + linker_file_unload(lf); + vput(vp); + error = ENODEV; + goto bad; + } + } + mtx_lock(&vp->v_interlock); + if ((vp->v_flag & VMOUNT) != 0 || + vp->v_mountedhere != NULL) { + mtx_unlock(&vp->v_interlock); + vput(vp); + error = EBUSY; + goto bad; + } + vp->v_flag |= VMOUNT; + mtx_unlock(&vp->v_interlock); + + /* + * Allocate and initialize the filesystem. + */ + mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO); + TAILQ_INIT(&mp->mnt_nvnodelist); + TAILQ_INIT(&mp->mnt_reservedvnlist); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); + (void)vfs_busy(mp, LK_NOWAIT, 0, td); + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_vfc = vfsp; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN); + mp->mnt_vnodecovered = vp; + mp->mnt_stat.f_owner = td->td_ucred->cr_uid; + strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); + mp->mnt_iosize_max = DFLTPHYS; + VOP_UNLOCK(vp, 0, td); + +update: + mp->mnt_optnew = optlist; + /* + * Check if the fs implements the new VFS_NMOUNT() + * function, since the new system call was used. + */ + if (mp->mnt_op->vfs_mount != NULL) { + printf("%s doesn't support the new mount syscall\n", + mp->mnt_vfc->vfc_name); + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + mtx_unlock(&vp->v_interlock); + if (mp->mnt_flag & MNT_UPDATE) + vfs_unbusy(mp, td); + else { + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, td); + free(mp, M_MOUNT); + } + vrele(vp); + error = EOPNOTSUPP; + goto bad; + } + + /* + * Set the mount level flags. 
+ */ + if (fsflags & MNT_RDONLY) + mp->mnt_flag |= MNT_RDONLY; + else if (mp->mnt_flag & MNT_RDONLY) + mp->mnt_kern_flag |= MNTK_WANTRDWR; + mp->mnt_flag &=~ MNT_UPDATEMASK; + mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE); + /* + * Mount the filesystem. + * XXX The final recipients of VFS_MOUNT just overwrite the ndp they + * get. No freeing of cn_pnbuf. + */ + error = VFS_NMOUNT(mp, &nd, td); + if (!error) { + if (mp->mnt_opt != NULL) + vfs_freeopts(mp->mnt_opt); + mp->mnt_opt = mp->mnt_optnew; + } + /* + * Prevent external consumers of mount + * options to read mnt_optnew. + */ + mp->mnt_optnew = NULL; + if (mp->mnt_flag & MNT_UPDATE) { + if (mp->mnt_kern_flag & MNTK_WANTRDWR) + mp->mnt_flag &= ~MNT_RDONLY; + mp->mnt_flag &=~ + (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); + mp->mnt_kern_flag &=~ MNTK_WANTRDWR; + if (error) { + mp->mnt_flag = flag; + mp->mnt_kern_flag = kern_flag; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } + vfs_unbusy(mp, td); + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + mtx_unlock(&vp->v_interlock); + vrele(vp); + return (error); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + /* + * Put the new filesystem on the mount list after root. 
+ */ + cache_purge(vp); + if (!error) { + struct vnode *newdp; + + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + vp->v_mountedhere = mp; + mtx_unlock(&vp->v_interlock); + mtx_lock(&mountlist_mtx); + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + mtx_unlock(&mountlist_mtx); + if (VFS_ROOT(mp, &newdp)) + panic("mount: lost mount"); + checkdirs(vp, newdp); + vput(newdp); + VOP_UNLOCK(vp, 0, td); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); + vfs_unbusy(mp, td); + if ((error = VFS_START(mp, 0, td)) != 0) { + vrele(vp); + goto bad; + } + } else { + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + mtx_unlock(&vp->v_interlock); + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, td); + free(mp, M_MOUNT); + vput(vp); + goto bad; + } + return (0); +bad: + vfs_freeopts(optlist); + return (error); +} + +/* + * Old Mount API. + */ +#ifndef _SYS_SYSPROTO_H_ +struct mount_args { + char *type; + char *path; + int flags; + caddr_t data; +}; +#endif +/* ARGSUSED */ +int +mount(td, uap) + struct thread *td; + struct mount_args /* { + syscallarg(char *) type; + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(caddr_t) data; + } */ *uap; +{ + char *fstype; + char *fspath; + int error; + + fstype = malloc(MFSNAMELEN, M_TEMP, M_WAITOK); + fspath = malloc(MNAMELEN, M_TEMP, M_WAITOK); + + /* + * vfs_mount() actually takes a kernel string for `type' and + * `path' now, so extract them. + */ + error = copyinstr(SCARG(uap, type), fstype, MFSNAMELEN, NULL); + if (error) + goto finish; + error = copyinstr(SCARG(uap, path), fspath, MNAMELEN, NULL); + if (error) + goto finish; + error = vfs_mount(td, fstype, fspath, SCARG(uap, flags), + SCARG(uap, data)); +finish: + free(fstype, M_TEMP); + free(fspath, M_TEMP); + return (error); +} + +/* + * vfs_mount(): actually attempt a filesystem mount. + * + * This routine is designed to be a "generic" entry point for routines + * that wish to mount a filesystem. 
All parameters except `fsdata' are + * pointers into kernel space. `fsdata' is currently still a pointer + * into userspace. + */ +int +vfs_mount(td, fstype, fspath, fsflags, fsdata) + struct thread *td; + const char *fstype; + char *fspath; + int fsflags; + void *fsdata; +{ + linker_file_t lf; + struct vnode *vp; + struct mount *mp; + struct vfsconf *vfsp; + int error, flag = 0, kern_flag = 0; + struct vattr va; + struct nameidata nd; + + /* + * Be ultra-paranoid about making sure the type and fspath + * variables will fit in our mp buffers, including the + * terminating NUL. + */ + if (strlen(fstype) >= MFSNAMELEN || strlen(fspath) >= MNAMELEN) + return (ENAMETOOLONG); + + if (usermount == 0) { + error = suser(td); + if (error) + return (error); + } + /* + * Do not allow NFS export by non-root users. + */ + if (fsflags & MNT_EXPORTED) { + error = suser(td); + if (error) + return (error); + } + /* + * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users. + */ + if (suser(td)) + fsflags |= MNT_NOSUID | MNT_NODEV; + /* + * Get vnode to be covered + */ + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + if (fsflags & MNT_UPDATE) { + if ((vp->v_flag & VROOT) == 0) { + vput(vp); + return (EINVAL); + } + mp = vp->v_mount; + flag = mp->mnt_flag; + kern_flag = mp->mnt_kern_flag; + /* + * We only allow the filesystem to be reloaded if it + * is currently mounted read-only. + */ + if ((fsflags & MNT_RELOAD) && + ((mp->mnt_flag & MNT_RDONLY) == 0)) { + vput(vp); + return (EOPNOTSUPP); /* Needs translation */ + } + /* + * Only root, or the user that did the original mount is + * permitted to update it. 
+ */ + if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) { + error = suser(td); + if (error) { + vput(vp); + return (error); + } + } + if (vfs_busy(mp, LK_NOWAIT, 0, td)) { + vput(vp); + return (EBUSY); + } + mtx_lock(&vp->v_interlock); + if ((vp->v_flag & VMOUNT) != 0 || vp->v_mountedhere != NULL) { + mtx_unlock(&vp->v_interlock); + vfs_unbusy(mp, td); + vput(vp); + return (EBUSY); + } + vp->v_flag |= VMOUNT; + mtx_unlock(&vp->v_interlock); + mp->mnt_flag |= fsflags & + (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT); + VOP_UNLOCK(vp, 0, td); + goto update; + } + /* + * If the user is not root, ensure that they own the directory + * onto which we are attempting to mount. + */ + error = VOP_GETATTR(vp, &va, td->td_ucred, td); + if (error) { + vput(vp); + return (error); + } + if (va.va_uid != td->td_ucred->cr_uid) { + error = suser(td); + if (error) { + vput(vp); + return (error); + } + } + if ((error = vinvalbuf(vp, V_SAVE, td->td_ucred, td, 0, 0)) != 0) { + vput(vp); + return (error); + } + if (vp->v_type != VDIR) { + vput(vp); + return (ENOTDIR); + } + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstype)) + break; + if (vfsp == NULL) { + /* Only load modules for root (very important!). */ + error = suser(td); + if (error) { + vput(vp); + return (error); + } + error = securelevel_gt(td->td_ucred, 0); + if (error) { + vput(vp); + return (error); + } + error = linker_load_file(fstype, &lf); + if (error || lf == NULL) { + vput(vp); + if (lf == NULL) + error = ENODEV; + return (error); + } + lf->userrefs++; + /* Look up again to see if the VFS was loaded. 
*/ + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstype)) + break; + if (vfsp == NULL) { + lf->userrefs--; + linker_file_unload(lf); + vput(vp); + return (ENODEV); + } + } + mtx_lock(&vp->v_interlock); + if ((vp->v_flag & VMOUNT) != 0 || + vp->v_mountedhere != NULL) { + mtx_unlock(&vp->v_interlock); + vput(vp); + return (EBUSY); + } + vp->v_flag |= VMOUNT; + mtx_unlock(&vp->v_interlock); + + /* + * Allocate and initialize the filesystem. + */ + mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO); + TAILQ_INIT(&mp->mnt_nvnodelist); + TAILQ_INIT(&mp->mnt_reservedvnlist); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); + (void)vfs_busy(mp, LK_NOWAIT, 0, td); + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_vfc = vfsp; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN); + mp->mnt_vnodecovered = vp; + mp->mnt_stat.f_owner = td->td_ucred->cr_uid; + strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); + mp->mnt_iosize_max = DFLTPHYS; + VOP_UNLOCK(vp, 0, td); +update: + /* + * Check if the fs implements the old VFS_MOUNT() + * function, since the old system call was used. + */ + if (mp->mnt_op->vfs_mount == NULL) { + printf("%s doesn't support the old mount syscall\n", + mp->mnt_vfc->vfc_name); + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + mtx_unlock(&vp->v_interlock); + if (mp->mnt_flag & MNT_UPDATE) + vfs_unbusy(mp, td); + else { + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, td); + free(mp, M_MOUNT); + } + vrele(vp); + return (EOPNOTSUPP); + } + + /* + * Set the mount level flags. + */ + if (fsflags & MNT_RDONLY) + mp->mnt_flag |= MNT_RDONLY; + else if (mp->mnt_flag & MNT_RDONLY) + mp->mnt_kern_flag |= MNTK_WANTRDWR; + mp->mnt_flag &=~ MNT_UPDATEMASK; + mp->mnt_flag |= fsflags & (MNT_UPDATEMASK | MNT_FORCE); + /* + * Mount the filesystem. 
+ * XXX The final recipients of VFS_MOUNT just overwrite the ndp they + * get. No freeing of cn_pnbuf. + */ + error = VFS_MOUNT(mp, fspath, fsdata, &nd, td); + if (mp->mnt_flag & MNT_UPDATE) { + if (mp->mnt_kern_flag & MNTK_WANTRDWR) + mp->mnt_flag &= ~MNT_RDONLY; + mp->mnt_flag &=~ + (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); + mp->mnt_kern_flag &=~ MNTK_WANTRDWR; + if (error) { + mp->mnt_flag = flag; + mp->mnt_kern_flag = kern_flag; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } + vfs_unbusy(mp, td); + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + mtx_unlock(&vp->v_interlock); + vrele(vp); + return (error); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + /* + * Put the new filesystem on the mount list after root. + */ + cache_purge(vp); + if (!error) { + struct vnode *newdp; + + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + vp->v_mountedhere = mp; + mtx_unlock(&vp->v_interlock); + mtx_lock(&mountlist_mtx); + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + mtx_unlock(&mountlist_mtx); + if (VFS_ROOT(mp, &newdp)) + panic("mount: lost mount"); + checkdirs(vp, newdp); + vput(newdp); + VOP_UNLOCK(vp, 0, td); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); + vfs_unbusy(mp, td); + if ((error = VFS_START(mp, 0, td)) != 0) + vrele(vp); + } else { + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + mtx_unlock(&vp->v_interlock); + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, td); + free(mp, M_MOUNT); + vput(vp); + } + return (error); +} + +/* + * Scan all active processes to see if any of them have a current + * or root directory of `olddp'. If so, replace them with the new + * mount point. 
+ */ +static void +checkdirs(olddp, newdp) + struct vnode *olddp, *newdp; +{ + struct filedesc *fdp; + struct proc *p; + int nrele; + + if (olddp->v_usecount == 1) + return; + sx_slock(&allproc_lock); + LIST_FOREACH(p, &allproc, p_list) { + PROC_LOCK(p); + fdp = p->p_fd; + if (fdp == NULL) { + PROC_UNLOCK(p); + continue; + } + nrele = 0; + FILEDESC_LOCK(fdp); + if (fdp->fd_cdir == olddp) { + VREF(newdp); + fdp->fd_cdir = newdp; + nrele++; + } + if (fdp->fd_rdir == olddp) { + VREF(newdp); + fdp->fd_rdir = newdp; + nrele++; + } + FILEDESC_UNLOCK(fdp); + PROC_UNLOCK(p); + while (nrele--) + vrele(olddp); + } + sx_sunlock(&allproc_lock); + if (rootvnode == olddp) { + vrele(rootvnode); + VREF(newdp); + rootvnode = newdp; + } +} + +/* + * Unmount a filesystem. + * + * Note: unmount takes a path to the vnode mounted on as argument, + * not special file (as before). + */ +#ifndef _SYS_SYSPROTO_H_ +struct unmount_args { + char *path; + int flags; +}; +#endif +/* ARGSUSED */ +int +unmount(td, uap) + struct thread *td; + register struct unmount_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; +{ + register struct vnode *vp; + struct mount *mp; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + NDFREE(&nd, NDF_ONLY_PNBUF); + mp = vp->v_mount; + + /* + * Only root, or the user that did the original mount is + * permitted to unmount this filesystem. + */ + if (mp->mnt_stat.f_owner != td->td_ucred->cr_uid) { + error = suser(td); + if (error) { + vput(vp); + return (error); + } + } + + /* + * Don't allow unmounting the root filesystem. 
+ */ + if (mp->mnt_flag & MNT_ROOTFS) { + vput(vp); + return (EINVAL); + } + + /* + * Must be the root of the filesystem + */ + if ((vp->v_flag & VROOT) == 0) { + vput(vp); + return (EINVAL); + } + vput(vp); + return (dounmount(mp, SCARG(uap, flags), td)); +} + +/* + * Do the actual filesystem unmount. + */ +int +dounmount(mp, flags, td) + struct mount *mp; + int flags; + struct thread *td; +{ + struct vnode *coveredvp, *fsrootvp; + int error; + int async_flag; + + mtx_lock(&mountlist_mtx); + if (mp->mnt_kern_flag & MNTK_UNMOUNT) { + mtx_unlock(&mountlist_mtx); + return (EBUSY); + } + mp->mnt_kern_flag |= MNTK_UNMOUNT; + /* Allow filesystems to detect that a forced unmount is in progress. */ + if (flags & MNT_FORCE) + mp->mnt_kern_flag |= MNTK_UNMOUNTF; + error = lockmgr(&mp->mnt_lock, LK_DRAIN | LK_INTERLOCK | + ((flags & MNT_FORCE) ? 0 : LK_NOWAIT), &mountlist_mtx, td); + if (error) { + mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); + if (mp->mnt_kern_flag & MNTK_MWAIT) + wakeup(mp); + return (error); + } + vn_start_write(NULL, &mp, V_WAIT); + + if (mp->mnt_flag & MNT_EXPUBLIC) + vfs_setpublicfs(NULL, NULL, NULL); + + vfs_msync(mp, MNT_WAIT); + async_flag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &=~ MNT_ASYNC; + cache_purgevfs(mp); /* remove cache entries for this file sys */ + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + /* Move process cdir/rdir refs on fs root to underlying vnode. */ + if (VFS_ROOT(mp, &fsrootvp) == 0) { + if (mp->mnt_vnodecovered != NULL) + checkdirs(fsrootvp, mp->mnt_vnodecovered); + if (fsrootvp == rootvnode) { + vrele(rootvnode); + rootvnode = NULL; + } + vput(fsrootvp); + } + if (((mp->mnt_flag & MNT_RDONLY) || + (error = VFS_SYNC(mp, MNT_WAIT, td->td_ucred, td)) == 0) || + (flags & MNT_FORCE)) { + error = VFS_UNMOUNT(mp, flags, td); + } + vn_finished_write(mp); + if (error) { + /* Undo cdir/rdir and rootvnode changes made above. 
*/ + if (VFS_ROOT(mp, &fsrootvp) == 0) { + if (mp->mnt_vnodecovered != NULL) + checkdirs(mp->mnt_vnodecovered, fsrootvp); + if (rootvnode == NULL) { + rootvnode = fsrootvp; + vref(rootvnode); + } + vput(fsrootvp); + } + if ((mp->mnt_flag & MNT_RDONLY) == 0 && mp->mnt_syncer == NULL) + (void) vfs_allocate_syncvnode(mp); + mtx_lock(&mountlist_mtx); + mp->mnt_kern_flag &= ~(MNTK_UNMOUNT | MNTK_UNMOUNTF); + mp->mnt_flag |= async_flag; + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, + &mountlist_mtx, td); + if (mp->mnt_kern_flag & MNTK_MWAIT) + wakeup(mp); + return (error); + } + mtx_lock(&mountlist_mtx); + TAILQ_REMOVE(&mountlist, mp, mnt_list); + if ((coveredvp = mp->mnt_vnodecovered) != NULL) + coveredvp->v_mountedhere = NULL; + mp->mnt_vfc->vfc_refcount--; + if (!TAILQ_EMPTY(&mp->mnt_nvnodelist)) + panic("unmount: dangling vnode"); + lockmgr(&mp->mnt_lock, LK_RELEASE | LK_INTERLOCK, &mountlist_mtx, td); + lockdestroy(&mp->mnt_lock); + if (coveredvp != NULL) + vrele(coveredvp); + if (mp->mnt_kern_flag & MNTK_MWAIT) + wakeup(mp); + if (mp->mnt_op->vfs_mount == NULL) + vfs_freeopts(mp->mnt_opt); + free(mp, M_MOUNT); + return (0); +} + +/* + * Sync each mounted filesystem. + */ +#ifndef _SYS_SYSPROTO_H_ +struct sync_args { + int dummy; +}; +#endif + +#ifdef DEBUG +static int syncprt = 0; +SYSCTL_INT(_debug, OID_AUTO, syncprt, CTLFLAG_RW, &syncprt, 0, ""); +#endif + +/* ARGSUSED */ +int +sync(td, uap) + struct thread *td; + struct sync_args *uap; +{ + struct mount *mp, *nmp; + int asyncflag; + + mtx_lock(&mountlist_mtx); + for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { + nmp = TAILQ_NEXT(mp, mnt_list); + continue; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0 && + vn_start_write(NULL, &mp, V_NOWAIT) == 0) { + asyncflag = mp->mnt_flag & MNT_ASYNC; + mp->mnt_flag &= ~MNT_ASYNC; + vfs_msync(mp, MNT_NOWAIT); + VFS_SYNC(mp, MNT_NOWAIT, + ((td != NULL) ? 
td->td_ucred : NOCRED), td); + mp->mnt_flag |= asyncflag; + vn_finished_write(mp); + } + mtx_lock(&mountlist_mtx); + nmp = TAILQ_NEXT(mp, mnt_list); + vfs_unbusy(mp, td); + } + mtx_unlock(&mountlist_mtx); +#if 0 +/* + * XXX don't call vfs_bufstats() yet because that routine + * was not imported in the Lite2 merge. + */ +#ifdef DIAGNOSTIC + if (syncprt) + vfs_bufstats(); +#endif /* DIAGNOSTIC */ +#endif + return (0); +} + +/* XXX PRISON: could be per prison flag */ +static int prison_quotas; +#if 0 +SYSCTL_INT(_kern_prison, OID_AUTO, quotas, CTLFLAG_RW, &prison_quotas, 0, ""); +#endif + +/* + * Change filesystem quotas. + */ +#ifndef _SYS_SYSPROTO_H_ +struct quotactl_args { + char *path; + int cmd; + int uid; + caddr_t arg; +}; +#endif +/* ARGSUSED */ +int +quotactl(td, uap) + struct thread *td; + register struct quotactl_args /* { + syscallarg(char *) path; + syscallarg(int) cmd; + syscallarg(int) uid; + syscallarg(caddr_t) arg; + } */ *uap; +{ + struct mount *mp; + int error; + struct nameidata nd; + + if (jailed(td->td_ucred) && !prison_quotas) + return (EPERM); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_start_write(nd.ni_vp, &mp, V_WAIT | PCATCH); + vrele(nd.ni_vp); + if (error) + return (error); + error = VFS_QUOTACTL(mp, SCARG(uap, cmd), SCARG(uap, uid), + SCARG(uap, arg), td); + vn_finished_write(mp); + return (error); +} + +/* + * Get filesystem statistics. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct statfs_args { + char *path; + struct statfs *buf; +}; +#endif +/* ARGSUSED */ +int +statfs(td, uap) + struct thread *td; + register struct statfs_args /* { + syscallarg(char *) path; + syscallarg(struct statfs *) buf; + } */ *uap; +{ + register struct mount *mp; + register struct statfs *sp; + int error; + struct nameidata nd; + struct statfs sb; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + mp = nd.ni_vp->v_mount; + sp = &mp->mnt_stat; + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(nd.ni_vp); + error = VFS_STATFS(mp, sp, td); + if (error) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (suser(td)) { + bcopy(sp, &sb, sizeof(sb)); + sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; + sp = &sb; + } + return (copyout(sp, SCARG(uap, buf), sizeof(*sp))); +} + +/* + * Get filesystem statistics. + */ +#ifndef _SYS_SYSPROTO_H_ +struct fstatfs_args { + int fd; + struct statfs *buf; +}; +#endif +/* ARGSUSED */ +int +fstatfs(td, uap) + struct thread *td; + register struct fstatfs_args /* { + syscallarg(int) fd; + syscallarg(struct statfs *) buf; + } */ *uap; +{ + struct file *fp; + struct mount *mp; + register struct statfs *sp; + int error; + struct statfs sb; + + if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + mp = ((struct vnode *)fp->f_data)->v_mount; + fdrop(fp, td); + if (mp == NULL) + return (EBADF); + sp = &mp->mnt_stat; + error = VFS_STATFS(mp, sp, td); + if (error) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (suser(td)) { + bcopy(sp, &sb, sizeof(sb)); + sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; + sp = &sb; + } + return (copyout(sp, SCARG(uap, buf), sizeof(*sp))); +} + +/* + * Get statistics on all filesystems. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct getfsstat_args { + struct statfs *buf; + long bufsize; + int flags; +}; +#endif +int +getfsstat(td, uap) + struct thread *td; + register struct getfsstat_args /* { + syscallarg(struct statfs *) buf; + syscallarg(long) bufsize; + syscallarg(int) flags; + } */ *uap; +{ + register struct mount *mp, *nmp; + register struct statfs *sp; + caddr_t sfsp; + long count, maxcount, error; + + maxcount = SCARG(uap, bufsize) / sizeof(struct statfs); + sfsp = (caddr_t)SCARG(uap, buf); + count = 0; + mtx_lock(&mountlist_mtx); + for (mp = TAILQ_FIRST(&mountlist); mp != NULL; mp = nmp) { + if (vfs_busy(mp, LK_NOWAIT, &mountlist_mtx, td)) { + nmp = TAILQ_NEXT(mp, mnt_list); + continue; + } + if (sfsp && count < maxcount) { + sp = &mp->mnt_stat; + /* + * If MNT_NOWAIT or MNT_LAZY is specified, do not + * refresh the fsstat cache. MNT_NOWAIT or MNT_LAZY + * overrides MNT_WAIT. + */ + if (((SCARG(uap, flags) & (MNT_LAZY|MNT_NOWAIT)) == 0 || + (SCARG(uap, flags) & MNT_WAIT)) && + (error = VFS_STATFS(mp, sp, td))) { + mtx_lock(&mountlist_mtx); + nmp = TAILQ_NEXT(mp, mnt_list); + vfs_unbusy(mp, td); + continue; + } + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + error = copyout(sp, sfsp, sizeof(*sp)); + if (error) { + vfs_unbusy(mp, td); + return (error); + } + sfsp += sizeof(*sp); + } + count++; + mtx_lock(&mountlist_mtx); + nmp = TAILQ_NEXT(mp, mnt_list); + vfs_unbusy(mp, td); + } + mtx_unlock(&mountlist_mtx); + if (sfsp && count > maxcount) + td->td_retval[0] = maxcount; + else + td->td_retval[0] = count; + return (0); +} + +/* + * Change current working directory to a given file descriptor. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct fchdir_args { + int fd; +}; +#endif +/* ARGSUSED */ +int +fchdir(td, uap) + struct thread *td; + struct fchdir_args /* { + syscallarg(int) fd; + } */ *uap; +{ + register struct filedesc *fdp = td->td_proc->p_fd; + struct vnode *vp, *tdp, *vpold; + struct mount *mp; + struct file *fp; + int error; + + if ((error = getvnode(fdp, SCARG(uap, fd), &fp)) != 0) + return (error); + vp = (struct vnode *)fp->f_data; + VREF(vp); + fdrop(fp, td); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + if (vp->v_type != VDIR) + error = ENOTDIR; + else + error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); + while (!error && (mp = vp->v_mountedhere) != NULL) { + if (vfs_busy(mp, 0, 0, td)) + continue; + error = VFS_ROOT(mp, &tdp); + vfs_unbusy(mp, td); + if (error) + break; + vput(vp); + vp = tdp; + } + if (error) { + vput(vp); + return (error); + } + VOP_UNLOCK(vp, 0, td); + FILEDESC_LOCK(fdp); + vpold = fdp->fd_cdir; + fdp->fd_cdir = vp; + FILEDESC_UNLOCK(fdp); + vrele(vpold); + return (0); +} + +/* + * Change current working directory (``.''). + */ +#ifndef _SYS_SYSPROTO_H_ +struct chdir_args { + char *path; +}; +#endif +/* ARGSUSED */ +int +chdir(td, uap) + struct thread *td; + struct chdir_args /* { + syscallarg(char *) path; + } */ *uap; +{ + register struct filedesc *fdp = td->td_proc->p_fd; + int error; + struct nameidata nd; + struct vnode *vp; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = change_dir(&nd, td)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + FILEDESC_LOCK(fdp); + vp = fdp->fd_cdir; + fdp->fd_cdir = nd.ni_vp; + FILEDESC_UNLOCK(fdp); + vrele(vp); + return (0); +} + +/* + * Helper function for raised chroot(2) security function: Refuse if + * any filedescriptors are open directories. 
+ */ +static int +chroot_refuse_vdir_fds(fdp) + struct filedesc *fdp; +{ + struct vnode *vp; + struct file *fp; + int fd; + + FILEDESC_LOCK(fdp); + for (fd = 0; fd < fdp->fd_nfiles ; fd++) { + fp = fget_locked(fdp, fd); + if (fp == NULL) + continue; + if (fp->f_type == DTYPE_VNODE) { + vp = (struct vnode *)fp->f_data; + if (vp->v_type == VDIR) { + FILEDESC_UNLOCK(fdp); + return (EPERM); + } + } + } + FILEDESC_UNLOCK(fdp); + return (0); +} + +/* + * This sysctl determines if we will allow a process to chroot(2) if it + * has a directory open: + * 0: disallowed for all processes. + * 1: allowed for processes that were not already chroot(2)'ed. + * 2: allowed for all processes. + */ + +static int chroot_allow_open_directories = 1; + +SYSCTL_INT(_kern, OID_AUTO, chroot_allow_open_directories, CTLFLAG_RW, + &chroot_allow_open_directories, 0, ""); + +/* + * Change notion of root (``/'') directory. + */ +#ifndef _SYS_SYSPROTO_H_ +struct chroot_args { + char *path; +}; +#endif +/* ARGSUSED */ +int +chroot(td, uap) + struct thread *td; + struct chroot_args /* { + syscallarg(char *) path; + } */ *uap; +{ + register struct filedesc *fdp = td->td_proc->p_fd; + int error; + struct nameidata nd; + struct vnode *vp; + + error = suser_cred(td->td_ucred, PRISON_ROOT); + if (error) + return (error); + FILEDESC_LOCK(fdp); + if (chroot_allow_open_directories == 0 || + (chroot_allow_open_directories == 1 && fdp->fd_rdir != rootvnode)) { + FILEDESC_UNLOCK(fdp); + error = chroot_refuse_vdir_fds(fdp); + } else + FILEDESC_UNLOCK(fdp); + if (error) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = change_dir(&nd, td)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + FILEDESC_LOCK(fdp); + vp = fdp->fd_rdir; + fdp->fd_rdir = nd.ni_vp; + if (!fdp->fd_jdir) { + fdp->fd_jdir = nd.ni_vp; + VREF(fdp->fd_jdir); + } + FILEDESC_UNLOCK(fdp); + vrele(vp); + return (0); +} + +/* + * Common routine for chroot and chdir. 
+ */ +static int +change_dir(ndp, td) + register struct nameidata *ndp; + struct thread *td; +{ + struct vnode *vp; + int error; + + error = namei(ndp); + if (error) + return (error); + vp = ndp->ni_vp; + if (vp->v_type != VDIR) + error = ENOTDIR; + else + error = VOP_ACCESS(vp, VEXEC, td->td_ucred, td); + if (error) + vput(vp); + else + VOP_UNLOCK(vp, 0, td); + return (error); +} + +/* + * Check permissions, allocate an open file structure, + * and call the device open routine if any. + */ +#ifndef _SYS_SYSPROTO_H_ +struct open_args { + char *path; + int flags; + int mode; +}; +#endif +int +open(td, uap) + struct thread *td; + register struct open_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(int) mode; + } */ *uap; +{ + struct proc *p = td->td_proc; + struct filedesc *fdp = p->p_fd; + struct file *fp; + struct vnode *vp; + struct vattr vat; + struct mount *mp; + int cmode, flags, oflags; + struct file *nfp; + int type, indx, error; + struct flock lf; + struct nameidata nd; + + oflags = SCARG(uap, flags); + if ((oflags & O_ACCMODE) == O_ACCMODE) + return (EINVAL); + flags = FFLAGS(oflags); + error = falloc(td, &nfp, &indx); + if (error) + return (error); + fp = nfp; + FILEDESC_LOCK(fdp); + cmode = ((SCARG(uap, mode) &~ fdp->fd_cmask) & ALLPERMS) &~ S_ISTXT; + FILEDESC_UNLOCK(fdp); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + td->td_dupfd = -indx - 1; /* XXX check for fdopen */ + /* + * Bump the ref count to prevent another process from closing + * the descriptor while we are blocked in vn_open() + */ + fhold(fp); + error = vn_open(&nd, &flags, cmode); + if (error) { + /* + * release our own reference + */ + fdrop(fp, td); + + /* + * handle special fdopen() case. bleh. dupfdopen() is + * responsible for dropping the old contents of ofiles[indx] + * if it succeeds. 
+ */ + if ((error == ENODEV || error == ENXIO) && + td->td_dupfd >= 0 && /* XXX from fdopen */ + (error = + dupfdopen(td, fdp, indx, td->td_dupfd, flags, error)) == 0) { + td->td_retval[0] = indx; + return (0); + } + /* + * Clean up the descriptor, but only if another thread hadn't + * replaced or closed it. + */ + FILEDESC_LOCK(fdp); + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); + fdrop(fp, td); + } else + FILEDESC_UNLOCK(fdp); + + if (error == ERESTART) + error = EINTR; + return (error); + } + td->td_dupfd = 0; + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + + /* + * There should be 2 references on the file, one from the descriptor + * table, and one for us. + * + * Handle the case where someone closed the file (via its file + * descriptor) while we were blocked. The end result should look + * like opening the file succeeded but it was immediately closed. + */ + FILEDESC_LOCK(fdp); + FILE_LOCK(fp); + if (fp->f_count == 1) { + KASSERT(fdp->fd_ofiles[indx] != fp, + ("Open file descriptor lost all refs")); + FILEDESC_UNLOCK(fdp); + FILE_UNLOCK(fp); + VOP_UNLOCK(vp, 0, td); + vn_close(vp, flags & FMASK, fp->f_cred, td); + fdrop(fp, td); + td->td_retval[0] = indx; + return 0; + } + + /* assert that vn_open created a backing object if one is needed */ + KASSERT(!vn_canvmio(vp) || VOP_GETVOBJECT(vp, NULL) == 0, + ("open: vmio vnode has no backing object after vn_open")); + + fp->f_data = vp; + fp->f_flag = flags & FMASK; + fp->f_ops = &vnops; + fp->f_type = (vp->v_type == VFIFO ? 
DTYPE_FIFO : DTYPE_VNODE); + FILEDESC_UNLOCK(fdp); + FILE_UNLOCK(fp); + VOP_UNLOCK(vp, 0, td); + if (flags & (O_EXLOCK | O_SHLOCK)) { + lf.l_whence = SEEK_SET; + lf.l_start = 0; + lf.l_len = 0; + if (flags & O_EXLOCK) + lf.l_type = F_WRLCK; + else + lf.l_type = F_RDLCK; + type = F_FLOCK; + if ((flags & FNONBLOCK) == 0) + type |= F_WAIT; + if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf, + type)) != 0) + goto bad; + fp->f_flag |= FHASLOCK; + } + if (flags & O_TRUNC) { + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + goto bad; + VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); + VATTR_NULL(&vat); + vat.va_size = 0; + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + error = VOP_SETATTR(vp, &vat, td->td_ucred, td); + VOP_UNLOCK(vp, 0, td); + vn_finished_write(mp); + if (error) + goto bad; + } + /* + * Release our private reference, leaving the one associated with + * the descriptor table intact. + */ + fdrop(fp, td); + td->td_retval[0] = indx; + return (0); +bad: + FILEDESC_LOCK(fdp); + if (fdp->fd_ofiles[indx] == fp) { + fdp->fd_ofiles[indx] = NULL; + FILEDESC_UNLOCK(fdp); + fdrop(fp, td); + } else + FILEDESC_UNLOCK(fdp); + return (error); +} + +#ifdef COMPAT_43 +/* + * Create a file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct ocreat_args { + char *path; + int mode; +}; +#endif +int +ocreat(td, uap) + struct thread *td; + register struct ocreat_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; +{ + struct open_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + syscallarg(int) mode; + } */ nuap; + + SCARG(&nuap, path) = SCARG(uap, path); + SCARG(&nuap, mode) = SCARG(uap, mode); + SCARG(&nuap, flags) = O_WRONLY | O_CREAT | O_TRUNC; + return (open(td, &nuap)); +} +#endif /* COMPAT_43 */ + +/* + * Create a special file. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct mknod_args { + char *path; + int mode; + int dev; +}; +#endif +/* ARGSUSED */ +int +mknod(td, uap) + struct thread *td; + register struct mknod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + syscallarg(int) dev; + } */ *uap; +{ + struct vnode *vp; + struct mount *mp; + struct vattr vattr; + int error; + int whiteout = 0; + struct nameidata nd; + + switch (SCARG(uap, mode) & S_IFMT) { + case S_IFCHR: + case S_IFBLK: + error = suser(td); + break; + default: + error = suser_cred(td->td_ucred, PRISON_ROOT); + break; + } + if (error) + return (error); +restart: + bwillwrite(); + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + if (vp != NULL) { + vrele(vp); + error = EEXIST; + } else { + VATTR_NULL(&vattr); + FILEDESC_LOCK(td->td_proc->p_fd); + vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); + vattr.va_rdev = SCARG(uap, dev); + whiteout = 0; + + switch (SCARG(uap, mode) & S_IFMT) { + case S_IFMT: /* used by badsect to flag bad sectors */ + vattr.va_type = VBAD; + break; + case S_IFCHR: + vattr.va_type = VCHR; + break; + case S_IFBLK: + vattr.va_type = VBLK; + break; + case S_IFWHT: + whiteout = 1; + break; + default: + error = EINVAL; + break; + } + } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } + if (!error) { + VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); + if (whiteout) + error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, CREATE); + else { + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, + &nd.ni_cnd, &vattr); + if (error == 0) + vput(nd.ni_vp); + } + } + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + vn_finished_write(mp); + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mknod"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, 
"mknod"); + return (error); +} + +/* + * Create a named pipe. + */ +#ifndef _SYS_SYSPROTO_H_ +struct mkfifo_args { + char *path; + int mode; +}; +#endif +/* ARGSUSED */ +int +mkfifo(td, uap) + struct thread *td; + register struct mkfifo_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; +{ + struct mount *mp; + struct vattr vattr; + int error; + struct nameidata nd; + +restart: + bwillwrite(); + NDINIT(&nd, CREATE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + if (nd.ni_vp != NULL) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(nd.ni_vp); + vput(nd.ni_dvp); + return (EEXIST); + } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } + VATTR_NULL(&vattr); + vattr.va_type = VFIFO; + FILEDESC_LOCK(td->td_proc->p_fd); + vattr.va_mode = (SCARG(uap, mode) & ALLPERMS) &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); + VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); + error = VOP_MKNOD(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr); + if (error == 0) + vput(nd.ni_vp); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + vn_finished_write(mp); + return (error); +} + +/* + * Make a hard file link. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct link_args { + char *path; + char *link; +}; +#endif +/* ARGSUSED */ +int +link(td, uap) + struct thread *td; + register struct link_args /* { + syscallarg(char *) path; + syscallarg(char *) link; + } */ *uap; +{ + struct vnode *vp; + struct mount *mp; + struct nameidata nd; + int error; + + bwillwrite(); + NDINIT(&nd, LOOKUP, FOLLOW|NOOBJ, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + if (vp->v_type == VDIR) { + vrele(vp); + return (EPERM); /* POSIX */ + } + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + vrele(vp); + return (error); + } + NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td); + if ((error = namei(&nd)) == 0) { + if (nd.ni_vp != NULL) { + vrele(nd.ni_vp); + error = EEXIST; + } else { + VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); + VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); + error = VOP_LINK(nd.ni_dvp, vp, &nd.ni_cnd); + } + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + } + vrele(vp); + vn_finished_write(mp); + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "link"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, "link"); + return (error); +} + +/* + * Make a symbolic link. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct symlink_args { + char *path; + char *link; +}; +#endif +/* ARGSUSED */ +int +symlink(td, uap) + struct thread *td; + register struct symlink_args /* { + syscallarg(char *) path; + syscallarg(char *) link; + } */ *uap; +{ + struct mount *mp; + struct vattr vattr; + char *path; + int error; + struct nameidata nd; + + path = uma_zalloc(namei_zone, M_WAITOK); + if ((error = copyinstr(SCARG(uap, path), path, MAXPATHLEN, NULL)) != 0) + goto out; +restart: + bwillwrite(); + NDINIT(&nd, CREATE, LOCKPARENT|NOOBJ, UIO_USERSPACE, SCARG(uap, link), td); + if ((error = namei(&nd)) != 0) + goto out; + if (nd.ni_vp) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(nd.ni_vp); + vput(nd.ni_dvp); + error = EEXIST; + goto out; + } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } + VATTR_NULL(&vattr); + FILEDESC_LOCK(td->td_proc->p_fd); + vattr.va_mode = ACCESSPERMS &~ td->td_proc->p_fd->fd_cmask; + FILEDESC_UNLOCK(td->td_proc->p_fd); + VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); + error = VOP_SYMLINK(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr, path); + NDFREE(&nd, NDF_ONLY_PNBUF); + if (error == 0) + vput(nd.ni_vp); + vput(nd.ni_dvp); + vn_finished_write(mp); + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "symlink"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, "symlink"); +out: + uma_zfree(namei_zone, path); + return (error); +} + +/* + * Delete a whiteout from the filesystem. 
+ */ +/* ARGSUSED */ +int +undelete(td, uap) + struct thread *td; + register struct undelete_args /* { + syscallarg(char *) path; + } */ *uap; +{ + int error; + struct mount *mp; + struct nameidata nd; + +restart: + bwillwrite(); + NDINIT(&nd, DELETE, LOCKPARENT|DOWHITEOUT, UIO_USERSPACE, + SCARG(uap, path), td); + error = namei(&nd); + if (error) + return (error); + + if (nd.ni_vp != NULLVP || !(nd.ni_cnd.cn_flags & ISWHITEOUT)) { + NDFREE(&nd, NDF_ONLY_PNBUF); + if (nd.ni_vp) + vrele(nd.ni_vp); + vput(nd.ni_dvp); + return (EEXIST); + } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } + VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); + error = VOP_WHITEOUT(nd.ni_dvp, &nd.ni_cnd, DELETE); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + vn_finished_write(mp); + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "undelete"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, "undelete"); + return (error); +} + +/* + * Delete a name from the filesystem. + */ +#ifndef _SYS_SYSPROTO_H_ +struct unlink_args { + char *path; +}; +#endif +/* ARGSUSED */ +int +unlink(td, uap) + struct thread *td; + struct unlink_args /* { + syscallarg(char *) path; + } */ *uap; +{ + struct mount *mp; + struct vnode *vp; + int error; + struct nameidata nd; + +restart: + bwillwrite(); + NDINIT(&nd, DELETE, LOCKPARENT, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + if (vp->v_type == VDIR) + error = EPERM; /* POSIX */ + else { + /* + * The root of a mounted filesystem cannot be deleted. + * + * XXX: can this only be a VDIR case? 
+ */ + if (vp->v_flag & VROOT) + error = EBUSY; + } + if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) { + NDFREE(&nd, NDF_ONLY_PNBUF); + vrele(vp); + vput(nd.ni_dvp); + if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0) + return (error); + goto restart; + } + VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + if (!error) { + VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE); + error = VOP_REMOVE(nd.ni_dvp, vp, &nd.ni_cnd); + } + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_dvp); + vput(vp); + vn_finished_write(mp); + ASSERT_VOP_UNLOCKED(nd.ni_dvp, "unlink"); + ASSERT_VOP_UNLOCKED(nd.ni_vp, "unlink"); + return (error); +} + +/* + * Reposition read/write file offset. + */ +#ifndef _SYS_SYSPROTO_H_ +struct lseek_args { + int fd; + int pad; + off_t offset; + int whence; +}; +#endif +int +lseek(td, uap) + struct thread *td; + register struct lseek_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) offset; + syscallarg(int) whence; + } */ *uap; +{ + struct ucred *cred = td->td_ucred; + struct file *fp; + struct vnode *vp; + struct vattr vattr; + off_t offset; + int error, noneg; + + if ((error = fget(td, uap->fd, &fp)) != 0) + return (error); + if (fp->f_type != DTYPE_VNODE) { + fdrop(fp, td); + return (ESPIPE); + } + vp = (struct vnode *)fp->f_data; + noneg = (vp->v_type != VCHR); + offset = SCARG(uap, offset); + switch (SCARG(uap, whence)) { + case L_INCR: + if (noneg && + (fp->f_offset < 0 || + (offset > 0 && fp->f_offset > OFF_MAX - offset))) + return (EOVERFLOW); + offset += fp->f_offset; + break; + case L_XTND: + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + error = VOP_GETATTR(vp, &vattr, cred, td); + VOP_UNLOCK(vp, 0, td); + if (error) + return (error); + if (noneg && + (vattr.va_size > OFF_MAX || + (offset > 0 && vattr.va_size > OFF_MAX - offset))) + return (EOVERFLOW); + offset += vattr.va_size; + break; + case L_SET: + break; + default: + fdrop(fp, td); + return (EINVAL); + } + if 
(noneg && offset < 0) + return (EINVAL); + fp->f_offset = offset; + *(off_t *)(td->td_retval) = fp->f_offset; + fdrop(fp, td); + return (0); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* + * Reposition read/write file offset. + */ +#ifndef _SYS_SYSPROTO_H_ +struct olseek_args { + int fd; + long offset; + int whence; +}; +#endif +int +olseek(td, uap) + struct thread *td; + register struct olseek_args /* { + syscallarg(int) fd; + syscallarg(long) offset; + syscallarg(int) whence; + } */ *uap; +{ + struct lseek_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) offset; + syscallarg(int) whence; + } */ nuap; + int error; + + SCARG(&nuap, fd) = SCARG(uap, fd); + SCARG(&nuap, offset) = SCARG(uap, offset); + SCARG(&nuap, whence) = SCARG(uap, whence); + error = lseek(td, &nuap); + return (error); +} +#endif /* COMPAT_43 */ + +/* + * Check access permissions using passed credentials. + */ +static int +vn_access(vp, user_flags, cred, td) + struct vnode *vp; + int user_flags; + struct ucred *cred; + struct thread *td; +{ + int error, flags; + + /* Flags == 0 means only check for existence. */ + error = 0; + if (user_flags) { + flags = 0; + if (user_flags & R_OK) + flags |= VREAD; + if (user_flags & W_OK) + flags |= VWRITE; + if (user_flags & X_OK) + flags |= VEXEC; + if ((flags & VWRITE) == 0 || (error = vn_writechk(vp)) == 0) + error = VOP_ACCESS(vp, flags, cred, td); + } + return (error); +} + +/* + * Check access permissions using "real" credentials. + */ +#ifndef _SYS_SYSPROTO_H_ +struct access_args { + char *path; + int flags; +}; +#endif +int +access(td, uap) + struct thread *td; + register struct access_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; +{ + struct ucred *cred, *tmpcred; + register struct vnode *vp; + int error; + struct nameidata nd; + + /* + * Create and modify a temporary credential instead of one that + * is potentially shared. 
This could also mess up socket + * buffer accounting which can run in an interrupt context. + * + * XXX - Depending on how "threads" are finally implemented, it + * may be better to explicitly pass the credential to namei() + * rather than to modify the potentially shared process structure. + */ + cred = td->td_ucred; + tmpcred = crdup(cred); + tmpcred->cr_uid = cred->cr_ruid; + tmpcred->cr_groups[0] = cred->cr_rgid; + td->td_ucred = tmpcred; + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + goto out1; + vp = nd.ni_vp; + + error = vn_access(vp, SCARG(uap, flags), tmpcred, td); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(vp); +out1: + td->td_ucred = cred; + crfree(tmpcred); + return (error); +} + +/* + * Check access permissions using "effective" credentials. + */ +#ifndef _SYS_SYSPROTO_H_ +struct eaccess_args { + char *path; + int flags; +}; +#endif +int +eaccess(td, uap) + struct thread *td; + register struct eaccess_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; +{ + struct nameidata nd; + struct vnode *vp; + int error; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + + error = vn_access(vp, SCARG(uap, flags), td->td_ucred, td); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(vp); + return (error); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* + * Get file status; this version follows links. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct ostat_args { + char *path; + struct ostat *ub; +}; +#endif +/* ARGSUSED */ +int +ostat(td, uap) + struct thread *td; + register struct ostat_args /* { + syscallarg(char *) path; + syscallarg(struct ostat *) ub; + } */ *uap; +{ + struct stat sb; + struct ostat osb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_stat(nd.ni_vp, &sb, td); + vput(nd.ni_vp); + if (error) + return (error); + cvtstat(&sb, &osb); + error = copyout(&osb, SCARG(uap, ub), sizeof (osb)); + return (error); +} + +/* + * Get file status; this version does not follow links. + */ +#ifndef _SYS_SYSPROTO_H_ +struct olstat_args { + char *path; + struct ostat *ub; +}; +#endif +/* ARGSUSED */ +int +olstat(td, uap) + struct thread *td; + register struct olstat_args /* { + syscallarg(char *) path; + syscallarg(struct ostat *) ub; + } */ *uap; +{ + struct vnode *vp; + struct stat sb; + struct ostat osb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + error = vn_stat(vp, &sb, td); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(vp); + if (error) + return (error); + cvtstat(&sb, &osb); + error = copyout(&osb, SCARG(uap, ub), sizeof (osb)); + return (error); +} + +/* + * Convert from an old to a new stat structure. 
+ */ +void +cvtstat(st, ost) + struct stat *st; + struct ostat *ost; +{ + + ost->st_dev = st->st_dev; + ost->st_ino = st->st_ino; + ost->st_mode = st->st_mode; + ost->st_nlink = st->st_nlink; + ost->st_uid = st->st_uid; + ost->st_gid = st->st_gid; + ost->st_rdev = st->st_rdev; + if (st->st_size < (quad_t)1 << 32) + ost->st_size = st->st_size; + else + ost->st_size = -2; + ost->st_atime = st->st_atime; + ost->st_mtime = st->st_mtime; + ost->st_ctime = st->st_ctime; + ost->st_blksize = st->st_blksize; + ost->st_blocks = st->st_blocks; + ost->st_flags = st->st_flags; + ost->st_gen = st->st_gen; +} +#endif /* COMPAT_43 || COMPAT_SUNOS */ + +/* + * Get file status; this version follows links. + */ +#ifndef _SYS_SYSPROTO_H_ +struct stat_args { + char *path; + struct stat *ub; +}; +#endif +/* ARGSUSED */ +int +stat(td, uap) + struct thread *td; + register struct stat_args /* { + syscallarg(char *) path; + syscallarg(struct stat *) ub; + } */ *uap; +{ + struct stat sb; + int error; + struct nameidata nd; + +#ifdef LOOKUP_SHARED + NDINIT(&nd, LOOKUP, FOLLOW | LOCKSHARED | LOCKLEAF | NOOBJ, + UIO_USERSPACE, SCARG(uap, path), td); +#else + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), td); +#endif + if ((error = namei(&nd)) != 0) + return (error); + error = vn_stat(nd.ni_vp, &sb, td); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(nd.ni_vp); + if (error) + return (error); + error = copyout(&sb, SCARG(uap, ub), sizeof (sb)); + return (error); +} + +/* + * Get file status; this version does not follow links. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct lstat_args { + char *path; + struct stat *ub; +}; +#endif +/* ARGSUSED */ +int +lstat(td, uap) + struct thread *td; + register struct lstat_args /* { + syscallarg(char *) path; + syscallarg(struct stat *) ub; + } */ *uap; +{ + int error; + struct vnode *vp; + struct stat sb; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + error = vn_stat(vp, &sb, td); + NDFREE(&nd, NDF_ONLY_PNBUF); + vput(vp); + if (error) + return (error); + error = copyout(&sb, SCARG(uap, ub), sizeof (sb)); + return (error); +} + +/* + * Implementation of the NetBSD stat() function. + * XXX This should probably be collapsed with the FreeBSD version, + * as the differences are only due to vn_stat() clearing spares at + * the end of the structures. vn_stat could be split to avoid this, + * and thus collapse the following to close to zero code. + */ +void +cvtnstat(sb, nsb) + struct stat *sb; + struct nstat *nsb; +{ + bzero(nsb, sizeof *nsb); + nsb->st_dev = sb->st_dev; + nsb->st_ino = sb->st_ino; + nsb->st_mode = sb->st_mode; + nsb->st_nlink = sb->st_nlink; + nsb->st_uid = sb->st_uid; + nsb->st_gid = sb->st_gid; + nsb->st_rdev = sb->st_rdev; + nsb->st_atimespec = sb->st_atimespec; + nsb->st_mtimespec = sb->st_mtimespec; + nsb->st_ctimespec = sb->st_ctimespec; + nsb->st_size = sb->st_size; + nsb->st_blocks = sb->st_blocks; + nsb->st_blksize = sb->st_blksize; + nsb->st_flags = sb->st_flags; + nsb->st_gen = sb->st_gen; + nsb->st_createtimespec = sb->st_createtimespec; +} + +#ifndef _SYS_SYSPROTO_H_ +struct nstat_args { + char *path; + struct nstat *ub; +}; +#endif +/* ARGSUSED */ +int +nstat(td, uap) + struct thread *td; + register struct nstat_args /* { + syscallarg(char *) path; + syscallarg(struct nstat *) ub; + } */ *uap; +{ + struct stat sb; + struct nstat nsb; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, 
FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_stat(nd.ni_vp, &sb, td); + vput(nd.ni_vp); + if (error) + return (error); + cvtnstat(&sb, &nsb); + error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb)); + return (error); +} + +/* + * NetBSD lstat. Get file status; this version does not follow links. + */ +#ifndef _SYS_SYSPROTO_H_ +struct lstat_args { + char *path; + struct stat *ub; +}; +#endif +/* ARGSUSED */ +int +nlstat(td, uap) + struct thread *td; + register struct nlstat_args /* { + syscallarg(char *) path; + syscallarg(struct nstat *) ub; + } */ *uap; +{ + int error; + struct vnode *vp; + struct stat sb; + struct nstat nsb; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + NDFREE(&nd, NDF_ONLY_PNBUF); + error = vn_stat(vp, &sb, td); + vput(vp); + if (error) + return (error); + cvtnstat(&sb, &nsb); + error = copyout(&nsb, SCARG(uap, ub), sizeof (nsb)); + return (error); +} + +/* + * Get configurable pathname variables. + */ +#ifndef _SYS_SYSPROTO_H_ +struct pathconf_args { + char *path; + int name; +}; +#endif +/* ARGSUSED */ +int +pathconf(td, uap) + struct thread *td; + register struct pathconf_args /* { + syscallarg(char *) path; + syscallarg(int) name; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = VOP_PATHCONF(nd.ni_vp, SCARG(uap, name), td->td_retval); + vput(nd.ni_vp); + return (error); +} + +/* + * Return target name of a symbolic link. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct readlink_args { + char *path; + char *buf; + int count; +}; +#endif +/* ARGSUSED */ +int +readlink(td, uap) + struct thread *td; + register struct readlink_args /* { + syscallarg(char *) path; + syscallarg(char *) buf; + syscallarg(int) count; + } */ *uap; +{ + register struct vnode *vp; + struct iovec aiov; + struct uio auio; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW | LOCKLEAF | NOOBJ, UIO_USERSPACE, + SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + if (vp->v_type != VLNK) + error = EINVAL; + else { + aiov.iov_base = SCARG(uap, buf); + aiov.iov_len = SCARG(uap, count); + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_td = td; + auio.uio_resid = SCARG(uap, count); + error = VOP_READLINK(vp, &auio, td->td_ucred); + } + vput(vp); + td->td_retval[0] = SCARG(uap, count) - auio.uio_resid; + return (error); +} + +/* + * Common implementation code for chflags() and fchflags(). + */ +static int +setfflags(td, vp, flags) + struct thread *td; + struct vnode *vp; + int flags; +{ + int error; + struct mount *mp; + struct vattr vattr; + + /* + * Prevent non-root users from setting flags on devices. When + * a device is reused, users can retain ownership of the device + * if they are allowed to set flags and programs assume that + * chown can't fail when done as root. 
+ */ + if (vp->v_type == VCHR || vp->v_type == VBLK) { + error = suser_cred(td->td_ucred, PRISON_ROOT); + if (error) + return (error); + } + + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); + VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + VATTR_NULL(&vattr); + vattr.va_flags = flags; + error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); + VOP_UNLOCK(vp, 0, td); + vn_finished_write(mp); + return (error); +} + +/* + * Change flags of a file given a path name. + */ +#ifndef _SYS_SYSPROTO_H_ +struct chflags_args { + char *path; + int flags; +}; +#endif +/* ARGSUSED */ +int +chflags(td, uap) + struct thread *td; + register struct chflags_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setfflags(td, nd.ni_vp, SCARG(uap, flags)); + vrele(nd.ni_vp); + return error; +} + +/* + * Same as chflags() but doesn't follow symlinks. + */ +int +lchflags(td, uap) + struct thread *td; + register struct lchflags_args /* { + syscallarg(char *) path; + syscallarg(int) flags; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setfflags(td, nd.ni_vp, SCARG(uap, flags)); + vrele(nd.ni_vp); + return error; +} + +/* + * Change flags of a file given a file descriptor. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct fchflags_args { + int fd; + int flags; +}; +#endif +/* ARGSUSED */ +int +fchflags(td, uap) + struct thread *td; + register struct fchflags_args /* { + syscallarg(int) fd; + syscallarg(int) flags; + } */ *uap; +{ + struct file *fp; + int error; + + if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + error = setfflags(td, (struct vnode *) fp->f_data, SCARG(uap, flags)); + fdrop(fp, td); + return (error); +} + +/* + * Common implementation code for chmod(), lchmod() and fchmod(). + */ +static int +setfmode(td, vp, mode) + struct thread *td; + struct vnode *vp; + int mode; +{ + int error; + struct mount *mp; + struct vattr vattr; + + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); + VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + VATTR_NULL(&vattr); + vattr.va_mode = mode & ALLPERMS; + error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); + VOP_UNLOCK(vp, 0, td); + vn_finished_write(mp); + return error; +} + +/* + * Change mode of a file given path name. + */ +#ifndef _SYS_SYSPROTO_H_ +struct chmod_args { + char *path; + int mode; +}; +#endif +/* ARGSUSED */ +int +chmod(td, uap) + struct thread *td; + register struct chmod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setfmode(td, nd.ni_vp, SCARG(uap, mode)); + vrele(nd.ni_vp); + return error; +} + +/* + * Change mode of a file given path name (don't follow links.) 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct lchmod_args { + char *path; + int mode; +}; +#endif +/* ARGSUSED */ +int +lchmod(td, uap) + struct thread *td; + register struct lchmod_args /* { + syscallarg(char *) path; + syscallarg(int) mode; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setfmode(td, nd.ni_vp, SCARG(uap, mode)); + vrele(nd.ni_vp); + return error; +} + +/* + * Change mode of a file given a file descriptor. + */ +#ifndef _SYS_SYSPROTO_H_ +struct fchmod_args { + int fd; + int mode; +}; +#endif +/* ARGSUSED */ +int +fchmod(td, uap) + struct thread *td; + register struct fchmod_args /* { + syscallarg(int) fd; + syscallarg(int) mode; + } */ *uap; +{ + struct file *fp; + struct vnode *vp; + int error; + + if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + vp = (struct vnode *)fp->f_data; + error = setfmode(td, (struct vnode *)fp->f_data, SCARG(uap, mode)); + fdrop(fp, td); + return (error); +} + +/* + * Common implementation for chown(), lchown(), and fchown() + */ +static int +setfown(td, vp, uid, gid) + struct thread *td; + struct vnode *vp; + uid_t uid; + gid_t gid; +{ + int error; + struct mount *mp; + struct vattr vattr; + + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); + VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + VATTR_NULL(&vattr); + vattr.va_uid = uid; + vattr.va_gid = gid; + error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); + VOP_UNLOCK(vp, 0, td); + vn_finished_write(mp); + return error; +} + +/* + * Set ownership given a path name. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct chown_args { + char *path; + int uid; + int gid; +}; +#endif +/* ARGSUSED */ +int +chown(td, uap) + struct thread *td; + register struct chown_args /* { + syscallarg(char *) path; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid)); + vrele(nd.ni_vp); + return (error); +} + +/* + * Set ownership given a path name, do not cross symlinks. + */ +#ifndef _SYS_SYSPROTO_H_ +struct lchown_args { + char *path; + int uid; + int gid; +}; +#endif +/* ARGSUSED */ +int +lchown(td, uap) + struct thread *td; + register struct lchown_args /* { + syscallarg(char *) path; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; +{ + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setfown(td, nd.ni_vp, SCARG(uap, uid), SCARG(uap, gid)); + vrele(nd.ni_vp); + return (error); +} + +/* + * Set ownership given a file descriptor. + */ +#ifndef _SYS_SYSPROTO_H_ +struct fchown_args { + int fd; + int uid; + int gid; +}; +#endif +/* ARGSUSED */ +int +fchown(td, uap) + struct thread *td; + register struct fchown_args /* { + syscallarg(int) fd; + syscallarg(int) uid; + syscallarg(int) gid; + } */ *uap; +{ + struct file *fp; + struct vnode *vp; + int error; + + if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + vp = (struct vnode *)fp->f_data; + error = setfown(td, (struct vnode *)fp->f_data, + SCARG(uap, uid), SCARG(uap, gid)); + fdrop(fp, td); + return (error); +} + +/* + * Common implementation code for utimes(), lutimes(), and futimes(). 
+ */ +static int +getutimes(usrtvp, tsp) + const struct timeval *usrtvp; + struct timespec *tsp; +{ + struct timeval tv[2]; + int error; + + if (usrtvp == NULL) { + microtime(&tv[0]); + TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); + tsp[1] = tsp[0]; + } else { + if ((error = copyin(usrtvp, tv, sizeof (tv))) != 0) + return (error); + TIMEVAL_TO_TIMESPEC(&tv[0], &tsp[0]); + TIMEVAL_TO_TIMESPEC(&tv[1], &tsp[1]); + } + return 0; +} + +/* + * Common implementation code for utimes(), lutimes(), and futimes(). + */ +static int +setutimes(td, vp, ts, nullflag) + struct thread *td; + struct vnode *vp; + const struct timespec *ts; + int nullflag; +{ + int error; + struct mount *mp; + struct vattr vattr; + + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); + VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + VATTR_NULL(&vattr); + vattr.va_atime = ts[0]; + vattr.va_mtime = ts[1]; + if (nullflag) + vattr.va_vaflags |= VA_UTIMES_NULL; + error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); + VOP_UNLOCK(vp, 0, td); + vn_finished_write(mp); + return error; +} + +/* + * Set the access and modification times of a file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct utimes_args { + char *path; + struct timeval *tptr; +}; +#endif +/* ARGSUSED */ +int +utimes(td, uap) + struct thread *td; + register struct utimes_args /* { + syscallarg(char *) path; + syscallarg(struct timeval *) tptr; + } */ *uap; +{ + struct timespec ts[2]; + struct timeval *usrtvp; + int error; + struct nameidata nd; + + usrtvp = SCARG(uap, tptr); + if ((error = getutimes(usrtvp, ts)) != 0) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL); + vrele(nd.ni_vp); + return (error); +} + +/* + * Set the access and modification times of a file. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct lutimes_args { + char *path; + struct timeval *tptr; +}; +#endif +/* ARGSUSED */ +int +lutimes(td, uap) + struct thread *td; + register struct lutimes_args /* { + syscallarg(char *) path; + syscallarg(struct timeval *) tptr; + } */ *uap; +{ + struct timespec ts[2]; + struct timeval *usrtvp; + int error; + struct nameidata nd; + + usrtvp = SCARG(uap, tptr); + if ((error = getutimes(usrtvp, ts)) != 0) + return (error); + NDINIT(&nd, LOOKUP, NOFOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + error = setutimes(td, nd.ni_vp, ts, usrtvp == NULL); + vrele(nd.ni_vp); + return (error); +} + +/* + * Set the access and modification times of a file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct futimes_args { + int fd; + struct timeval *tptr; +}; +#endif +/* ARGSUSED */ +int +futimes(td, uap) + struct thread *td; + register struct futimes_args /* { + syscallarg(int ) fd; + syscallarg(struct timeval *) tptr; + } */ *uap; +{ + struct timespec ts[2]; + struct file *fp; + struct timeval *usrtvp; + int error; + + usrtvp = SCARG(uap, tptr); + if ((error = getutimes(usrtvp, ts)) != 0) + return (error); + if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + error = setutimes(td, (struct vnode *)fp->f_data, ts, usrtvp == NULL); + fdrop(fp, td); + return (error); +} + +/* + * Truncate a file given its path name. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct truncate_args { + char *path; + int pad; + off_t length; +}; +#endif +/* ARGSUSED */ +int +truncate(td, uap) + struct thread *td; + register struct truncate_args /* { + syscallarg(char *) path; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ *uap; +{ + struct mount *mp; + struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + if (uap->length < 0) + return(EINVAL); + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, SCARG(uap, path), td); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + vrele(vp); + return (error); + } + NDFREE(&nd, NDF_ONLY_PNBUF); + VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + if (vp->v_type == VDIR) + error = EISDIR; + else if ((error = vn_writechk(vp)) == 0 && + (error = VOP_ACCESS(vp, VWRITE, td->td_ucred, td)) == 0) { + VATTR_NULL(&vattr); + vattr.va_size = SCARG(uap, length); + error = VOP_SETATTR(vp, &vattr, td->td_ucred, td); + } + vput(vp); + vn_finished_write(mp); + return (error); +} + +/* + * Truncate a file given a file descriptor. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct ftruncate_args { + int fd; + int pad; + off_t length; +}; +#endif +/* ARGSUSED */ +int +ftruncate(td, uap) + struct thread *td; + register struct ftruncate_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ *uap; +{ + struct mount *mp; + struct vattr vattr; + struct vnode *vp; + struct file *fp; + int error; + + if (uap->length < 0) + return(EINVAL); + if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + if ((fp->f_flag & FWRITE) == 0) { + fdrop(fp, td); + return (EINVAL); + } + vp = (struct vnode *)fp->f_data; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + fdrop(fp, td); + return (error); + } + VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + if (vp->v_type == VDIR) + error = EISDIR; + else if ((error = vn_writechk(vp)) == 0) { + VATTR_NULL(&vattr); + vattr.va_size = SCARG(uap, length); + error = VOP_SETATTR(vp, &vattr, fp->f_cred, td); + } + VOP_UNLOCK(vp, 0, td); + vn_finished_write(mp); + fdrop(fp, td); + return (error); +} + +#if defined(COMPAT_43) || defined(COMPAT_SUNOS) +/* + * Truncate a file given its path name. + */ +#ifndef _SYS_SYSPROTO_H_ +struct otruncate_args { + char *path; + long length; +}; +#endif +/* ARGSUSED */ +int +otruncate(td, uap) + struct thread *td; + register struct otruncate_args /* { + syscallarg(char *) path; + syscallarg(long) length; + } */ *uap; +{ + struct truncate_args /* { + syscallarg(char *) path; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ nuap; + + SCARG(&nuap, path) = SCARG(uap, path); + SCARG(&nuap, length) = SCARG(uap, length); + return (truncate(td, &nuap)); +} + +/* + * Truncate a file given a file descriptor. 
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct oftruncate_args { + int fd; + long length; +}; +#endif +/* ARGSUSED */ +int +oftruncate(td, uap) + struct thread *td; + register struct oftruncate_args /* { + syscallarg(int) fd; + syscallarg(long) length; + } */ *uap; +{ + struct ftruncate_args /* { + syscallarg(int) fd; + syscallarg(int) pad; + syscallarg(off_t) length; + } */ nuap; + + SCARG(&nuap, fd) = SCARG(uap, fd); + SCARG(&nuap, length) = SCARG(uap, length); + return (ftruncate(td, &nuap)); +} +#endif /* COMPAT_43 || COMPAT_SUNOS */ + +/* + * Sync an open file. + */ +#ifndef _SYS_SYSPROTO_H_ +struct fsync_args { + int fd; +}; +#endif +/* ARGSUSED */ +int +fsync(td, uap) + struct thread *td; + struct fsync_args /* { + syscallarg(int) fd; + } */ *uap; +{ + struct vnode *vp; + struct mount *mp; + struct file *fp; + vm_object_t obj; + int error; + + GIANT_REQUIRED; + + if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + vp = (struct vnode *)fp->f_data; + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) { + fdrop(fp, td); + return (error); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + if (VOP_GETVOBJECT(vp, &obj) == 0) { + vm_object_page_clean(obj, 0, 0, 0); + } + error = VOP_FSYNC(vp, fp->f_cred, MNT_WAIT, td); +#ifdef SOFTUPDATES + if (error == 0 && vp->v_mount && (vp->v_mount->mnt_flag & MNT_SOFTDEP)) + error = softdep_fsync(vp); +#endif + + VOP_UNLOCK(vp, 0, td); + vn_finished_write(mp); + fdrop(fp, td); + return (error); +} + +/* + * Rename files. Source and destination must either both be directories, + * or both not be directories. If target is a directory, it must be empty. 
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct rename_args {
+	char	*from;
+	char	*to;
+};
+#endif
+/* ARGSUSED */
+int
+rename(td, uap)
+	struct thread *td;
+	register struct rename_args /* {
+		syscallarg(char *) from;
+		syscallarg(char *) to;
+	} */ *uap;
+{
+	struct mount *mp;
+	struct vnode *tvp, *fvp, *tdvp;
+	struct nameidata fromnd, tond;
+	int error;
+	/*
+	 * Overview: look up the source (DELETE, parent wanted), then the
+	 * target (RENAME, parent and leaf locked), run the type and
+	 * same-file checks, and hand everything to VOP_RENAME().  The
+	 * value -1 in `error' is an internal "nothing to do" marker that
+	 * is turned into a successful return at out1.
+	 */
+	bwillwrite();
+	NDINIT(&fromnd, DELETE, WANTPARENT | SAVESTART, UIO_USERSPACE,
+	    SCARG(uap, from), td);
+	if ((error = namei(&fromnd)) != 0)
+		return (error);
+	fvp = fromnd.ni_vp;
+	if ((error = vn_start_write(fvp, &mp, V_WAIT | PCATCH)) != 0) {
+		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		goto out1;	/* vn_finished_write() must not run on this path */
+	}
+	NDINIT(&tond, RENAME, LOCKPARENT | LOCKLEAF | NOCACHE | SAVESTART | NOOBJ,
+	    UIO_USERSPACE, SCARG(uap, to), td);
+	if (fromnd.ni_vp->v_type == VDIR)
+		tond.ni_cnd.cn_flags |= WILLBEDIR;
+	if ((error = namei(&tond)) != 0) {
+		/* Translate error code for rename("dir1", "dir2/."). */
+		if (error == EISDIR && fvp->v_type == VDIR)
+			error = EINVAL;
+		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+		/* NOTE(review): skips vn_finished_write(mp) although vn_start_write() succeeded -- confirm. */
+		goto out1;
+	}
+	tdvp = tond.ni_dvp;
+	tvp = tond.ni_vp;
+	if (tvp != NULL) {	/* an existing target must match the source's kind */
+		if (fvp->v_type == VDIR && tvp->v_type != VDIR) {
+			error = ENOTDIR;
+			goto out;
+		} else if (fvp->v_type != VDIR && tvp->v_type == VDIR) {
+			error = EISDIR;
+			goto out;
+		}
+	}
+	if (fvp == tdvp)	/* the source is the target's parent directory */
+		error = EINVAL;
+	/*
+	 * If source is the same as the destination (that is the
+	 * same inode number with the same name in the same directory),
+	 * then there is nothing to do.
+	 *
+	 * Note that this test runs even when EINVAL was set just above
+	 * and overwrites it with -1 when it matches.
+	 */
+	if (fvp == tvp && fromnd.ni_dvp == tdvp &&
+	    fromnd.ni_cnd.cn_namelen == tond.ni_cnd.cn_namelen &&
+	    !bcmp(fromnd.ni_cnd.cn_nameptr, tond.ni_cnd.cn_nameptr,
+	      fromnd.ni_cnd.cn_namelen))
+		error = -1;
+out:
+	if (!error) {
+		VOP_LEASE(tdvp, td, td->td_ucred, LEASE_WRITE);
+		if (fromnd.ni_dvp != tdvp) {
+			VOP_LEASE(fromnd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
+		}
+		if (tvp) {
+			VOP_LEASE(tvp, td, td->td_ucred, LEASE_WRITE);
+		}
+		/* No vput()/vrele() here: presumably VOP_RENAME() releases the four vnodes -- TODO confirm. */
+		error = VOP_RENAME(fromnd.ni_dvp, fromnd.ni_vp, &fromnd.ni_cnd,
+				   tond.ni_dvp, tond.ni_vp, &tond.ni_cnd);
+		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		NDFREE(&tond, NDF_ONLY_PNBUF);
+	} else {
+		/* Failure or nothing to do: release everything by hand. */
+		NDFREE(&fromnd, NDF_ONLY_PNBUF);
+		NDFREE(&tond, NDF_ONLY_PNBUF);
+		if (tdvp == tvp)
+			vrele(tdvp);
+		else
+			vput(tdvp);
+		if (tvp)
+			vput(tvp);
+		vrele(fromnd.ni_dvp);
+		vrele(fvp);
+	}
+	vrele(tond.ni_startdir);
+	vn_finished_write(mp);
+	ASSERT_VOP_UNLOCKED(fromnd.ni_dvp, "rename");
+	ASSERT_VOP_UNLOCKED(fromnd.ni_vp, "rename");
+	ASSERT_VOP_UNLOCKED(tond.ni_dvp, "rename");
+	ASSERT_VOP_UNLOCKED(tond.ni_vp, "rename");
+out1:
+	if (fromnd.ni_startdir)
+		vrele(fromnd.ni_startdir);
+	if (error == -1)	/* "same file" marker: report success */
+		return (0);
+	return (error);
+}
+
+/*
+ * Make a directory file.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct mkdir_args {
+	char	*path;
+	int	mode;
+};
+#endif
+/* ARGSUSED */
+int
+mkdir(td, uap)
+	struct thread *td;
+	register struct mkdir_args /* {
+		syscallarg(char *) path;
+		syscallarg(int) mode;
+	} */ *uap;
+{
+	/* Thin wrapper: the path is a user-space pointer, hence UIO_USERSPACE. */
+	return vn_mkdir(uap->path, uap->mode, UIO_USERSPACE, td);
+}
+/*
+ * Create the directory `path' with permissions `mode'; `segflg' tells
+ * namei() in which address space the path string lives.  Returns EEXIST
+ * if the name already exists, otherwise the error from namei(),
+ * vn_start_write() or VOP_MKDIR().
+ */
+int
+vn_mkdir(path, mode, segflg, td)
+	char *path;
+	int mode;
+	enum uio_seg segflg;
+	struct thread *td;
+{
+	struct mount *mp;
+	struct vnode *vp;
+	struct vattr vattr;
+	int error;
+	struct nameidata nd;
+
+restart:
+	bwillwrite();
+	NDINIT(&nd, CREATE, LOCKPARENT, segflg, path, td);
+	nd.ni_cnd.cn_flags |= WILLBEDIR;
+	if ((error = namei(&nd)) != 0)
+		return (error);
+	vp = nd.ni_vp;
+	if (vp != NULL) {	/* the name already exists */
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vrele(vp);
+		/*
+		 * XXX namei called with LOCKPARENT but not LOCKLEAF has
+		 * the strange behaviour of leaving the vnode unlocked
+		 * if the target is the same vnode as the parent.
+		 */
+		if (vp == nd.ni_dvp)
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		return (EEXIST);
+	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		/* Could not start the write without sleeping: undo the lookup, wait, retry. */
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		vput(nd.ni_dvp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VATTR_NULL(&vattr);
+	vattr.va_type = VDIR;
+	FILEDESC_LOCK(td->td_proc->p_fd);
+	vattr.va_mode = (mode & ACCESSPERMS) &~ td->td_proc->p_fd->fd_cmask;	/* apply the umask */
+	FILEDESC_UNLOCK(td->td_proc->p_fd);
+	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
+	error = VOP_MKDIR(nd.ni_dvp, &nd.ni_vp, &nd.ni_cnd, &vattr);
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+	vput(nd.ni_dvp);
+	if (!error)
+		vput(nd.ni_vp);	/* the new directory vnode is only released on success */
+	vn_finished_write(mp);
+	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "mkdir");
+	ASSERT_VOP_UNLOCKED(nd.ni_vp, "mkdir");
+	return (error);
+}
+
+/*
+ * Remove a directory file.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct rmdir_args {
+	char	*path;
+};
+#endif
+/* ARGSUSED */
+int
+rmdir(td, uap)
+	struct thread *td;
+	struct rmdir_args /* {
+		syscallarg(char *) path;
+	} */ *uap;
+{
+	struct mount *mp;
+	struct vnode *vp;
+	int error;
+	struct nameidata nd;
+	/*
+	 * The lookup asks for both parent and leaf locked.  ENOTDIR,
+	 * EINVAL (".") and EBUSY (filesystem root) are rejected before
+	 * vn_start_write() is attempted, so those paths reach `out'
+	 * without a matching vn_finished_write().
+	 */
+restart:
+	bwillwrite();
+	NDINIT(&nd, DELETE, LOCKPARENT | LOCKLEAF, UIO_USERSPACE,
+	    SCARG(uap, path), td);
+	if ((error = namei(&nd)) != 0)
+		return (error);
+	vp = nd.ni_vp;
+	if (vp->v_type != VDIR) {
+		error = ENOTDIR;
+		goto out;
+	}
+	/*
+	 * No rmdir "." please.
+	 */
+	if (nd.ni_dvp == vp) {
+		error = EINVAL;
+		goto out;
+	}
+	/*
+	 * The root of a mounted filesystem cannot be deleted.
+	 */
+	if (vp->v_flag & VROOT) {
+		error = EBUSY;
+		goto out;
+	}
+	if (vn_start_write(nd.ni_dvp, &mp, V_NOWAIT) != 0) {
+		/* Could not start the write without sleeping: undo the lookup, wait, retry. */
+		NDFREE(&nd, NDF_ONLY_PNBUF);
+		if (nd.ni_dvp == vp)	/* cannot be true here: "." was rejected above */
+			vrele(nd.ni_dvp);
+		else
+			vput(nd.ni_dvp);
+		vput(vp);
+		if ((error = vn_start_write(NULL, &mp, V_XSLEEP | PCATCH)) != 0)
+			return (error);
+		goto restart;
+	}
+	VOP_LEASE(nd.ni_dvp, td, td->td_ucred, LEASE_WRITE);
+	VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
+	error = VOP_RMDIR(nd.ni_dvp, nd.ni_vp, &nd.ni_cnd);
+	vn_finished_write(mp);
+out:
+	NDFREE(&nd, NDF_ONLY_PNBUF);
+	if (nd.ni_dvp == vp)	/* same vnode as the leaf: vput(vp) below does the unlock */
+		vrele(nd.ni_dvp);
+	else
+		vput(nd.ni_dvp);
+	vput(vp);
+	ASSERT_VOP_UNLOCKED(nd.ni_dvp, "rmdir");
+	ASSERT_VOP_UNLOCKED(nd.ni_vp, "rmdir");
+	return (error);
+}
+
+#ifdef COMPAT_43
+/*
+ * Read a block of directory entries in a filesystem independent format.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct ogetdirentries_args {
+	int	fd;
+	char	*buf;
+	u_int	count;
+	long	*basep;
+};
+#endif
+int
+ogetdirentries(td, uap)
+	struct thread *td;
+	register struct ogetdirentries_args /* {
+		syscallarg(int) fd;
+		syscallarg(char *) buf;
+		syscallarg(u_int) count;
+		syscallarg(long *) basep;
+	} */ *uap;
+{
+	struct vnode *vp;
+	struct file *fp;
+	struct uio auio, kuio;
+	struct iovec aiov, kiov;
+	struct dirent *dp, *edp;
+	caddr_t dirbuf;
+	int error, eofflag, readcnt;
+	long loff;
+
+	/*
+	 * XXX arbitrary sanity limit on `count'.
+	 * `count' also sizes the kernel bounce buffer allocated below.
+	 */
+	if (SCARG(uap, count) > 64 * 1024)
+		return (EINVAL);
+	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
+		return (error);
+	if ((fp->f_flag & FREAD) == 0) {
+		fdrop(fp, td);
+		return (EBADF);
+	}
+	vp = (struct vnode *)fp->f_data;
+unionread:	/* re-entered after switching to another vnode of a union stack */
+	if (vp->v_type != VDIR) {
+		fdrop(fp, td);
+		return (EINVAL);
+	}
+	aiov.iov_base = SCARG(uap, buf);
+	aiov.iov_len = SCARG(uap, count);
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_td = td;
+	auio.uio_resid = SCARG(uap, count);
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+	loff = auio.uio_offset = fp->f_offset;	/* starting offset, reported via `basep' */
+#	if (BYTE_ORDER != LITTLE_ENDIAN)
+		if (vp->v_mount->mnt_maxsymlinklen <= 0) {
+			error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag,
+			    NULL, NULL);
+			fp->f_offset = auio.uio_offset;
+		} else
+#	endif
+	{
+		/* Read into a kernel buffer, patch each entry, then copy out. */
+		kuio = auio;
+		kuio.uio_iov = &kiov;
+		kuio.uio_segflg = UIO_SYSSPACE;
+		kiov.iov_len = SCARG(uap, count);
+		MALLOC(dirbuf, caddr_t, SCARG(uap, count), M_TEMP, M_WAITOK);
+		kiov.iov_base = dirbuf;
+		error = VOP_READDIR(vp, &kuio, fp->f_cred, &eofflag,
+			    NULL, NULL);
+		fp->f_offset = kuio.uio_offset;
+		if (error == 0) {
+			readcnt = SCARG(uap, count) - kuio.uio_resid;
+			edp = (struct dirent *)&dirbuf[readcnt];
+			for (dp = (struct dirent *)dirbuf; dp < edp; ) {
+#				if (BYTE_ORDER == LITTLE_ENDIAN)
+					/*
+					 * The expected low byte of
+					 * dp->d_namlen is our dp->d_type.
+					 * The high MBZ byte of dp->d_namlen
+					 * is our dp->d_namlen.
+					 */
+					dp->d_type = dp->d_namlen;
+					dp->d_namlen = 0;
+#				else
+					/*
+					 * The dp->d_type is the high byte
+					 * of the expected dp->d_namlen,
+					 * so must be zero'ed.
+					 */
+					dp->d_type = 0;
+#				endif
+				if (dp->d_reclen > 0) {
+					dp = (struct dirent *)
+					    ((char *)dp + dp->d_reclen);
+				} else {
+					error = EIO;	/* zero-length record: would loop forever */
+					break;
+				}
+			}
+			if (dp >= edp)	/* false only after the EIO break above */
+				error = uiomove(dirbuf, readcnt, &auio);
+		}
+		FREE(dirbuf, M_TEMP);
+	}
+	VOP_UNLOCK(vp, 0, td);
+	if (error) {
+		fdrop(fp, td);
+		return (error);
+	}
+	if (SCARG(uap, count) == auio.uio_resid) {	/* nothing was transferred */
+		if (union_dircheckp) {
+			error = union_dircheckp(td, &vp, fp);
+			if (error == -1)
+				goto unionread;
+			if (error) {
+				fdrop(fp, td);
+				return (error);
+			}
+		}
+		if ((vp->v_flag & VROOT) &&
+		    (vp->v_mount->mnt_flag & MNT_UNION)) {
+			/* Continue with the directory this union mount covers. */
+			struct vnode *tvp = vp;
+			vp = vp->v_mount->mnt_vnodecovered;
+			VREF(vp);
+			fp->f_data = vp;
+			fp->f_offset = 0;
+			vrele(tvp);
+			goto unionread;
+		}
+	}
+	error = copyout(&loff, SCARG(uap, basep), sizeof(long));	/* unlike getdirentries(), no NULL check on basep */
+	fdrop(fp, td);
+	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
+	return (error);
+}
+#endif /* COMPAT_43 */
+
+/*
+ * Read a block of directory entries in a filesystem independent format.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct getdirentries_args {
+	int	fd;
+	char	*buf;
+	u_int	count;
+	long	*basep;
+};
+#endif
+int
+getdirentries(td, uap)
+	struct thread *td;
+	register struct getdirentries_args /* {
+		syscallarg(int) fd;
+		syscallarg(char *) buf;
+		syscallarg(u_int) count;
+		syscallarg(long *) basep;
+	} */ *uap;
+{
+	struct vnode *vp;
+	struct file *fp;
+	struct uio auio;
+	struct iovec aiov;
+	long loff;
+	int error, eofflag;
+	/*
+	 * Reads up to `count' bytes of directory entries straight into the
+	 * user buffer with VOP_READDIR().  The offset the read started at
+	 * is copied out through `basep' when that pointer is non-NULL, and
+	 * td->td_retval[0] receives the number of bytes transferred.
+	 */
+	if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0)
+		return (error);
+	if ((fp->f_flag & FREAD) == 0) {
+		fdrop(fp, td);
+		return (EBADF);
+	}
+	vp = (struct vnode *)fp->f_data;
+unionread:	/* re-entered after switching to another vnode of a union stack */
+	if (vp->v_type != VDIR) {
+		fdrop(fp, td);
+		return (EINVAL);
+	}
+	aiov.iov_base = SCARG(uap, buf);
+	aiov.iov_len = SCARG(uap, count);
+	auio.uio_iov = &aiov;
+	auio.uio_iovcnt = 1;
+	auio.uio_rw = UIO_READ;
+	auio.uio_segflg = UIO_USERSPACE;
+	auio.uio_td = td;
+	auio.uio_resid = SCARG(uap, count);
+	/* vn_lock(vp, LK_SHARED | LK_RETRY, td); */
+	vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+	loff = auio.uio_offset = fp->f_offset;	/* starting offset, reported via `basep' */
+	error = VOP_READDIR(vp, &auio, fp->f_cred, &eofflag, NULL, NULL);
+	fp->f_offset = auio.uio_offset;
+	VOP_UNLOCK(vp, 0, td);
+	if (error) {
+		fdrop(fp, td);
+		return (error);
+	}
+	if (SCARG(uap, count) == auio.uio_resid) {	/* nothing was transferred */
+		if (union_dircheckp) {
+			error = union_dircheckp(td, &vp, fp);
+			if (error == -1)
+				goto unionread;
+			if (error) {
+				fdrop(fp, td);
+				return (error);
+			}
+		}
+		if ((vp->v_flag & VROOT) &&
+		    (vp->v_mount->mnt_flag & MNT_UNION)) {
+			/* Continue with the directory this union mount covers. */
+			struct vnode *tvp = vp;
+			vp = vp->v_mount->mnt_vnodecovered;
+			VREF(vp);
+			fp->f_data = vp;
+			fp->f_offset = 0;
+			vrele(tvp);
+			goto unionread;
+		}
+	}
+	if (SCARG(uap, basep) != NULL) {	/* getdents() passes basep == NULL */
+		error = copyout(&loff, SCARG(uap, basep), sizeof(long));
+	}
+	td->td_retval[0] = SCARG(uap, count) - auio.uio_resid;
+	fdrop(fp, td);
+	return (error);
+}
+#ifndef _SYS_SYSPROTO_H_
+struct getdents_args {
+	int fd;
+	char *buf;
+	size_t count;
+};
+#endif +int +getdents(td, uap) + struct thread *td; + register struct getdents_args /* { + syscallarg(int) fd; + syscallarg(char *) buf; + syscallarg(u_int) count; + } */ *uap; +{ + struct getdirentries_args ap; + ap.fd = uap->fd; + ap.buf = uap->buf; + ap.count = uap->count; + ap.basep = NULL; + return getdirentries(td, &ap); +} + +/* + * Set the mode mask for creation of filesystem nodes. + * + * MP SAFE + */ +#ifndef _SYS_SYSPROTO_H_ +struct umask_args { + int newmask; +}; +#endif +int +umask(td, uap) + struct thread *td; + struct umask_args /* { + syscallarg(int) newmask; + } */ *uap; +{ + register struct filedesc *fdp; + + FILEDESC_LOCK(td->td_proc->p_fd); + fdp = td->td_proc->p_fd; + td->td_retval[0] = fdp->fd_cmask; + fdp->fd_cmask = SCARG(uap, newmask) & ALLPERMS; + FILEDESC_UNLOCK(td->td_proc->p_fd); + return (0); +} + +/* + * Void all references to file by ripping underlying filesystem + * away from vnode. + */ +#ifndef _SYS_SYSPROTO_H_ +struct revoke_args { + char *path; +}; +#endif +/* ARGSUSED */ +int +revoke(td, uap) + struct thread *td; + register struct revoke_args /* { + syscallarg(char *) path; + } */ *uap; +{ + struct mount *mp; + struct vnode *vp; + struct vattr vattr; + int error; + struct nameidata nd; + + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, SCARG(uap, path), + td); + if ((error = namei(&nd)) != 0) + return (error); + vp = nd.ni_vp; + NDFREE(&nd, NDF_ONLY_PNBUF); + if (vp->v_type != VCHR) { + vput(vp); + return (EINVAL); + } + error = VOP_GETATTR(vp, &vattr, td->td_ucred, td); + if (error) { + vput(vp); + return (error); + } + VOP_UNLOCK(vp, 0, td); + if (td->td_ucred->cr_uid != vattr.va_uid) { + error = suser_cred(td->td_ucred, PRISON_ROOT); + if (error) + goto out; + } + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + goto out; + if (vcount(vp) > 1) + VOP_REVOKE(vp, REVOKEALL); + vn_finished_write(mp); +out: + vrele(vp); + return (error); +} + +/* + * Convert a user file descriptor to a kernel file entry. 
+ * The file entry is locked upon returning. + */ +int +getvnode(fdp, fd, fpp) + struct filedesc *fdp; + int fd; + struct file **fpp; +{ + int error; + struct file *fp; + + fp = NULL; + if (fdp == NULL) + error = EBADF; + else { + FILEDESC_LOCK(fdp); + if ((u_int)fd >= fdp->fd_nfiles || + (fp = fdp->fd_ofiles[fd]) == NULL) + error = EBADF; + else if (fp->f_type != DTYPE_VNODE && fp->f_type != DTYPE_FIFO) { + fp = NULL; + error = EINVAL; + } else { + fhold(fp); + error = 0; + } + FILEDESC_UNLOCK(fdp); + } + *fpp = fp; + return (error); +} +/* + * Get (NFS) file handle + */ +#ifndef _SYS_SYSPROTO_H_ +struct getfh_args { + char *fname; + fhandle_t *fhp; +}; +#endif +int +getfh(td, uap) + struct thread *td; + register struct getfh_args *uap; +{ + struct nameidata nd; + fhandle_t fh; + register struct vnode *vp; + int error; + + /* + * Must be super user + */ + error = suser(td); + if (error) + return (error); + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, uap->fname, td); + error = namei(&nd); + if (error) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + bzero(&fh, sizeof(fh)); + fh.fh_fsid = vp->v_mount->mnt_stat.f_fsid; + error = VFS_VPTOFH(vp, &fh.fh_fid); + vput(vp); + if (error) + return (error); + error = copyout(&fh, uap->fhp, sizeof (fh)); + return (error); +} + +/* + * syscall for the rpc.lockd to use to translate a NFS file handle into + * an open descriptor. + * + * warning: do not remove the suser() call or this becomes one giant + * security hole. 
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct fhopen_args {
+	const struct fhandle *u_fhp;
+	int flags;
+};
+#endif
+int
+fhopen(td, uap)
+	struct thread *td;
+	struct fhopen_args /* {
+		syscallarg(const struct fhandle *) u_fhp;
+		syscallarg(int) flags;
+	} */ *uap;
+{
+	struct proc *p = td->td_proc;
+	struct mount *mp;
+	struct vnode *vp;
+	struct fhandle fhp;
+	struct vattr vat;
+	struct vattr *vap = &vat;
+	struct flock lf;
+	struct file *fp;
+	register struct filedesc *fdp = p->p_fd;
+	int fmode, mode, error, type;
+	struct file *nfp;
+	int indx;
+
+	/*
+	 * Must be super user
+	 *
+	 * Overall flow: validate the flags, copy in the handle, map it
+	 * to a mount and a locked vnode, repeat the checks vn_open()
+	 * would make, then allocate a descriptor and wire the vnode into
+	 * it.  The new descriptor index is returned in td->td_retval[0].
+	 */
+	error = suser(td);
+	if (error)
+		return (error);
+
+	fmode = FFLAGS(SCARG(uap, flags));
+	/* why not allow a non-read/write open for our lockd? */
+	if (((fmode & (FREAD | FWRITE)) == 0) || (fmode & O_CREAT))
+		return (EINVAL);
+	error = copyin(SCARG(uap,u_fhp), &fhp, sizeof(fhp));
+	if (error)
+		return(error);
+	/* find the mount point */
+	mp = vfs_getvfs(&fhp.fh_fsid);
+	if (mp == NULL)
+		return (ESTALE);
+	/* now give me my vnode, it gets returned to me locked */
+	error = VFS_FHTOVP(mp, &fhp.fh_fid, &vp);
+	if (error)
+		return (error);
+	/*
+	 * from now on we have to make sure not
+	 * to forget about the vnode
+	 * any error that causes an abort must vput(vp)
+	 * just set error = err and 'goto bad;'.
+	 */
+
+	/*
+	 * from vn_open
+	 */
+	if (vp->v_type == VLNK) {
+		error = EMLINK;
+		goto bad;
+	}
+	if (vp->v_type == VSOCK) {
+		error = EOPNOTSUPP;
+		goto bad;
+	}
+	mode = 0;
+	if (fmode & (FWRITE | O_TRUNC)) {
+		if (vp->v_type == VDIR) {
+			error = EISDIR;
+			goto bad;
+		}
+		error = vn_writechk(vp);
+		if (error)
+			goto bad;
+		mode |= VWRITE;
+	}
+	if (fmode & FREAD)
+		mode |= VREAD;
+	if (mode) {
+		error = VOP_ACCESS(vp, mode, td->td_ucred, td);
+		if (error)
+			goto bad;
+	}
+	if (fmode & O_TRUNC) {
+		VOP_UNLOCK(vp, 0, td);				/* XXX */
+		if ((error = vn_start_write(NULL, &mp, V_WAIT | PCATCH)) != 0) {
+			vrele(vp);	/* already unlocked above, so vrele() rather than `goto bad' */
+			return (error);
+		}
+		VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE);
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);	/* XXX */
+		VATTR_NULL(vap);
+		vap->va_size = 0;
+		error = VOP_SETATTR(vp, vap, td->td_ucred, td);
+		vn_finished_write(mp);
+		if (error)
+			goto bad;
+	}
+	error = VOP_OPEN(vp, fmode, td->td_ucred, td);
+	if (error)
+		goto bad;
+	/*
+	 * Make sure that a VM object is created for VMIO support.
+	 */
+	if (vn_canvmio(vp) == TRUE) {
+		if ((error = vfs_object_create(vp, td, td->td_ucred)) != 0)
+			goto bad;	/* NOTE(review): no VOP_CLOSE() after the successful VOP_OPEN() -- confirm */
+	}
+	if (fmode & FWRITE)
+		vp->v_writecount++;
+
+	/*
+	 * end of vn_open code
+	 */
+
+	if ((error = falloc(td, &nfp, &indx)) != 0) {
+		if (fmode & FWRITE)
+			vp->v_writecount--;	/* undo the increment made above */
+		goto bad;
+	}
+	fp = nfp;
+
+	/*
+	 * Hold an extra reference to avoid having fp ripped out
+	 * from under us while we block in the lock op
+	 */
+	fhold(fp);
+	nfp->f_data = vp;
+	nfp->f_flag = fmode & FMASK;
+	nfp->f_ops = &vnops;
+	nfp->f_type = DTYPE_VNODE;
+	if (fmode & (O_EXLOCK | O_SHLOCK)) {
+		lf.l_whence = SEEK_SET;
+		lf.l_start = 0;
+		lf.l_len = 0;
+		if (fmode & O_EXLOCK)
+			lf.l_type = F_WRLCK;
+		else
+			lf.l_type = F_RDLCK;
+		type = F_FLOCK;
+		if ((fmode & FNONBLOCK) == 0)
+			type |= F_WAIT;
+		VOP_UNLOCK(vp, 0, td);	/* the vnode lock is dropped around VOP_ADVLOCK() */
+		if ((error = VOP_ADVLOCK(vp, (caddr_t)fp, F_SETLK, &lf,
+			    type)) != 0) {
+			/*
+			 * The lock request failed.  Normally close the
+			 * descriptor but handle the case where someone might
+			 * have dup()d or close()d it when we weren't looking.
+			 */
+			FILEDESC_LOCK(fdp);
+			if (fdp->fd_ofiles[indx] == fp) {
+				fdp->fd_ofiles[indx] = NULL;
+				FILEDESC_UNLOCK(fdp);
+				fdrop(fp, td);
+			} else
+				FILEDESC_UNLOCK(fdp);
+			/*
+			 * release our private reference
+			 *
+			 * No vput()/vrele() on this path: the vnode is
+			 * already stored in fp->f_data, so presumably it
+			 * is released when the file is -- TODO confirm.
+			 */
+			fdrop(fp, td);
+			return(error);
+		}
+		vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td);
+		fp->f_flag |= FHASLOCK;
+	}
+	if ((vp->v_type == VREG) && (VOP_GETVOBJECT(vp, NULL) != 0))
+		vfs_object_create(vp, td, td->td_ucred);	/* return value ignored here */
+
+	VOP_UNLOCK(vp, 0, td);
+	fdrop(fp, td);		/* drops the extra reference taken by fhold() above */
+	td->td_retval[0] = indx;
+	return (0);
+
+bad:
+	vput(vp);
+	return (error);
+}
+
+/*
+ * Stat an (NFS) file handle.
+ *
+ * Copies in the handle, maps it to a mount with vfs_getvfs() and to a
+ * vnode with VFS_FHTOVP(), gathers the status with vn_stat() and copies
+ * the struct stat out to `sb'.  Returns ESTALE when no mount matches the
+ * handle's fsid.
+ */
+#ifndef _SYS_SYSPROTO_H_
+struct fhstat_args {
+	struct fhandle *u_fhp;
+	struct stat *sb;
+};
+#endif
+int
+fhstat(td, uap)
+	struct thread *td;
+	register struct fhstat_args /* {
+		syscallarg(struct fhandle *) u_fhp;
+		syscallarg(struct stat *) sb;
+	} */ *uap;
+{
+	struct stat sb;
+	fhandle_t fh;
+	struct mount *mp;
+	struct vnode *vp;
+	int error;
+
+	/*
+	 * Must be super user
+	 */
+	error = suser(td);
+	if (error)
+		return (error);
+
+	error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t));
+	if (error)
+		return (error);
+
+	if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL)
+		return (ESTALE);
+	if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp)))
+		return (error);
+	error = vn_stat(vp, &sb, td);
+	vput(vp);
+	if (error)
+		return (error);
+	error = copyout(&sb, SCARG(uap, sb), sizeof(sb));
+	return (error);
+}
+
+/*
+ * Implement fstatfs() for (NFS) file handles.
+ */ +#ifndef _SYS_SYSPROTO_H_ +struct fhstatfs_args { + struct fhandle *u_fhp; + struct statfs *buf; +}; +#endif +int +fhstatfs(td, uap) + struct thread *td; + struct fhstatfs_args /* { + syscallarg(struct fhandle) *u_fhp; + syscallarg(struct statfs) *buf; + } */ *uap; +{ + struct statfs *sp; + struct mount *mp; + struct vnode *vp; + struct statfs sb; + fhandle_t fh; + int error; + + /* + * Must be super user + */ + error = suser(td); + if (error) + return (error); + + if ((error = copyin(SCARG(uap, u_fhp), &fh, sizeof(fhandle_t))) != 0) + return (error); + + if ((mp = vfs_getvfs(&fh.fh_fsid)) == NULL) + return (ESTALE); + if ((error = VFS_FHTOVP(mp, &fh.fh_fid, &vp))) + return (error); + mp = vp->v_mount; + sp = &mp->mnt_stat; + vput(vp); + if ((error = VFS_STATFS(mp, sp, td)) != 0) + return (error); + sp->f_flags = mp->mnt_flag & MNT_VISFLAGMASK; + if (suser(td)) { + bcopy(sp, &sb, sizeof(sb)); + sb.f_fsid.val[0] = sb.f_fsid.val[1] = 0; + sp = &sb; + } + return (copyout(sp, SCARG(uap, buf), sizeof(*sp))); +} + +/* + * Syscall to push extended attribute configuration information into the + * VFS. Accepts a path, which it converts to a mountpoint, as well as + * a command (int cmd), and attribute name and misc data. For now, the + * attribute name is left in userspace for consumption by the VFS_op. + * It will probably be changed to be copied into sysspace by the + * syscall in the future, once issues with various consumers of the + * attribute code have raised their hands. + * + * Currently this is used only by UFS Extended Attributes. 
+ */ +int +extattrctl(td, uap) + struct thread *td; + struct extattrctl_args /* { + syscallarg(const char *) path; + syscallarg(int) cmd; + syscallarg(const char *) filename; + syscallarg(int) attrnamespace; + syscallarg(const char *) attrname; + } */ *uap; +{ + struct vnode *filename_vp; + struct nameidata nd; + struct mount *mp, *mp_writable; + char attrname[EXTATTR_MAXNAMELEN]; + int error; + + /* + * uap->attrname is not always defined. We check again later when we + * invoke the VFS call so as to pass in NULL there if needed. + */ + if (uap->attrname != NULL) { + error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, + NULL); + if (error) + return (error); + } + + /* + * uap->filename is not always defined. If it is, grab a vnode lock, + * which VFS_EXTATTRCTL() will later release. + */ + filename_vp = NULL; + if (uap->filename != NULL) { + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_USERSPACE, + uap->filename, td); + if ((error = namei(&nd)) != 0) + return (error); + filename_vp = nd.ni_vp; + NDFREE(&nd, NDF_NO_VP_RELE | NDF_NO_VP_UNLOCK); + } + + /* uap->path is always defined. */ + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); + if ((error = namei(&nd)) != 0) { + if (filename_vp != NULL) + vput(filename_vp); + return (error); + } + mp = nd.ni_vp->v_mount; + error = vn_start_write(nd.ni_vp, &mp_writable, V_WAIT | PCATCH); + NDFREE(&nd, 0); + if (error) { + if (filename_vp != NULL) + vput(filename_vp); + return (error); + } + + if (uap->attrname != NULL) { + error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, + uap->attrnamespace, attrname, td); + } else { + error = VFS_EXTATTRCTL(mp, uap->cmd, filename_vp, + uap->attrnamespace, NULL, td); + } + + vn_finished_write(mp_writable); + /* + * VFS_EXTATTRCTL will have unlocked, but not de-ref'd, + * filename_vp, so vrele it if it is defined. 
+ */ + if (filename_vp != NULL) + vrele(filename_vp); + + return (error); +} + +/*- + * Set a named extended attribute on a file or directory + * + * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", + * kernelspace string pointer "attrname", userspace buffer + * pointer "data", buffer length "nbytes", thread "td". + * Returns: 0 on success, an error number otherwise + * Locks: none + * References: vp must be a valid reference for the duration of the call + */ +static int +extattr_set_vp(struct vnode *vp, int attrnamespace, const char *attrname, + void *data, size_t nbytes, struct thread *td) +{ + struct mount *mp; + struct uio auio; + struct iovec aiov; + ssize_t cnt; + int error; + + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); + VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + + aiov.iov_base = data; + aiov.iov_len = nbytes; + auio.uio_iov = &aiov; + auio.uio_iovcnt = 1; + auio.uio_offset = 0; + if (nbytes > INT_MAX) { + error = EINVAL; + goto done; + } + auio.uio_resid = nbytes; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_td = td; + cnt = nbytes; + + error = VOP_SETEXTATTR(vp, attrnamespace, attrname, &auio, + td->td_ucred, td); + cnt -= auio.uio_resid; + td->td_retval[0] = cnt; + +done: + VOP_UNLOCK(vp, 0, td); + vn_finished_write(mp); + return (error); +} + +int +extattr_set_file(td, uap) + struct thread *td; + struct extattr_set_file_args /* { + syscallarg(const char *) path; + syscallarg(int) attrnamespace; + syscallarg(const char *) attrname; + syscallarg(void *) data; + syscallarg(size_t) nbytes; + } */ *uap; +{ + struct nameidata nd; + char attrname[EXTATTR_MAXNAMELEN]; + int error; + + error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); + if (error) + return (error); + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + + 
error = extattr_set_vp(nd.ni_vp, uap->attrnamespace, attrname, + uap->data, uap->nbytes, td); + + vrele(nd.ni_vp); + return (error); +} + +int +extattr_set_fd(td, uap) + struct thread *td; + struct extattr_set_fd_args /* { + syscallarg(int) fd; + syscallarg(int) attrnamespace; + syscallarg(const char *) attrname; + syscallarg(void *) data; + syscallarg(size_t) nbytes; + } */ *uap; +{ + struct file *fp; + char attrname[EXTATTR_MAXNAMELEN]; + int error; + + error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); + if (error) + return (error); + + if ((error = getvnode(td->td_proc->p_fd, uap->fd, &fp)) != 0) + return (error); + + error = extattr_set_vp((struct vnode *)fp->f_data, uap->attrnamespace, + attrname, uap->data, uap->nbytes, td); + fdrop(fp, td); + + return (error); +} + +/*- + * Get a named extended attribute on a file or directory + * + * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", + * kernelspace string pointer "attrname", userspace buffer + * pointer "data", buffer length "nbytes", thread "td". + * Returns: 0 on success, an error number otherwise + * Locks: none + * References: vp must be a valid reference for the duration of the call + */ +static int +extattr_get_vp(struct vnode *vp, int attrnamespace, const char *attrname, + void *data, size_t nbytes, struct thread *td) +{ + struct uio auio, *auiop; + struct iovec aiov; + ssize_t cnt; + size_t size, *sizep; + int error; + + VOP_LEASE(vp, td, td->td_ucred, LEASE_READ); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + + /* + * Slightly unusual semantics: if the user provides a NULL data + * pointer, they don't want to receive the data, just the + * maximum read length. 
+ */ + auiop = NULL; + sizep = NULL; + cnt = 0; + if (data != NULL) { + aiov.iov_base = data; + aiov.iov_len = nbytes; + auio.uio_iov = &aiov; + auio.uio_offset = 0; + if (nbytes > INT_MAX) { + error = EINVAL; + goto done; + } + auio.uio_resid = nbytes; + auio.uio_rw = UIO_READ; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_td = td; + auiop = &auio; + cnt = nbytes; + } else + sizep = &size; + + error = VOP_GETEXTATTR(vp, attrnamespace, attrname, auiop, sizep, + td->td_ucred, td); + + if (auiop != NULL) { + cnt -= auio.uio_resid; + td->td_retval[0] = cnt; + } else + td->td_retval[0] = size; + +done: + VOP_UNLOCK(vp, 0, td); + return (error); +} + +int +extattr_get_file(td, uap) + struct thread *td; + struct extattr_get_file_args /* { + syscallarg(const char *) path; + syscallarg(int) attrnamespace; + syscallarg(const char *) attrname; + syscallarg(void *) data; + syscallarg(size_t) nbytes; + } */ *uap; +{ + struct nameidata nd; + char attrname[EXTATTR_MAXNAMELEN]; + int error; + + error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); + if (error) + return (error); + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); + if ((error = namei(&nd)) != 0) + return (error); + NDFREE(&nd, NDF_ONLY_PNBUF); + + error = extattr_get_vp(nd.ni_vp, uap->attrnamespace, attrname, + uap->data, uap->nbytes, td); + + vrele(nd.ni_vp); + return (error); +} + +int +extattr_get_fd(td, uap) + struct thread *td; + struct extattr_get_fd_args /* { + syscallarg(int) fd; + syscallarg(int) attrnamespace; + syscallarg(const char *) attrname; + syscallarg(void *) data; + syscallarg(size_t) nbytes; + } */ *uap; +{ + struct file *fp; + char attrname[EXTATTR_MAXNAMELEN]; + int error; + + error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); + if (error) + return (error); + + if ((error = getvnode(td->td_proc->p_fd, SCARG(uap, fd), &fp)) != 0) + return (error); + + error = extattr_get_vp((struct vnode *)fp->f_data, uap->attrnamespace, + attrname, uap->data, 
uap->nbytes, td); + + fdrop(fp, td); + return (error); +} + +/* + * extattr_delete_vp(): Delete a named extended attribute on a file or + * directory + * + * Arguments: unlocked vnode "vp", attribute namespace "attrnamespace", + * kernelspace string pointer "attrname", proc "p" + * Returns: 0 on success, an error number otherwise + * Locks: none + * References: vp must be a valid reference for the duration of the call + */ +static int +extattr_delete_vp(struct vnode *vp, int attrnamespace, const char *attrname, + struct thread *td) +{ + struct mount *mp; + int error; + + if ((error = vn_start_write(vp, &mp, V_WAIT | PCATCH)) != 0) + return (error); + VOP_LEASE(vp, td, td->td_ucred, LEASE_WRITE); + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + + error = VOP_SETEXTATTR(vp, attrnamespace, attrname, NULL, td->td_ucred, + td); + + VOP_UNLOCK(vp, 0, td); + vn_finished_write(mp); + return (error); +} + +int +extattr_delete_file(td, uap) + struct thread *td; + struct extattr_delete_file_args /* { + syscallarg(const char *) path; + syscallarg(int) attrnamespace; + syscallarg(const char *) attrname; + } */ *uap; +{ + struct nameidata nd; + char attrname[EXTATTR_MAXNAMELEN]; + int error; + + error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); + if (error) + return(error); + + NDINIT(&nd, LOOKUP, FOLLOW, UIO_USERSPACE, uap->path, td); + if ((error = namei(&nd)) != 0) + return(error); + NDFREE(&nd, NDF_ONLY_PNBUF); + + error = extattr_delete_vp(nd.ni_vp, uap->attrnamespace, attrname, td); + + vrele(nd.ni_vp); + return(error); +} + +int +extattr_delete_fd(td, uap) + struct thread *td; + struct extattr_delete_fd_args /* { + syscallarg(int) fd; + syscallarg(int) attrnamespace; + syscallarg(const char *) attrname; + } */ *uap; +{ + struct file *fp; + struct vnode *vp; + char attrname[EXTATTR_MAXNAMELEN]; + int error; + + error = copyinstr(uap->attrname, attrname, EXTATTR_MAXNAMELEN, NULL); + if (error) + return (error); + + if ((error = getvnode(td->td_proc->p_fd, 
uap->fd, &fp)) != 0) + return (error); + vp = (struct vnode *)fp->f_data; + + error = extattr_delete_vp((struct vnode *)fp->f_data, + uap->attrnamespace, attrname, td); + + fdrop(fp, td); + return (error); +} |