diff options
author | mux <mux@FreeBSD.org> | 2002-03-26 15:33:44 +0000 |
---|---|---|
committer | mux <mux@FreeBSD.org> | 2002-03-26 15:33:44 +0000 |
commit | 124c6d3a2681db60183934e41d92ed78ad107c78 (patch) | |
tree | 3418e9a1ae964d7ba6364b9a56eecaabb6c72462 | |
parent | 67738c4bc20ae25af458b873fcb7bb7de7de88c7 (diff) | |
download | FreeBSD-src-124c6d3a2681db60183934e41d92ed78ad107c78.zip FreeBSD-src-124c6d3a2681db60183934e41d92ed78ad107c78.tar.gz |
As discussed in -arch, add the new nmount(2) system call and the
new vfs_getopt()/vfs_copyopt() API. This is intended to be used
later, when there will be filesystems implementing the VFS_NMOUNT
operation. The mount(2) system call will disappear when all
filesystems will be converted to the new API. Documentation will
be committed in a while.
Reviewed by: phk
-rw-r--r-- | sys/kern/vfs_extattr.c | 556 | ||||
-rw-r--r-- | sys/kern/vfs_subr.c | 77 | ||||
-rw-r--r-- | sys/kern/vfs_syscalls.c | 556 | ||||
-rw-r--r-- | sys/sys/mount.h | 24 |
4 files changed, 1207 insertions, 6 deletions
diff --git a/sys/kern/vfs_extattr.c b/sys/kern/vfs_extattr.c index 77eece4..bb29491 100644 --- a/sys/kern/vfs_extattr.c +++ b/sys/kern/vfs_extattr.c @@ -69,6 +69,7 @@ #include <sys/sysctl.h> #include <machine/limits.h> +#include <machine/stdarg.h> #include <vm/vm.h> #include <vm/vm_object.h> @@ -86,6 +87,8 @@ static int setutimes(struct thread *td, struct vnode *, const struct timespec *, int); static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td); +static void vfs_freeopts(struct vfsoptlist *opt); +static int vfs_nmount(struct thread *td, int, struct uio *); static int usermount = 0; /* if 1, non-root can mount fs. */ @@ -102,7 +105,7 @@ struct nmount_args { struct iovec *iovp; unsigned int iovcnt; int flags; - }; +}; #endif /* ARGSUSED */ int @@ -114,12 +117,546 @@ nmount(td, uap) syscallarg(int) flags; } */ *uap; { + struct uio auio; + struct iovec *iov, *needfree; + struct iovec aiov[UIO_SMALLIOV]; + long error, i; + u_int iovlen, iovcnt; + + iovcnt = SCARG(uap, iovcnt); + iovlen = iovcnt * sizeof (struct iovec); + /* + * Check that we have an even number of iovec's + * and that we have at least two options. + */ + if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV)) + return (EINVAL); - return(EOPNOTSUPP); + if (iovcnt > UIO_SMALLIOV) { + MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); + needfree = iov; + } else { + iov = aiov; + needfree = NULL; + } + auio.uio_iov = iov; + auio.uio_iovcnt = iovcnt; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_td = td; + auio.uio_offset = 0; + auio.uio_resid = 0; + if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) + goto finish; + for (i = 0; i < iovcnt; i++) { + if (iov->iov_len > INT_MAX - auio.uio_resid) { + error = EINVAL; + goto finish; + } + auio.uio_resid += iov->iov_len; + iov++; + } + error = vfs_nmount(td, SCARG(uap, flags), &auio); +finish: + if (needfree != NULL) + free(needfree, M_TEMP); + return (error); } /* - * Mount a file system. + * Release all resources related to the + * mount options. + */ +static void +vfs_freeopts(struct vfsoptlist *opt) +{ + free(opt->opt, M_MOUNT); + free(opt->optbuf, M_MOUNT); + free(opt, M_MOUNT); +} + +int +kernel_mount(iovp, iovcnt, flags) + struct iovec *iovp; + unsigned int iovcnt; + int flags; +{ + struct uio auio; + struct iovec *iov; + int error, i; + + /* + * Check that we have an even number of iovec's + * and that we have at least two options. + */ + if ((iovcnt & 1) || (iovcnt < 4)) + return (EINVAL); + + auio.uio_iov = iovp; + auio.uio_iovcnt = iovcnt; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_offset = 0; + auio.uio_td = NULL; + auio.uio_resid = 0; + iov = iovp; + for (i = 0; i < iovcnt; i++) { + if (iov->iov_len > INT_MAX - auio.uio_resid) { + return (EINVAL); + } + auio.uio_resid += iov->iov_len; + iov++; + } + + error = vfs_nmount(curthread, flags, &auio); + return (error); +} + +int +kernel_vmount(int flags, ...) +{ + struct iovec *iovp, *iov; + struct uio auio; + va_list ap; + unsigned int iovcnt, iovlen, len, optcnt; + const char *opt; + char *sep, *buf, *pos; + int error, i; + + len = 0; + va_start(ap, flags); + for (optcnt = 0; (opt = va_arg(ap, const char *)) != NULL; optcnt++) + len += strlen(opt) + 1; + va_end(ap); + + if (optcnt < 2) + return (EINVAL); + + iovcnt = optcnt << 1; + iovlen = iovcnt * sizeof (struct iovec); + MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK); + MALLOC(buf, char *, len, M_MOUNT, M_WAITOK); + pos = buf; + va_start(ap, flags); + for (i = 0; i < optcnt; i++) { + opt = va_arg(ap, const char *); + strcpy(pos, opt); + sep = index(pos, '='); + if (sep == NULL) { + FREE(iovp, M_MOUNT); + FREE(buf, M_MOUNT); + return (EINVAL); + } + *sep = '\0'; + iov = iovp + i * 2; + iov->iov_base = pos; + iov->iov_len = sep - pos + 1; + pos = sep + 1; + iov++; + iov->iov_base = pos; + iovlen = strlen(pos) + 1; + iov->iov_len = iovlen; + pos += iovlen; + } + va_end(ap); + + auio.uio_iov = iovp; + auio.uio_iovcnt = iovcnt; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_offset = 0; + auio.uio_td = NULL; + auio.uio_resid = len; + + error = vfs_nmount(curthread, flags, &auio); + FREE(iovp, M_MOUNT); + FREE(buf, M_MOUNT); + return (error); +} + +/* + * vfs_nmount(): actually attempt a filesystem mount. + */ +static int +vfs_nmount(td, fsflags, fsoptions) + struct thread *td; + int fsflags; /* Flags common to all filesystems */ + struct uio *fsoptions; /* Options local to the filesystem */ +{ + struct vnode *vp; + struct mount *mp; + struct vfsconf *vfsp; + struct iovec *cur; + struct vfsoptlist *optlist; + struct vfsopt *opt; + char *buf, *fstype, *fspath; + int error, flag = 0, flag2 = 0, i, len, optcnt; + int offset, iovcnt, fstypelen, fspathlen; + struct vattr va; + struct nameidata nd; + + /* + * Allocate memory to hold the vfsopt structures. + */ + iovcnt = fsoptions->uio_iovcnt; + optcnt = iovcnt >> 1; + opt = malloc(sizeof (struct vfsopt) * optcnt, + M_MOUNT, M_WAITOK | M_ZERO); + + /* + * Count the size of the buffer for options, + * allocate it, and fill in the vfsopt structures. + */ + cur = fsoptions->uio_iov; + len = fsoptions->uio_resid; + buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO); + + optlist = malloc(sizeof (struct vfsoptlist), M_MOUNT, M_WAITOK); + optlist->opt = opt; + optlist->optbuf = buf; + optlist->optcnt = optcnt; + + offset = i = 0; + cur = fsoptions->uio_iov; + while (i < optcnt) { + opt[i].name = buf + offset; + /* Ensure the name of an option is a string */ + if (opt[i].name[cur->iov_len - 1] != '\0') { + error = EINVAL; + goto bad; + } + offset += cur->iov_len; + cur++; + opt[i].len = cur->iov_len; + /* + * Prevent consumers from trying to + * read the value of a 0 length option + * by setting it to NULL. + */ + if (opt[i].len == 0) + opt[i].value = NULL; + else + opt[i].value = buf + offset; + offset += cur->iov_len; + cur++; i++; + } + + if ((error = uiomove(buf, len, fsoptions)) != 0) + goto bad; + + /* + * We need these two options before the others, + * and they are mandatory for any filesystem. + * Ensure they are NULL terminated as well. + */ + fstypelen = 0; + error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); + if ((error != 0) || (fstype[fstypelen - 1] != '\0')) { + error = EINVAL; + goto bad; + } + fspathlen = 0; + error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); + if ((error != 0) || (fspath[fspathlen - 1] != '\0')) { + error = EINVAL; + goto bad; + } + + /* + * Be ultra-paranoid about making sure the type and fspath + * variables will fit in our mp buffers, including the + * terminating NUL. + */ + if ((fstypelen >= MFSNAMELEN - 1) || + (fspathlen >= MNAMELEN - 1)) { + error = ENAMETOOLONG; + goto bad; + } + + if (usermount == 0) { + error = suser_td(td); + if (error) + goto bad; + } + /* + * Do not allow NFS export by non-root users. + */ + if (fsflags & MNT_EXPORTED) { + error = suser_td(td); + if (error) + goto bad; + } + /* + * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users + */ + if (suser_xxx(td->td_proc->p_ucred, 0, 0)) + fsflags |= MNT_NOSUID | MNT_NODEV; + /* + * Get vnode to be covered + */ + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td); + if ((error = namei(&nd)) != 0) + goto bad; + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + if (fsflags & MNT_UPDATE) { + if ((vp->v_flag & VROOT) == 0) { + vput(vp); + error = EINVAL; + goto bad; + } + mp = vp->v_mount; + flag = mp->mnt_flag; + flag2 = mp->mnt_kern_flag; + /* + * We only allow the filesystem to be reloaded if it + * is currently mounted read-only. + */ + if ((fsflags & MNT_RELOAD) && + ((mp->mnt_flag & MNT_RDONLY) == 0)) { + vput(vp); + error = EOPNOTSUPP; /* Needs translation */ + goto bad; + } + /* + * Only root, or the user that did the original mount is + * permitted to update it. + */ + if (mp->mnt_stat.f_owner != td->td_proc->p_ucred->cr_uid) { + error = suser_td(td); + if (error) { + vput(vp); + goto bad; + } + } + if (vfs_busy(mp, LK_NOWAIT, 0, td)) { + vput(vp); + error = EBUSY; + goto bad; + } + mtx_lock(&vp->v_interlock); + if ((vp->v_flag & VMOUNT) != 0 || + vp->v_mountedhere != NULL) { + mtx_unlock(&vp->v_interlock); + vfs_unbusy(mp, td); + vput(vp); + error = EBUSY; + goto bad; + } + vp->v_flag |= VMOUNT; + mtx_unlock(&vp->v_interlock); + mp->mnt_flag |= fsflags & + (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT); + VOP_UNLOCK(vp, 0, td); + mp->mnt_optnew = optlist; + goto update; + } + /* + * If the user is not root, ensure that they own the directory + * onto which we are attempting to mount. + */ + error = VOP_GETATTR(vp, &va, td->td_proc->p_ucred, td); + if (error) { + vput(vp); + goto bad; + } + if (va.va_uid != td->td_proc->p_ucred->cr_uid) { + error = suser_td(td); + if (error) { + vput(vp); + goto bad; + } + } + if ((error = vinvalbuf(vp, V_SAVE, td->td_proc->p_ucred, td, 0, 0)) + != 0) { + vput(vp); + goto bad; + } + if (vp->v_type != VDIR) { + vput(vp); + error = ENOTDIR; + goto bad; + } + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstype)) + break; + if (vfsp == NULL) { + linker_file_t lf; + + /* Only load modules for root (very important!) */ + error = suser_td(td); + if (error) { + vput(vp); + goto bad; + } + error = securelevel_gt(td->td_ucred, 0); + if (error != 0) { + vput(vp); + goto bad; + } + error = linker_load_file(fstype, &lf); + if (error || lf == NULL) { + vput(vp); + if (lf == NULL) + error = ENODEV; + goto bad; + } + lf->userrefs++; + /* lookup again, see if the VFS was loaded */ + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstype)) + break; + if (vfsp == NULL) { + lf->userrefs--; + linker_file_unload(lf); + vput(vp); + error = ENODEV; + goto bad; + } + } + + mtx_lock(&vp->v_interlock); + if ((vp->v_flag & VMOUNT) != 0 || + vp->v_mountedhere != NULL) { + mtx_unlock(&vp->v_interlock); + vput(vp); + error = EBUSY; + goto bad; + } + vp->v_flag |= VMOUNT; + mtx_unlock(&vp->v_interlock); + + /* + * Allocate and initialize the filesystem. + */ + mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO); + TAILQ_INIT(&mp->mnt_nvnodelist); + TAILQ_INIT(&mp->mnt_reservedvnlist); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); + vfs_busy(mp, LK_NOWAIT, 0, td); + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_vfc = vfsp; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN); + mp->mnt_stat.f_fstypename[MFSNAMELEN - 1] = '\0'; + mp->mnt_vnodecovered = vp; + mp->mnt_stat.f_owner = td->td_proc->p_ucred->cr_uid; + strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); + mp->mnt_stat.f_mntonname[MNAMELEN - 1] = '\0'; + mp->mnt_iosize_max = DFLTPHYS; + VOP_UNLOCK(vp, 0, td); + + mp->mnt_opt = optlist; +update: + /* + * Check if the fs implements the new VFS_NMOUNT() + * function, since the new system call was used. + */ + if (mp->mnt_op->vfs_mount != NULL) { + printf("%s doesn't support the new mount syscall\n", + mp->mnt_vfc->vfc_name); + vput(vp); + error = EOPNOTSUPP; + goto bad; + } + + /* + * Set the mount level flags. + */ + if (fsflags & MNT_RDONLY) + mp->mnt_flag |= MNT_RDONLY; + else if (mp->mnt_flag & MNT_RDONLY) + mp->mnt_kern_flag |= MNTK_WANTRDWR; + mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | + MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | + MNT_NOSYMFOLLOW | MNT_IGNORE | + MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); + mp->mnt_flag |= fsflags & (MNT_NOSUID | MNT_NOEXEC | + MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | + MNT_NOSYMFOLLOW | MNT_IGNORE | + MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); + /* + * Mount the filesystem. + * XXX The final recipients of VFS_MOUNT just overwrite the ndp they + * get. No freeing of cn_pnbuf. + */ + error = VFS_NMOUNT(mp, &nd, td); + if (mp->mnt_flag & MNT_UPDATE) { + if (mp->mnt_kern_flag & MNTK_WANTRDWR) + mp->mnt_flag &= ~MNT_RDONLY; + mp->mnt_flag &=~ + (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); + mp->mnt_kern_flag &=~ MNTK_WANTRDWR; + if (error) { + mp->mnt_flag = flag; + mp->mnt_kern_flag = flag2; + vfs_freeopts(mp->mnt_optnew); + } else { + vfs_freeopts(mp->mnt_opt); + mp->mnt_opt = mp->mnt_optnew; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } + vfs_unbusy(mp, td); + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + mtx_unlock(&vp->v_interlock); + vrele(vp); + return (error); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + /* + * Put the new filesystem on the mount list after root. + */ + cache_purge(vp); + if (!error) { + struct vnode *newdp; + + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + vp->v_mountedhere = mp; + mtx_unlock(&vp->v_interlock); + mtx_lock(&mountlist_mtx); + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + mtx_unlock(&mountlist_mtx); + if (VFS_ROOT(mp, &newdp)) + panic("mount: lost mount"); + checkdirs(vp, newdp); + vput(newdp); + VOP_UNLOCK(vp, 0, td); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); + vfs_unbusy(mp, td); + if ((error = VFS_START(mp, 0, td)) != 0) { + vrele(vp); + goto bad; + } + } else { + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + mtx_unlock(&vp->v_interlock); + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, td); + free((caddr_t)mp, M_MOUNT); + vput(vp); + goto bad; + } + return (0); +bad: + vfs_freeopts(optlist); + return (error); +} + +/* + * Old Mount API */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { @@ -364,6 +901,17 @@ vfs_mount(td, fstype, fspath, fsflags, fsdata) VOP_UNLOCK(vp, 0, td); update: /* + * Check if the fs implements the old VFS_MOUNT() + * function, since the old system call was used. + */ + if (mp->mnt_op->vfs_mount == NULL) { + printf("%s doesn't support the old mount syscall\n", + mp->mnt_vfc->vfc_name); + vput(vp); + return (EOPNOTSUPP); + } + + /* * Set the mount level flags. */ if (fsflags & MNT_RDONLY) @@ -638,6 +1186,8 @@ dounmount(mp, flags, td) vrele(coveredvp); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); + if (mp->mnt_op->vfs_mount == NULL) + vfs_freeopts(mp->mnt_opt); free((caddr_t)mp, M_MOUNT); return (0); } diff --git a/sys/kern/vfs_subr.c b/sys/kern/vfs_subr.c index ca6fa9c..e3290a1 100644 --- a/sys/kern/vfs_subr.c +++ b/sys/kern/vfs_subr.c @@ -507,6 +507,83 @@ vfs_timestamp(tsp) } /* + * Get a mount option by its name. + * + * Return 0 if the option was found. + * Return ENOENT if the option wasn't found. + * If len is a non-NULL pointer and *len + * a integer different from 0, then the size + * of the option will be compared with *len and + * if they doesn't match, EINVAL is returned. + * If len is non-NULL and *len == 0, it will + * be filled with the length of the option. + * Finally, if buf is non-NULL, it will be + * filled with the address of the option. + */ +int +vfs_getopt(opts, name, buf, len) + struct vfsoptlist *opts; + const char *name; + void **buf; + int *len; +{ + struct vfsopt *opt; + int i; + + i = 0; + opt = opts->opt; + while (i++ < opts->optcnt) { + if (strcmp(name, opt->name) == 0) { + if (len != NULL) { + if ((*len != 0) && (*len != opt->len)) + return (EINVAL); + *len = opt->len; + } + if (buf != NULL) + *buf = opt->value; + return (0); + } + opt++; + } + return (ENOENT); +} + +/* + * Find and copy a mount option. + * The size of the buffer has to be specified + * in len, if it is not big enough, EINVAL is + * returned. Returns ENOENT if the option is + * not found. Otherwise, the number of bytes + * actually copied are put in done if it's + * non-NULL and 0 is returned. + */ +int +vfs_copyopt(opts, name, dest, len, done) + struct vfsoptlist *opts; + const char *name; + void *dest; + int len, *done; +{ + struct vfsopt *opt; + int i; + + i = 0; + opt = opts->opt; + while (i++ < opts->optcnt) { + if (strcmp(name, opt->name) == 0) { + if (len < opt->len) + return (EINVAL); + bcopy(dest, opt->value, opt->len); + if (done != NULL) + *done = opt->len; + return (0); + } + opt++; + } + return (ENOENT); +} + +/* * Set vnode attributes to VNOVAL */ void diff --git a/sys/kern/vfs_syscalls.c b/sys/kern/vfs_syscalls.c index 77eece4..bb29491 100644 --- a/sys/kern/vfs_syscalls.c +++ b/sys/kern/vfs_syscalls.c @@ -69,6 +69,7 @@ #include <sys/sysctl.h> #include <machine/limits.h> +#include <machine/stdarg.h> #include <vm/vm.h> #include <vm/vm_object.h> @@ -86,6 +87,8 @@ static int setutimes(struct thread *td, struct vnode *, const struct timespec *, int); static int vn_access(struct vnode *vp, int user_flags, struct ucred *cred, struct thread *td); +static void vfs_freeopts(struct vfsoptlist *opt); +static int vfs_nmount(struct thread *td, int, struct uio *); static int usermount = 0; /* if 1, non-root can mount fs. */ @@ -102,7 +105,7 @@ struct nmount_args { struct iovec *iovp; unsigned int iovcnt; int flags; - }; +}; #endif /* ARGSUSED */ int @@ -114,12 +117,546 @@ nmount(td, uap) syscallarg(int) flags; } */ *uap; { + struct uio auio; + struct iovec *iov, *needfree; + struct iovec aiov[UIO_SMALLIOV]; + long error, i; + u_int iovlen, iovcnt; + + iovcnt = SCARG(uap, iovcnt); + iovlen = iovcnt * sizeof (struct iovec); + /* + * Check that we have an even number of iovec's + * and that we have at least two options. + */ + if ((iovcnt & 1) || (iovcnt < 4) || (iovcnt > UIO_MAXIOV)) + return (EINVAL); - return(EOPNOTSUPP); + if (iovcnt > UIO_SMALLIOV) { + MALLOC(iov, struct iovec *, iovlen, M_IOV, M_WAITOK); + needfree = iov; + } else { + iov = aiov; + needfree = NULL; + } + auio.uio_iov = iov; + auio.uio_iovcnt = iovcnt; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_USERSPACE; + auio.uio_td = td; + auio.uio_offset = 0; + auio.uio_resid = 0; + if ((error = copyin((caddr_t)uap->iovp, (caddr_t)iov, iovlen))) + goto finish; + for (i = 0; i < iovcnt; i++) { + if (iov->iov_len > INT_MAX - auio.uio_resid) { + error = EINVAL; + goto finish; + } + auio.uio_resid += iov->iov_len; + iov++; + } + error = vfs_nmount(td, SCARG(uap, flags), &auio); +finish: + if (needfree != NULL) + free(needfree, M_TEMP); + return (error); } /* - * Mount a file system. + * Release all resources related to the + * mount options. + */ +static void +vfs_freeopts(struct vfsoptlist *opt) +{ + free(opt->opt, M_MOUNT); + free(opt->optbuf, M_MOUNT); + free(opt, M_MOUNT); +} + +int +kernel_mount(iovp, iovcnt, flags) + struct iovec *iovp; + unsigned int iovcnt; + int flags; +{ + struct uio auio; + struct iovec *iov; + int error, i; + + /* + * Check that we have an even number of iovec's + * and that we have at least two options. + */ + if ((iovcnt & 1) || (iovcnt < 4)) + return (EINVAL); + + auio.uio_iov = iovp; + auio.uio_iovcnt = iovcnt; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_offset = 0; + auio.uio_td = NULL; + auio.uio_resid = 0; + iov = iovp; + for (i = 0; i < iovcnt; i++) { + if (iov->iov_len > INT_MAX - auio.uio_resid) { + return (EINVAL); + } + auio.uio_resid += iov->iov_len; + iov++; + } + + error = vfs_nmount(curthread, flags, &auio); + return (error); +} + +int +kernel_vmount(int flags, ...) +{ + struct iovec *iovp, *iov; + struct uio auio; + va_list ap; + unsigned int iovcnt, iovlen, len, optcnt; + const char *opt; + char *sep, *buf, *pos; + int error, i; + + len = 0; + va_start(ap, flags); + for (optcnt = 0; (opt = va_arg(ap, const char *)) != NULL; optcnt++) + len += strlen(opt) + 1; + va_end(ap); + + if (optcnt < 2) + return (EINVAL); + + iovcnt = optcnt << 1; + iovlen = iovcnt * sizeof (struct iovec); + MALLOC(iovp, struct iovec *, iovlen, M_MOUNT, M_WAITOK); + MALLOC(buf, char *, len, M_MOUNT, M_WAITOK); + pos = buf; + va_start(ap, flags); + for (i = 0; i < optcnt; i++) { + opt = va_arg(ap, const char *); + strcpy(pos, opt); + sep = index(pos, '='); + if (sep == NULL) { + FREE(iovp, M_MOUNT); + FREE(buf, M_MOUNT); + return (EINVAL); + } + *sep = '\0'; + iov = iovp + i * 2; + iov->iov_base = pos; + iov->iov_len = sep - pos + 1; + pos = sep + 1; + iov++; + iov->iov_base = pos; + iovlen = strlen(pos) + 1; + iov->iov_len = iovlen; + pos += iovlen; + } + va_end(ap); + + auio.uio_iov = iovp; + auio.uio_iovcnt = iovcnt; + auio.uio_rw = UIO_WRITE; + auio.uio_segflg = UIO_SYSSPACE; + auio.uio_offset = 0; + auio.uio_td = NULL; + auio.uio_resid = len; + + error = vfs_nmount(curthread, flags, &auio); + FREE(iovp, M_MOUNT); + FREE(buf, M_MOUNT); + return (error); +} + +/* + * vfs_nmount(): actually attempt a filesystem mount. + */ +static int +vfs_nmount(td, fsflags, fsoptions) + struct thread *td; + int fsflags; /* Flags common to all filesystems */ + struct uio *fsoptions; /* Options local to the filesystem */ +{ + struct vnode *vp; + struct mount *mp; + struct vfsconf *vfsp; + struct iovec *cur; + struct vfsoptlist *optlist; + struct vfsopt *opt; + char *buf, *fstype, *fspath; + int error, flag = 0, flag2 = 0, i, len, optcnt; + int offset, iovcnt, fstypelen, fspathlen; + struct vattr va; + struct nameidata nd; + + /* + * Allocate memory to hold the vfsopt structures. + */ + iovcnt = fsoptions->uio_iovcnt; + optcnt = iovcnt >> 1; + opt = malloc(sizeof (struct vfsopt) * optcnt, + M_MOUNT, M_WAITOK | M_ZERO); + + /* + * Count the size of the buffer for options, + * allocate it, and fill in the vfsopt structures. + */ + cur = fsoptions->uio_iov; + len = fsoptions->uio_resid; + buf = malloc(len, M_TEMP, M_WAITOK | M_ZERO); + + optlist = malloc(sizeof (struct vfsoptlist), M_MOUNT, M_WAITOK); + optlist->opt = opt; + optlist->optbuf = buf; + optlist->optcnt = optcnt; + + offset = i = 0; + cur = fsoptions->uio_iov; + while (i < optcnt) { + opt[i].name = buf + offset; + /* Ensure the name of an option is a string */ + if (opt[i].name[cur->iov_len - 1] != '\0') { + error = EINVAL; + goto bad; + } + offset += cur->iov_len; + cur++; + opt[i].len = cur->iov_len; + /* + * Prevent consumers from trying to + * read the value of a 0 length option + * by setting it to NULL. + */ + if (opt[i].len == 0) + opt[i].value = NULL; + else + opt[i].value = buf + offset; + offset += cur->iov_len; + cur++; i++; + } + + if ((error = uiomove(buf, len, fsoptions)) != 0) + goto bad; + + /* + * We need these two options before the others, + * and they are mandatory for any filesystem. + * Ensure they are NULL terminated as well. + */ + fstypelen = 0; + error = vfs_getopt(optlist, "fstype", (void **)&fstype, &fstypelen); + if ((error != 0) || (fstype[fstypelen - 1] != '\0')) { + error = EINVAL; + goto bad; + } + fspathlen = 0; + error = vfs_getopt(optlist, "fspath", (void **)&fspath, &fspathlen); + if ((error != 0) || (fspath[fspathlen - 1] != '\0')) { + error = EINVAL; + goto bad; + } + + /* + * Be ultra-paranoid about making sure the type and fspath + * variables will fit in our mp buffers, including the + * terminating NUL. + */ + if ((fstypelen >= MFSNAMELEN - 1) || + (fspathlen >= MNAMELEN - 1)) { + error = ENAMETOOLONG; + goto bad; + } + + if (usermount == 0) { + error = suser_td(td); + if (error) + goto bad; + } + /* + * Do not allow NFS export by non-root users. + */ + if (fsflags & MNT_EXPORTED) { + error = suser_td(td); + if (error) + goto bad; + } + /* + * Silently enforce MNT_NOSUID and MNT_NODEV for non-root users + */ + if (suser_xxx(td->td_proc->p_ucred, 0, 0)) + fsflags |= MNT_NOSUID | MNT_NODEV; + /* + * Get vnode to be covered + */ + NDINIT(&nd, LOOKUP, FOLLOW | LOCKLEAF, UIO_SYSSPACE, fspath, td); + if ((error = namei(&nd)) != 0) + goto bad; + NDFREE(&nd, NDF_ONLY_PNBUF); + vp = nd.ni_vp; + if (fsflags & MNT_UPDATE) { + if ((vp->v_flag & VROOT) == 0) { + vput(vp); + error = EINVAL; + goto bad; + } + mp = vp->v_mount; + flag = mp->mnt_flag; + flag2 = mp->mnt_kern_flag; + /* + * We only allow the filesystem to be reloaded if it + * is currently mounted read-only. + */ + if ((fsflags & MNT_RELOAD) && + ((mp->mnt_flag & MNT_RDONLY) == 0)) { + vput(vp); + error = EOPNOTSUPP; /* Needs translation */ + goto bad; + } + /* + * Only root, or the user that did the original mount is + * permitted to update it. + */ + if (mp->mnt_stat.f_owner != td->td_proc->p_ucred->cr_uid) { + error = suser_td(td); + if (error) { + vput(vp); + goto bad; + } + } + if (vfs_busy(mp, LK_NOWAIT, 0, td)) { + vput(vp); + error = EBUSY; + goto bad; + } + mtx_lock(&vp->v_interlock); + if ((vp->v_flag & VMOUNT) != 0 || + vp->v_mountedhere != NULL) { + mtx_unlock(&vp->v_interlock); + vfs_unbusy(mp, td); + vput(vp); + error = EBUSY; + goto bad; + } + vp->v_flag |= VMOUNT; + mtx_unlock(&vp->v_interlock); + mp->mnt_flag |= fsflags & + (MNT_RELOAD | MNT_FORCE | MNT_UPDATE | MNT_SNAPSHOT); + VOP_UNLOCK(vp, 0, td); + mp->mnt_optnew = optlist; + goto update; + } + /* + * If the user is not root, ensure that they own the directory + * onto which we are attempting to mount. + */ + error = VOP_GETATTR(vp, &va, td->td_proc->p_ucred, td); + if (error) { + vput(vp); + goto bad; + } + if (va.va_uid != td->td_proc->p_ucred->cr_uid) { + error = suser_td(td); + if (error) { + vput(vp); + goto bad; + } + } + if ((error = vinvalbuf(vp, V_SAVE, td->td_proc->p_ucred, td, 0, 0)) + != 0) { + vput(vp); + goto bad; + } + if (vp->v_type != VDIR) { + vput(vp); + error = ENOTDIR; + goto bad; + } + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstype)) + break; + if (vfsp == NULL) { + linker_file_t lf; + + /* Only load modules for root (very important!) */ + error = suser_td(td); + if (error) { + vput(vp); + goto bad; + } + error = securelevel_gt(td->td_ucred, 0); + if (error != 0) { + vput(vp); + goto bad; + } + error = linker_load_file(fstype, &lf); + if (error || lf == NULL) { + vput(vp); + if (lf == NULL) + error = ENODEV; + goto bad; + } + lf->userrefs++; + /* lookup again, see if the VFS was loaded */ + for (vfsp = vfsconf; vfsp; vfsp = vfsp->vfc_next) + if (!strcmp(vfsp->vfc_name, fstype)) + break; + if (vfsp == NULL) { + lf->userrefs--; + linker_file_unload(lf); + vput(vp); + error = ENODEV; + goto bad; + } + } + + mtx_lock(&vp->v_interlock); + if ((vp->v_flag & VMOUNT) != 0 || + vp->v_mountedhere != NULL) { + mtx_unlock(&vp->v_interlock); + vput(vp); + error = EBUSY; + goto bad; + } + vp->v_flag |= VMOUNT; + mtx_unlock(&vp->v_interlock); + + /* + * Allocate and initialize the filesystem. + */ + mp = malloc(sizeof(struct mount), M_MOUNT, M_WAITOK | M_ZERO); + TAILQ_INIT(&mp->mnt_nvnodelist); + TAILQ_INIT(&mp->mnt_reservedvnlist); + lockinit(&mp->mnt_lock, PVFS, "vfslock", 0, LK_NOPAUSE); + vfs_busy(mp, LK_NOWAIT, 0, td); + mp->mnt_op = vfsp->vfc_vfsops; + mp->mnt_vfc = vfsp; + vfsp->vfc_refcount++; + mp->mnt_stat.f_type = vfsp->vfc_typenum; + mp->mnt_flag |= vfsp->vfc_flags & MNT_VISFLAGMASK; + strncpy(mp->mnt_stat.f_fstypename, fstype, MFSNAMELEN); + mp->mnt_stat.f_fstypename[MFSNAMELEN - 1] = '\0'; + mp->mnt_vnodecovered = vp; + mp->mnt_stat.f_owner = td->td_proc->p_ucred->cr_uid; + strncpy(mp->mnt_stat.f_mntonname, fspath, MNAMELEN); + mp->mnt_stat.f_mntonname[MNAMELEN - 1] = '\0'; + mp->mnt_iosize_max = DFLTPHYS; + VOP_UNLOCK(vp, 0, td); + + mp->mnt_opt = optlist; +update: + /* + * Check if the fs implements the new VFS_NMOUNT() + * function, since the new system call was used. + */ + if (mp->mnt_op->vfs_mount != NULL) { + printf("%s doesn't support the new mount syscall\n", + mp->mnt_vfc->vfc_name); + vput(vp); + error = EOPNOTSUPP; + goto bad; + } + + /* + * Set the mount level flags. + */ + if (fsflags & MNT_RDONLY) + mp->mnt_flag |= MNT_RDONLY; + else if (mp->mnt_flag & MNT_RDONLY) + mp->mnt_kern_flag |= MNTK_WANTRDWR; + mp->mnt_flag &=~ (MNT_NOSUID | MNT_NOEXEC | MNT_NODEV | + MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_NOATIME | + MNT_NOSYMFOLLOW | MNT_IGNORE | + MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); + mp->mnt_flag |= fsflags & (MNT_NOSUID | MNT_NOEXEC | + MNT_NODEV | MNT_SYNCHRONOUS | MNT_UNION | MNT_ASYNC | MNT_FORCE | + MNT_NOSYMFOLLOW | MNT_IGNORE | + MNT_NOATIME | MNT_NOCLUSTERR | MNT_NOCLUSTERW | MNT_SUIDDIR); + /* + * Mount the filesystem. + * XXX The final recipients of VFS_MOUNT just overwrite the ndp they + * get. No freeing of cn_pnbuf. + */ + error = VFS_NMOUNT(mp, &nd, td); + if (mp->mnt_flag & MNT_UPDATE) { + if (mp->mnt_kern_flag & MNTK_WANTRDWR) + mp->mnt_flag &= ~MNT_RDONLY; + mp->mnt_flag &=~ + (MNT_UPDATE | MNT_RELOAD | MNT_FORCE | MNT_SNAPSHOT); + mp->mnt_kern_flag &=~ MNTK_WANTRDWR; + if (error) { + mp->mnt_flag = flag; + mp->mnt_kern_flag = flag2; + vfs_freeopts(mp->mnt_optnew); + } else { + vfs_freeopts(mp->mnt_opt); + mp->mnt_opt = mp->mnt_optnew; + } + if ((mp->mnt_flag & MNT_RDONLY) == 0) { + if (mp->mnt_syncer == NULL) + error = vfs_allocate_syncvnode(mp); + } else { + if (mp->mnt_syncer != NULL) + vrele(mp->mnt_syncer); + mp->mnt_syncer = NULL; + } + vfs_unbusy(mp, td); + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + mtx_unlock(&vp->v_interlock); + vrele(vp); + return (error); + } + vn_lock(vp, LK_EXCLUSIVE | LK_RETRY, td); + /* + * Put the new filesystem on the mount list after root. + */ + cache_purge(vp); + if (!error) { + struct vnode *newdp; + + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + vp->v_mountedhere = mp; + mtx_unlock(&vp->v_interlock); + mtx_lock(&mountlist_mtx); + TAILQ_INSERT_TAIL(&mountlist, mp, mnt_list); + mtx_unlock(&mountlist_mtx); + if (VFS_ROOT(mp, &newdp)) + panic("mount: lost mount"); + checkdirs(vp, newdp); + vput(newdp); + VOP_UNLOCK(vp, 0, td); + if ((mp->mnt_flag & MNT_RDONLY) == 0) + error = vfs_allocate_syncvnode(mp); + vfs_unbusy(mp, td); + if ((error = VFS_START(mp, 0, td)) != 0) { + vrele(vp); + goto bad; + } + } else { + mtx_lock(&vp->v_interlock); + vp->v_flag &= ~VMOUNT; + mtx_unlock(&vp->v_interlock); + mp->mnt_vfc->vfc_refcount--; + vfs_unbusy(mp, td); + free((caddr_t)mp, M_MOUNT); + vput(vp); + goto bad; + } + return (0); +bad: + vfs_freeopts(optlist); + return (error); +} + +/* + * Old Mount API */ #ifndef _SYS_SYSPROTO_H_ struct mount_args { @@ -364,6 +901,17 @@ vfs_mount(td, fstype, fspath, fsflags, fsdata) VOP_UNLOCK(vp, 0, td); update: /* + * Check if the fs implements the old VFS_MOUNT() + * function, since the old system call was used. + */ + if (mp->mnt_op->vfs_mount == NULL) { + printf("%s doesn't support the old mount syscall\n", + mp->mnt_vfc->vfc_name); + vput(vp); + return (EOPNOTSUPP); + } + + /* * Set the mount level flags. */ if (fsflags & MNT_RDONLY) @@ -638,6 +1186,8 @@ dounmount(mp, flags, td) vrele(coveredvp); if (mp->mnt_kern_flag & MNTK_MWAIT) wakeup((caddr_t)mp); + if (mp->mnt_op->vfs_mount == NULL) + vfs_freeopts(mp->mnt_opt); free((caddr_t)mp, M_MOUNT); return (0); } diff --git a/sys/sys/mount.h b/sys/sys/mount.h index eea3a22..858a451 100644 --- a/sys/sys/mount.h +++ b/sys/sys/mount.h @@ -45,6 +45,7 @@ #include <sys/ucred.h> #include <sys/queue.h> +#include <sys/uio.h> #ifdef _KERNEL #include <sys/_lock.h> #include <sys/_mutex.h> @@ -119,6 +120,18 @@ struct statfs { */ TAILQ_HEAD(vnodelst, vnode); +struct vfsoptlist { + struct vfsopt *opt; + unsigned int optcnt; + char *optbuf; +}; + +struct vfsopt { + char *name; + void *value; + int len; +}; + struct mount { TAILQ_ENTRY(mount) mnt_list; /* mount list */ struct vfsops *mnt_op; /* operations on fs */ @@ -130,6 +143,8 @@ struct mount { struct lock mnt_lock; /* mount structure lock */ int mnt_writeopcount; /* write syscalls in progress */ int mnt_flag; /* flags shared with user */ + struct vfsoptlist *mnt_opt; /* mount options */ + struct vfsoptlist *mnt_optnew; /* for MNT_UPDATE */ int mnt_kern_flag; /* kernel only flags */ int mnt_maxsymlinklen; /* max size of short symlink */ struct statfs mnt_stat; /* cache of filesystem stats */ @@ -369,8 +384,12 @@ struct vfsops { int (*vfs_extattrctl)(struct mount *mp, int cmd, struct vnode *filename_vp, int attrnamespace, const char *attrname, struct thread *td); + /* additions below are not binary compatible with 5.0 and below */ + int (*vfs_nmount)(struct mount *mp, struct nameidata *ndp, + struct thread *td); }; +#define VFS_NMOUNT(MP, NDP, P) (*(MP)->mnt_op->vfs_nmount)(MP, NDP, P) #define VFS_MOUNT(MP, PATH, DATA, NDP, P) \ (*(MP)->mnt_op->vfs_mount)(MP, PATH, DATA, NDP, P) #define VFS_START(MP, FLAGS, P) (*(MP)->mnt_op->vfs_start)(MP, FLAGS, P) @@ -412,6 +431,10 @@ extern char *mountrootfsname; * exported vnode operations */ int dounmount(struct mount *, int, struct thread *td); +int kernel_mount(struct iovec *iovp, unsigned int iovcnt, int flags); +int kernel_vmount(int flags, ...); +int vfs_getopt(struct vfsoptlist *, const char *, void **, int *); +int vfs_copyopt(struct vfsoptlist *, const char *, void *, int, int *); int vfs_mount(struct thread *td, const char *type, char *path, int flags, void *data); int vfs_setpublicfs /* set publicly exported fs */ @@ -483,6 +506,7 @@ int fstatfs(int, struct statfs *); int getfh(const char *, fhandle_t *); int getfsstat(struct statfs *, long, int); int getmntinfo(struct statfs **, int); +int nmount(const char *, const char *, int, struct iovec *, int); int mount(const char *, const char *, int, void *); int statfs(const char *, struct statfs *); int unmount(const char *, int); |