diff options
-rw-r--r-- | UPDATING | 3 | ||||
-rw-r--r-- | sbin/Makefile | 1 | ||||
-rw-r--r-- | sbin/mount_fusefs/Makefile | 33 | ||||
-rw-r--r-- | sbin/mount_fusefs/mount_fusefs.8 | 363 | ||||
-rw-r--r-- | sbin/mount_fusefs/mount_fusefs.c | 504 | ||||
-rw-r--r-- | sys/conf/NOTES | 1 | ||||
-rw-r--r-- | sys/conf/files | 9 | ||||
-rw-r--r-- | sys/conf/options | 1 | ||||
-rw-r--r-- | sys/fs/fuse/fuse.h | 223 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_debug.h | 79 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_device.c | 447 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_file.c | 260 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_file.h | 153 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_internal.c | 696 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_internal.h | 393 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_io.c | 811 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_io.h | 67 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_ipc.c | 904 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_ipc.h | 423 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_kernel.h | 373 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_main.c | 162 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_node.c | 384 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_node.h | 142 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_param.h | 80 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_vfsops.c | 533 | ||||
-rw-r--r-- | sys/fs/fuse/fuse_vnops.c | 2037 | ||||
-rw-r--r-- | sys/modules/Makefile | 1 | ||||
-rw-r--r-- | sys/modules/fuse/Makefile | 10 |
28 files changed, 9093 insertions, 0 deletions
@@ -24,6 +24,9 @@ NOTE TO PEOPLE WHO THINK THAT FreeBSD 10.x IS SLOW: disable the most expensive debugging functionality run "ln -s 'abort:false,junk:false' /etc/malloc.conf".) +20121014: + Import the FUSE kernel and userland support into base system. + 20121013: The GNU sort(1) program has been removed since the BSD-licensed sort(1) has been the default for quite some time and no serious diff --git a/sbin/Makefile b/sbin/Makefile index fcdb567..29a5f16 100644 --- a/sbin/Makefile +++ b/sbin/Makefile @@ -49,6 +49,7 @@ SUBDIR=adjkerntz \ mksnap_ffs \ mount \ mount_cd9660 \ + mount_fusefs \ mount_msdosfs \ mount_nfs \ mount_ntfs \ diff --git a/sbin/mount_fusefs/Makefile b/sbin/mount_fusefs/Makefile new file mode 100644 index 0000000..0af8a80 --- /dev/null +++ b/sbin/mount_fusefs/Makefile @@ -0,0 +1,33 @@ +# $FreeBSD$ + +.if defined(DEBUG) +DEBUG_FLAGS+= -D_DEBUG -g +.endif + +.if defined(DEBUG2G) +DEBUG_FLAGS+= -D_DEBUG2G -g +.endif + +.if defined(DEBUG3G) +DEBUG_FLAGS+= -D_DEBUG3G -g +.endif + +.if defined(DEBUG_MSG) +DEBUG_FLAGS+= -D_DEBUG_MSG +.endif + +.if defined(F4BVERS) +DEBUG_FLAGS+= -DFUSE4BSD_VERSION="\"${F4BVERS}\"" +.endif + +PROG= mount_fusefs +SRCS= mount_fusefs.c getmntopts.c +MAN8= mount_fusefs.8 +NO_MANCOMPRESS?= yes + +MOUNT= ${.CURDIR}/../mount +CFLAGS+= -I${MOUNT} + +.PATH: ${MOUNT} + +.include <bsd.prog.mk> diff --git a/sbin/mount_fusefs/mount_fusefs.8 b/sbin/mount_fusefs/mount_fusefs.8 new file mode 100644 index 0000000..a6a1b2c --- /dev/null +++ b/sbin/mount_fusefs/mount_fusefs.8 @@ -0,0 +1,363 @@ +.\" Copyright (c) 1980, 1989, 1991, 1993 +.\" The Regents of the University of California. +.\" Copyright (c) 2005, 2006 Csaba Henk +.\" All rights reserved. +.\" +.\" Redistribution and use in source and binary forms, with or without +.\" modification, are permitted provided that the following conditions +.\" are met: +.\" 1. 
Redistributions of source code must retain the above copyright +.\" notice, this list of conditions and the following disclaimer. +.\" 2. Redistributions in binary form must reproduce the above copyright +.\" notice, this list of conditions and the following disclaimer in the +.\" documentation and/or other materials provided with the distribution. +.\" 4. Neither the name of the University nor the names of its contributors +.\" may be used to endorse or promote products derived from this software +.\" without specific prior written permission. +.\" +.\" THIS SOFTWARE IS PROVIDED BY THE REGENTS AND CONTRIBUTORS ``AS IS'' AND +.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE +.\" ARE DISCLAIMED. IN NO EVENT SHALL THE REGENTS OR CONTRIBUTORS BE LIABLE +.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL +.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS +.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) +.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT +.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY +.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF +.\" SUCH DAMAGE. +.\" +.\" $FreeBSD$ +.\" +.Dd October 11, 2012 +.Dt MOUNT_FUSEFS 8 +.Os +.Sh NAME +.Nm mount_fusefs +.Nd mount a Fuse file system daemon +.Sh SYNOPSIS +.Nm +.Op Fl A +.Op Fl S +.Op Fl v +.Op Fl D Ar fuse_daemon +.Op Fl O Ar daemon_opts +.Op Fl s Ar special +.Op Fl m Ar node +.Op Fl h +.Op Fl V +.Op Fl o Ar option ... +.Ar special node +.Op Ar fuse_daemon ... +.Sh DESCRIPTION +Basic usage is to start a fuse daemon on the given +.Ar special +file. In practice, the daemon is assigned a +.Ar special +file automatically, which can then be identified via +.Xr fstat 1 . +That special file can then be mounted by +.Nm .
+.Pp +However, the procedure of spawning a daemon will usually be automated +so that it is performed by +.Nm . +If the command invoking a given +.Ar fuse_daemon +is appended to the list of arguments, +.Nm +will call the +.Ar fuse_daemon +via that command. In that way the +.Ar fuse_daemon +will be instructed to attach itself to +.Ar special . +From that on mounting goes as in the simple case. (See +.Sx DAEMON MOUNTS . ) +.Pp +The +.Ar special +argument normally will be treated as the path of the special file to mount. +.Pp +However, if +.Pa auto +is passed as +.Ar special , +then +.Nm +will look for a suitable free fuse device by itself. +.Pp +Finally, if +.Ar special +is an integer it will be interpreted as the number +of the file descriptor of an already open fuse device +(used when the Fuse library invokes +.Nm . +(See +.Sx DAEMON MOUNTS ) . +.Pp +The options are as follows: +.Bl -tag -width indent +.It Fl A , Ic --reject-allow_other +Prohibit the +.Cm allow_other +mount flag. Intended for use in scripts and the +.Xr sudoers 5 +file. +.It Fl S , Ic --safe +Run in safe mode (i.e. reject invoking a filesystem daemon) +.It Fl v +Be verbose +.It Fl D, Ic --daemon Ar daemon +Call the specified +.Ar daemon +.It Fl O, Ic --daemon_opts Ar opts +Add +.Ar opts +to the daemon's command line +.It Fl s, Ic --special Ar special +Use +.Ar special +as special +.It Fl m, Ic --mountpath Ar node +Mount on +.Ar node +.It Fl h, Ic --help +Show help +.It Fl V, Ic --version +Show version information +.It Fl o +Mount options are specified via +.Fl o . +The following options are available (and also their negated versions, +by prefixing them with +.Dq no ) : +.Bl -tag -width indent +.It Cm default_permissions +Enable traditional (file mode based) permission checking in kernel +.It Cm allow_other +Do not apply +.Sx STRICT ACCESS POLICY . +Only root can use this option +.It Cm max_read Ns = Ns Ar n +Limit size of read requests to +.Ar n +.It Cm private +Refuse shared mounting of the daemon. 
This is the default behaviour, +to allow sharing, explicitly use +.Fl o Cm noprivate +.It Cm neglect_shares +Don't refuse unmounting if there are secondary mounts +.It Cm push_symlinks_in +Prefix absolute symlinks with the mountpoint +.El +.Pp +.El +.Pp +Besides the above mount options, there is a set of pseudo-mount options which +are supported by the Fuse library. One can list these by passing +.Fl h +to a Fuse daemon. Most of these options have effect only on the behaviour of +the daemon (that is, their scope is limited to userspace). However, +there are some which do require in-kernel support. +Currently the options supported by the kernel are: +.Bl -tag -width indent +.It Cm direct_io +Bypass the buffer cache system +.It Cm kernel_cache +By default cached buffers of a given file are flushed at each +.Xr open 2 . +This option disables this behaviour +.El +.Sh DAEMON MOUNTS +Usually users don't need to use +.Nm +directly, as the Fuse library enables Fuse daemons to invoke +.Nm . +That is, +.Pp +.Dl fuse_daemon device mountpoint +.Pp +has the same effect as +.Pp +.Dl mount_fusefs auto mountpoint fuse_daemon +.Pp +This is the recommended usage when you want basic usage +(eg, run the daemon at a low privilege level but mount it as root). +.Sh STRICT ACCESS POLICY +The strict access policy for Fuse filesystems lets one use the filesystem +only if the filesystem daemon has the same credentials (uid, real uid, gid, +real gid) as the user. +.Pp +This is applied for Fuse mounts by default and only root can mount without +the strict access policy (ie. the +.Cm allow_other +mount option). +.Pp +This is to shield users from the daemon +.Dq spying +on their I/O activities. +.Pp +Users might opt to willingly relax strict access policy (as far as they +are concerned) by doing their own secondary mount (See +.Sx SHARED MOUNTS ) . +.Sh SHARED MOUNTS +A Fuse daemon can be shared (ie. mounted multiple times).
+When doing the first (primary) mount, the spawner and the mounter of the daemon +must have the same uid, or the mounter should be the superuser. +.Pp +After the primary mount is in place, secondary mounts can be done by anyone +unless this feature is disabled by +.Cm private . +The behaviour of a secondary mount is analogous to that of symbolic +links: they redirect all filesystem operations to the primary mount. +.Pp +Doing a secondary mount is like signing an agreement: by this action, the mounter +agrees that the Fuse daemon can trace her I/O activities. From then on +she is not banned from using the filesystem (either via her own mount or +via the primary mount), regardless of whether +.Cm allow_other +is used or not. +.Pp +The device name of a secondary mount is the device name of the corresponding +primary mount, followed by a '#' character and the index of the secondary +mount; e.g. +.Pa /dev/fuse0#3 . +.Sh SECURITY +System administrators might want to use a custom mount policy (ie., one going +beyond the +.Va vfs.usermount +sysctl). The primary tool for such purposes is +.Xr sudo 8 . +However, given that +.Nm +is capable of invoking an arbitrary program, one must be careful when doing this. +.Nm +is designed in a way such that it makes that easy. For this purpose, +there are options which disable certain risky features (ie. +.Fl S +and +.Fl A ) , +and command line parsing is done in a flexible way: mixing options and +non-options is allowed, but processing them stops at the third non-option +argument (after the first two have been utilized as device and mountpoint). +The rest of the command line specifies the daemon and its arguments. +(Alternatively, the daemon, the special and the mount path can be +specified using the respective options.) Note that +.Nm +ignores the environment variable +.Ev POSIXLY_CORRECT +and always behaves as described.
+.Pp +In general, to be as scripting / +.Xr sudoers 5 +friendly as possible, no information has a fixed +position in the command line, but once a given piece of information is +provided, subsequent arguments/options cannot override it (with the +exception of some non-critical ones). +.Sh ENVIRONMENT +.Bl -tag -width ".Ev MOUNT_FUSEFS_SAFE" +.It Ev MOUNT_FUSEFS_SAFE +This has the same effect as the +.Fl S +option. +.It Ev MOUNT_FUSEFS_VERBOSE +This has the same effect as the +.Fl v +option. +.It Ev MOUNT_FUSEFS_IGNORE_UNKNOWN +If set, +.Nm +will ignore unknown mount options. +.It Ev MOUNT_FUSEFS_CALL_BY_LIB +Adjust behaviour to the needs of the FUSE library. Currently it affects +help output. +.El +.Pp +Although the following variables don't have any effect on +.Nm +itself, they affect the behaviour of fuse daemons: +.Bl -tag -width ".Ev FUSE_DEV_NAME" +.It Ev FUSE_DEV_NAME +Device to attach. If not set, the multiplexer path +.Ar /dev/fuse +is used. +.It Ev FUSE_DEV_FD +File descriptor of an opened Fuse device to use. Overrides +.Ev FUSE_DEV_NAME . +.It Ev FUSE_NO_MOUNT +If set, the library won't attempt to mount the filesystem, even +if a mountpoint argument is supplied. +.El +.Sh FILES +.Bl -tag -width /dev/fuse +.It Pa /dev/fuse +Fuse device with which the kernel and Fuse daemons can communicate. +.It Pa /dev/fuse +The multiplexer path. An +.Xr open 2 +performed on it automatically is passed to a free Fuse device by the kernel +(which might be created just for this purpose).
+.El +.Sh EXAMPLES +Mount the example filesystem in the Fuse distribution (from its directory): +either +.Pp +.Dl ./fusexmp /mnt/fuse +.Pp +or +.Pp +.Dl mount_fusefs auto /mnt/fuse ./fusexmp +.Pp +Doing the same in two steps, using +.Pa /dev/fuse0 : +.Pp +.Dl FUSE_DEV_NAME=/dev/fuse ./fusexmp && +.Dl mount_fusefs /dev/fuse /mnt/fuse +.Pp +A script wrapper for fusexmp which ensures that +.Nm +doesn't call any external utility and also provides a hacky +(non race-free) automatic device selection: +.Pp +.Dl #!/bin/sh -e +.Pp +.Dl FUSE_DEV_NAME=/dev/fuse fusexmp +.Dl mount_fusefs -S /dev/fuse /mnt/fuse \(lq$@\(rq +.Sh SEE ALSO +.Xr fstat 1 , +.Xr mount 8 , +.Xr umount 8 , +.Xr sudo 8 +.Sh CAVEATS +Secondary mounts should be unmounted via their device name. If an attempt is +made to unmount them via their filesystem root path, the unmount request +will be forwarded to the primary mount path. +In general, unmounting by device name is less error-prone than by mount path +(although the latter will also work under normal circumstances). +.Pp +If the daemon is specified via the +.Fl D +and +.Fl O +options, it will be invoked via +.Xr system 3 , +and the daemon's command line will also have an +.Dq & +control operator appended, so that we don't have to wait for its termination. +You should use a simple command line when invoking the daemon via these options. +.Sh HISTORY +.Nm +appeared as part of the FreeBSD implementation of the Fuse userspace filesystem +framework (see http://fuse.sourceforge.net). This user interface is FreeBSD specific. +.Sh BUGS +.Ar special +is treated as a multiplexer if and only if it is literally the same as +.Pa auto +or +.Pa /dev/fuse . +Other paths which are equivalent with +.Pa /dev/fuse +(eg., +.Pa /../dev/fuse ) +are not.
diff --git a/sbin/mount_fusefs/mount_fusefs.c b/sbin/mount_fusefs/mount_fusefs.c new file mode 100644 index 0000000..71a022a --- /dev/null +++ b/sbin/mount_fusefs/mount_fusefs.c @@ -0,0 +1,504 @@ +/*- + * Copyright (c) 2005 Jean-Sebastien Pedron + * Copyright (c) 2005 Csaba Henk + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/mount.h> +#include <sys/uio.h> +#include <sys/stat.h> +#include <sys/sysctl.h> + +#include <err.h> +#include <stdio.h> +#include <stdlib.h> +#include <string.h> +#include <sysexits.h> +#include <unistd.h> +#include <fcntl.h> +#include <signal.h> +#include <getopt.h> +#include <libgen.h> +#include <limits.h> +#include <osreldate.h> +#include <paths.h> + +#include "mntopts.h" + +#ifndef FUSE4BSD_VERSION +#define FUSE4BSD_VERSION "0.3.9-pre1" +#endif + +void __usage_short(void); +void usage(void); +void helpmsg(void); +void showversion(void); +int init_backgrounded(void); + +struct mntopt mopts[] = { + #define ALTF_PRIVATE 0x01 + { "private", 0, ALTF_PRIVATE, 1 }, + { "neglect_shares", 0, 0x02, 1 }, + { "push_symlinks_in", 0, 0x04, 1 }, + { "allow_other", 0, 0x08, 1 }, + { "default_permissions", 0, 0x10, 1 }, + #define ALTF_MAXREAD 0x20 + { "max_read=", 0, ALTF_MAXREAD, 1 }, + #define ALTF_SUBTYPE 0x40 + { "subtype=", 0, ALTF_SUBTYPE, 1 }, + #define ALTF_SYNC_UNMOUNT 0x80 + { "sync_unmount", 0, ALTF_SYNC_UNMOUNT, 1 }, + /* Linux specific options, we silently ignore them */ + { "fsname=", 0, 0x00, 1 }, + { "fd=", 0, 0x00, 1 }, + { "rootmode=", 0, 0x00, 1 }, + { "user_id=", 0, 0x00, 1 }, + { "group_id=", 0, 0x00, 1 }, + { "large_read", 0, 0x00, 1 }, + /* "nonempty", just the first two chars are stripped off during parsing */ + { "nempty", 0, 0x00, 1 }, + MOPT_STDOPTS, + MOPT_END +}; + +struct mntval { + int mv_flag; + void *mv_value; + int mv_len; +}; + +struct mntval mvals[] = { + { ALTF_MAXREAD, NULL, 0 }, + { ALTF_SUBTYPE, NULL, 0 }, + { 0, NULL, 0 } +}; + +char *progname; + +#define DEFAULT_MOUNT_FLAGS ALTF_PRIVATE | ALTF_SYNC_UNMOUNT + +int +main(int argc, char *argv[]) +{ + struct iovec *iov; + char ch = '\0'; + int mntflags, iovlen, verbose = 0; + char *dev = NULL, *dir = NULL, mntpath[MAXPATHLEN]; + char *devo = NULL, *diro = NULL; + char ndev[128], fdstr[15]; + 
int i, done = 0, reject_allow_other = 0, safe_level = 0; + int altflags = DEFAULT_MOUNT_FLAGS; + int __altflags = DEFAULT_MOUNT_FLAGS; + struct mntopt *mo; + struct mntval *mv; + static struct option longopts[] = { + {"reject-allow_other", no_argument, NULL, 'A'}, + {"safe", no_argument, NULL, 'S'}, + {"daemon", required_argument, NULL, 'D'}, + {"daemon_opts", required_argument, NULL, 'O'}, + {"special", required_argument, NULL, 's'}, + {"mountpath", required_argument, NULL, 'm'}, + {"version", no_argument, NULL, 'V'}, + {"help", no_argument, NULL, 'h'}, + {0,0,0,0} + }; + int pid = 0; + int fd = -1, fdx; + char *ep; + char *daemon_str = NULL, *daemon_opts = NULL; + + progname = argv[0]; + + /* + * We want a parsing routine which is not sensitive to + * the position of args/opts; it should extract the + * first two args and stop at the beginning of the rest. + * (This makes it easier to call mount_fusefs from external + * utils than it is with a strict "util flags args" syntax.) + */ + + iov = NULL; + iovlen = 0; + mntflags = 0; + /* All in all, I feel it more robust this way... */ + unsetenv("POSIXLY_CORRECT"); + if (getenv("MOUNT_FUSEFS_IGNORE_UNKNOWN")) + getmnt_silent = 1; + if (getenv("MOUNT_FUSEFS_VERBOSE")) + verbose = 1; + + do { + for (i = 0; i < 3; i++) { + if (optind < argc && argv[optind][0] != '-') { + if (dir) { + done = 1; + break; + } + if (dev) + dir = argv[optind]; + else + dev = argv[optind]; + optind++; + } + } + switch(ch) { + case 'A': + reject_allow_other = 1; + break; + case 'S': + safe_level = 1; + break; + case 'D': + if (daemon_str) + errx(1, "daemon specified inconsistently"); + daemon_str = optarg; + break; + case 'O': + if (daemon_opts) + errx(1, "daemon opts specified inconsistently"); + daemon_opts = optarg; + break; + case 'o': + getmntopts(optarg, mopts, &mntflags, &altflags); + for (mv = mvals; mv->mv_flag; ++mv) { + if (! 
(altflags & mv->mv_flag)) + continue; + for (mo = mopts; mo->m_flag; ++mo) { + char *p, *q; + + if (mo->m_flag != mv->mv_flag) + continue; + p = strstr(optarg, mo->m_option); + if (p) { + p += strlen(mo->m_option); + q = p; + while (*q != '\0' && *q != ',') + q++; + mv->mv_len = q - p + 1; + mv->mv_value = malloc(mv->mv_len); + memcpy(mv->mv_value, p, mv->mv_len - 1); + ((char *)mv->mv_value)[mv->mv_len - 1] = '\0'; + break; + } + } + } + break; + case 's': + if (devo) + errx(1, "special specified inconsistently"); + devo = optarg; + break; + case 'm': + if (diro) + errx(1, "mount path specified inconsistently"); + diro = optarg; + break; + case 'v': + verbose = 1; + break; + case 'h': + helpmsg(); + break; + case 'V': + showversion(); + break; + case '\0': + break; + case '?': + default: + usage(); + } + if (done) + break; + } while ((ch = getopt_long(argc, argv, "AvVho:SD:O:s:m:", longopts, NULL)) != -1); + + argc -= optind; + argv += optind; + + if (devo) { + if (dev) + errx(1, "special specified inconsistently"); + dev = devo; + } else if (diro) + errx(1, "if mountpoint is given via an option, special should also be given via an option"); + + if (diro) { + if (dir) + errx(1, "mount path specified inconsistently"); + dir = diro; + } + + if ((! dev) && argc > 0) { + dev = *argv++; + argc--; + } + + if ((! dir) && argc > 0) { + dir = *argv++; + argc--; + } + + if (! (dev && dir)) + errx(1, "missing special and/or mountpoint"); + + for (mo = mopts; mo->m_flag; ++mo) { + if (altflags & mo->m_flag) { + int iov_done = 0; + + if (reject_allow_other && + strcmp(mo->m_option, "allow_other") == 0) + /* + * reject_allow_other is stronger than a + * negative of allow_other: if this is set, + * allow_other is blocked, period. 
+ */ + errx(1, "\"allow_other\" usage is banned by respective option"); + + for (mv = mvals; mv->mv_flag; ++mv) { + if (mo->m_flag != mv->mv_flag) + continue; + if (mv->mv_value) { + build_iovec(&iov, &iovlen, mo->m_option, mv->mv_value, mv->mv_len); + iov_done = 1; + break; + } + } + if (! iov_done) + build_iovec(&iov, &iovlen, mo->m_option, + __DECONST(void *, ""), -1); + } + if (__altflags & mo->m_flag) { + char *uscore_opt; + + if (asprintf(&uscore_opt, "__%s", mo->m_option) == -1) + err(1, "failed to allocate memory"); + build_iovec(&iov, &iovlen, uscore_opt, + __DECONST(void *, ""), -1); + free(uscore_opt); + } + } + + if (getenv("MOUNT_FUSEFS_SAFE")) + safe_level = 1; + + if (safe_level > 0 && (argc > 0 || daemon_str || daemon_opts)) + errx(1, "safe mode, spawning daemon not allowed"); + + if ((argc > 0 && (daemon_str || daemon_opts)) || + (daemon_opts && ! daemon_str)) + errx(1, "daemon specified inconsistently"); + + /* + * Resolve the mountpoint with realpath(3) and remove unnecessary + * slashes from the devicename if there are any. + */ + if (checkpath(dir, mntpath) != 0) + err(1, "%s", mntpath); + (void)rmslashes(dev, dev); + + if (strcmp(dev, "auto") == 0) + dev = __DECONST(char *, "/dev/fuse"); + + if (strcmp(dev, "/dev/fuse") == 0) { + if (! 
(argc > 0 || daemon_str)) { + fprintf(stderr, "Please also specify the fuse daemon to run when mounting via the multiplexer!\n"); + usage(); + } + if ((fd = open(dev, O_RDWR)) < 0) + err(1, "failed to open fuse device"); + } else { + fdx = strtol(dev, &ep, 10); + if (*ep == '\0') + fd = fdx; + } + + /* Identifying device */ + if (fd >= 0) { + struct stat sbuf; + char *ndevbas, *lep; + + if (fstat(fd, &sbuf) == -1) + err(1, "cannot stat device file descriptor"); + + strcpy(ndev, _PATH_DEV); + ndevbas = ndev + strlen(_PATH_DEV); + devname_r(sbuf.st_rdev, S_IFCHR, ndevbas, + sizeof(ndev) - strlen(_PATH_DEV)); + + if (strncmp(ndevbas, "fuse", 4)) + errx(1, "mounting inappropriate device"); + + strtol(ndevbas + 4, &lep, 10); + if (*lep != '\0') + errx(1, "mounting inappropriate device"); + + dev = ndev; + } + + if (argc > 0 || daemon_str) { + char *fds; + + if (fd < 0 && (fd = open(dev, O_RDWR)) < 0) + err(1, "failed to open fuse device"); + + if (asprintf(&fds, "%d", fd) == -1) + err(1, "failed to allocate memory"); + setenv("FUSE_DEV_FD", fds, 1); + free(fds); + setenv("FUSE_NO_MOUNT", "1", 1); + + if (daemon_str) { + char *bgdaemon; + int len; + + if (! daemon_opts) + daemon_opts = __DECONST(char *, ""); + + len = strlen(daemon_str) + 1 + strlen(daemon_opts) + + 2 + 1; + bgdaemon = calloc(1, len); + + if (! bgdaemon) + err(1, "failed to allocate memory"); + + strlcpy(bgdaemon, daemon_str, len); + strlcat(bgdaemon, " ", len); + strlcat(bgdaemon, daemon_opts, len); + strlcat(bgdaemon, " &", len); + + if (system(bgdaemon)) + err(1, "failed to call fuse daemon"); + } else { + if ((pid = fork()) < 0) + err(1, "failed to fork for fuse daemon"); + + if (pid == 0) { + execvp(argv[0], argv); + err(1, "failed to exec fuse daemon"); + } + } + } + + if (fd >= 0 && ! init_backgrounded() && close(fd) < 0) { + if (pid) + kill(pid, SIGKILL); + err(1, "failed to close fuse device"); + } + + /* Prepare the options vector for nmount(). build_iovec() is declared + * in mntopts.h. 
*/ + sprintf(fdstr, "%d", fd); + build_iovec(&iov, &iovlen, "fstype", __DECONST(void *, "fusefs"), -1); + build_iovec(&iov, &iovlen, "fspath", mntpath, -1); + build_iovec(&iov, &iovlen, "from", dev, -1); + build_iovec(&iov, &iovlen, "fd", fdstr, -1); + + if (verbose) + fprintf(stderr, "mounting fuse daemon on device %s\n", dev); + + if (nmount(iov, iovlen, mntflags) < 0) + err(EX_OSERR, "%s on %s", dev, mntpath); + + exit(0); +} + +void +__usage_short(void) { + fprintf(stderr, + "usage:\n%s [-A|-S|-v|-V|-h|-D daemon|-O args|-s special|-m node|-o option...] special node [daemon args...]\n\n", + basename(progname)); +} + +void +usage(void) +{ + struct mntopt *mo; + + __usage_short(); + + fprintf(stderr, "known options:\n"); + for (mo = mopts; mo->m_flag; ++mo) + fprintf(stderr, "\t%s\n", mo->m_option); + + fprintf(stderr, "\n(use -h for a detailed description of these options)\n"); + exit(EX_USAGE); +} + +void +helpmsg(void) +{ + if (! getenv("MOUNT_FUSEFS_CALL_BY_LIB")) { + __usage_short(); + fprintf(stderr, "description of options:\n"); + } + + /* + * The main use case of this function is giving info embedded in general + * FUSE lib help output. Therefore the style and the content of the output + * tries to fit there as much as possible. 
+ */ + fprintf(stderr, + " -o allow_other allow access to other users\n" + /* " -o nonempty allow mounts over non-empty file/dir\n" */ + " -o default_permissions enable permission checking by kernel\n" + /* + " -o fsname=NAME set filesystem name\n" + " -o large_read issue large read requests (2.4 only)\n" + */ + " -o subtype=NAME set filesystem type\n" + " -o max_read=N set maximum size of read requests\n" + " -o noprivate allow secondary mounting of the filesystem\n" + " -o neglect_shares don't report EBUSY when unmount attempted\n" + " in presence of secondary mounts\n" + " -o push_symlinks_in prefix absolute symlinks with mountpoint\n" + " -o sync_unmount do unmount synchronously\n" + ); + exit(EX_USAGE); +} + +void +showversion(void) +{ + puts("mount_fusefs [fuse4bsd] version: " FUSE4BSD_VERSION); + exit(EX_USAGE); +} + +int +init_backgrounded(void) +{ + int ibg; + size_t len; + + len = sizeof(ibg); + + if (sysctlbyname("vfs.fuse.init_backgrounded", &ibg, &len, NULL, 0)) + return (0); + + return (ibg); +} diff --git a/sys/conf/NOTES b/sys/conf/NOTES index 6b9d6aa..622cbbf 100644 --- a/sys/conf/NOTES +++ b/sys/conf/NOTES @@ -1003,6 +1003,7 @@ options NFSCLIENT #Network File System client # The rest are optional: options CD9660 #ISO 9660 filesystem options FDESCFS #File descriptor filesystem +options FUSE #FUSE support module options HPFS #OS/2 File system options MSDOSFS #MS DOS File System (FAT, FAT32) options NFSSERVER #Network File System server diff --git a/sys/conf/files b/sys/conf/files index e9aeef3..812937c 100644 --- a/sys/conf/files +++ b/sys/conf/files @@ -2317,6 +2317,15 @@ fs/devfs/devfs_vnops.c standard fs/fdescfs/fdesc_vfsops.c optional fdescfs fs/fdescfs/fdesc_vnops.c optional fdescfs fs/fifofs/fifo_vnops.c standard +fs/fuse/fuse_device.c optional fuse +fs/fuse/fuse_file.c optional fuse +fs/fuse/fuse_internal.c optional fuse +fs/fuse/fuse_io.c optional fuse +fs/fuse/fuse_ipc.c optional fuse +fs/fuse/fuse_main.c optional fuse +fs/fuse/fuse_node.c 
optional fuse +fs/fuse/fuse_vfsops.c optional fuse +fs/fuse/fuse_vnops.c optional fuse fs/hpfs/hpfs_alsubr.c optional hpfs fs/hpfs/hpfs_lookup.c optional hpfs fs/hpfs/hpfs_subr.c optional hpfs diff --git a/sys/conf/options b/sys/conf/options index 0ed9ce6..1b6aa50 100644 --- a/sys/conf/options +++ b/sys/conf/options @@ -211,6 +211,7 @@ CODA opt_dontuse.h EXT2FS opt_dontuse.h FDESCFS opt_dontuse.h FFS opt_dontuse.h +FUSE opt_dontuse.h HPFS opt_dontuse.h MSDOSFS opt_dontuse.h NANDFS opt_dontuse.h diff --git a/sys/fs/fuse/fuse.h b/sys/fs/fuse/fuse.h new file mode 100644 index 0000000..ef9d561 --- /dev/null +++ b/sys/fs/fuse/fuse.h @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2007-2009 Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#include "fuse_kernel.h" + +#define FUSE_DEFAULT_DAEMON_TIMEOUT 60 /* s */ +#define FUSE_MIN_DAEMON_TIMEOUT 0 /* s */ +#define FUSE_MAX_DAEMON_TIMEOUT 600 /* s */ + +#ifndef FUSE_FREEBSD_VERSION +#define FUSE_FREEBSD_VERSION "0.4.4" +#endif + +/* Mapping versions to features */ + +#define FUSE_KERNELABI_GEQ(maj, min) \ +(FUSE_KERNEL_VERSION > (maj) || (FUSE_KERNEL_VERSION == (maj) && FUSE_KERNEL_MINOR_VERSION >= (min))) + +/* + * Appearance of new FUSE operations is not always on par with version + * numbering... At least, 7.3 is a sufficient condition for having + * FUSE_{ACCESS,CREATE}. + */ +#if FUSE_KERNELABI_GEQ(7, 3) +#ifndef FUSE_HAS_ACCESS +#define FUSE_HAS_ACCESS 1 +#endif +#ifndef FUSE_HAS_CREATE +#define FUSE_HAS_CREATE 1 +#endif +#else /* FUSE_KERNELABI_GEQ(7, 3) */ +#ifndef FUSE_HAS_ACCESS +#define FUSE_HAS_ACCESS 0 +#endif +#ifndef FUSE_HAS_CREATE +#define FUSE_HAS_CREATE 0 +#endif +#endif + +#if FUSE_KERNELABI_GEQ(7, 7) +#ifndef FUSE_HAS_GETLK +#define FUSE_HAS_GETLK 1 +#endif +#ifndef FUSE_HAS_SETLK +#define FUSE_HAS_SETLK 1 +#endif +#ifndef FUSE_HAS_SETLKW +#define FUSE_HAS_SETLKW 1 +#endif +#ifndef FUSE_HAS_INTERRUPT +#define FUSE_HAS_INTERRUPT 1 +#endif +#else /* FUSE_KERNELABI_GEQ(7, 7) */ +#ifndef FUSE_HAS_GETLK +#define FUSE_HAS_GETLK 0 +#endif +#ifndef FUSE_HAS_SETLK +#define FUSE_HAS_SETLK 0 +#endif +#ifndef FUSE_HAS_SETLKW +#define FUSE_HAS_SETLKW 0 +#endif +#ifndef FUSE_HAS_INTERRUPT +#define FUSE_HAS_INTERRUPT 0 +#endif +#endif + +#if FUSE_KERNELABI_GEQ(7, 8) +#ifndef FUSE_HAS_FLUSH_RELEASE +#define FUSE_HAS_FLUSH_RELEASE 1 +#endif +/* + * "DESTROY" came in the middle of the 7.8 era, + * so this is not completely exact... 
+ * + * FUSE_HAS_DESTROY has its own guard, like every other FUSE_HAS_* knob, + * so predefining FUSE_HAS_FLUSH_RELEASE cannot leave it undefined. + */ +#ifndef FUSE_HAS_DESTROY +#define FUSE_HAS_DESTROY 1 +#endif +#else /* FUSE_KERNELABI_GEQ(7, 8) */ +#ifndef FUSE_HAS_FLUSH_RELEASE +#define FUSE_HAS_FLUSH_RELEASE 0 +#endif +#ifndef FUSE_HAS_DESTROY +#define FUSE_HAS_DESTROY 0 +#endif +#endif + +/* misc */ + +SYSCTL_DECL(_vfs_fuse); + +/* Fuse locking */ + +extern struct mtx fuse_mtx; +#define FUSE_LOCK() fuse_lck_mtx_lock(fuse_mtx) +#define FUSE_UNLOCK() fuse_lck_mtx_unlock(fuse_mtx) + +#define RECTIFY_TDCR(td, cred) \ +do { \ + if (! (td)) \ + (td) = curthread; \ + if (! (cred)) \ + (cred) = (td)->td_ucred; \ +} while (0) + +/* Debug related stuff */ + +#ifndef FUSE_DEBUG_DEVICE +#define FUSE_DEBUG_DEVICE 0 +#endif + +#ifndef FUSE_DEBUG_FILE +#define FUSE_DEBUG_FILE 0 +#endif + +#ifndef FUSE_DEBUG_INTERNAL +#define FUSE_DEBUG_INTERNAL 0 +#endif + +#ifndef FUSE_DEBUG_IO +#define FUSE_DEBUG_IO 0 +#endif + +#ifndef FUSE_DEBUG_IPC +#define FUSE_DEBUG_IPC 0 +#endif + +#ifndef FUSE_DEBUG_LOCK +#define FUSE_DEBUG_LOCK 0 +#endif + +#ifndef FUSE_DEBUG_VFSOPS +#define FUSE_DEBUG_VFSOPS 0 +#endif + +#ifndef FUSE_DEBUG_VNOPS +#define FUSE_DEBUG_VNOPS 0 +#endif + +#ifndef FUSE_TRACE +#define FUSE_TRACE 0 +#endif + +#define DEBUGX(cond, fmt, ...) 
do { \ + if (((cond))) { \ + printf("%s: " fmt, __func__, ## __VA_ARGS__); \ + } } while (0) + +#define fuse_lck_mtx_lock(mtx) do { \ + DEBUGX(FUSE_DEBUG_LOCK, "0: lock(%s): %s@%d by %d\n", \ + __STRING(mtx), __func__, __LINE__, curthread->td_proc->p_pid); \ + mtx_lock(&(mtx)); \ + DEBUGX(FUSE_DEBUG_LOCK, "1: lock(%s): %s@%d by %d\n", \ + __STRING(mtx), __func__, __LINE__, curthread->td_proc->p_pid); \ + } while (0) + +#define fuse_lck_mtx_unlock(mtx) do { \ + DEBUGX(FUSE_DEBUG_LOCK, "0: unlock(%s): %s@%d by %d\n", \ + __STRING(mtx), __func__, __LINE__, curthread->td_proc->p_pid); \ + mtx_unlock(&(mtx)); \ + DEBUGX(FUSE_DEBUG_LOCK, "1: unlock(%s): %s@%d by %d\n", \ + __STRING(mtx), __func__, __LINE__, curthread->td_proc->p_pid); \ + } while (0) + +void fuse_ipc_init(void); +void fuse_ipc_destroy(void); + +int fuse_device_init(void); +void fuse_device_destroy(void); diff --git a/sys/fs/fuse/fuse_debug.h b/sys/fs/fuse/fuse_debug.h new file mode 100644 index 0000000..3faf372 --- /dev/null +++ b/sys/fs/fuse/fuse_debug.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2007-2009 Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#include <sys/cdefs.h> + +/* Debug related stuff */ + +#ifndef FUSE_DEBUG_MODULE +#error "FUSE_DEBUG_MODULE is not defined" +#else +#define FUSE_DEBUG_VAR __CONCAT(FUSE_DEBUG_,FUSE_DEBUG_MODULE) +#endif + +#define DEBUG(fmt, ...) DEBUGX(FUSE_DEBUG_VAR >= 1, fmt, ## __VA_ARGS__) +#define DEBUG2G(fmt, ...) DEBUGX(FUSE_DEBUG_VAR >= 2, fmt, ## __VA_ARGS__) + +#define debug_printf(fmt, ...) DEBUG(fmt, ## __VA_ARGS__) +#define kdebug_printf(fmt, ...) DEBUG(fmt, ## __VA_ARGS__) + +#define fuse_trace_printf(fmt, ...) \ + DEBUGX(FUSE_DEBUG_VAR && FUSE_TRACE, fmt, ## __VA_ARGS__) +#define fuse_trace_printf_func() \ + fuse_trace_printf("%s:%d\n", __FILE__, __LINE__) +#define fuse_trace_printf_vfsop() fuse_trace_printf_func() +#define fuse_trace_printf_vnop() fuse_trace_printf_func() diff --git a/sys/fs/fuse/fuse_device.c b/sys/fs/fuse/fuse_device.c new file mode 100644 index 0000000..0efc503 --- /dev/null +++ b/sys/fs/fuse/fuse_device.c @@ -0,0 +1,447 @@ +/* + * Copyright (c) 2007-2009 Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/module.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/uio.h> +#include <sys/malloc.h> +#include <sys/queue.h> +#include <sys/lock.h> +#include <sys/sx.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/stat.h> +#include <sys/fcntl.h> +#include <sys/sysctl.h> +#include <sys/poll.h> +#include <sys/selinfo.h> + +#include "fuse.h" +#include "fuse_ipc.h" + +#define FUSE_DEBUG_MODULE DEVICE +#include "fuse_debug.h" + +static struct cdev *fuse_dev; + +static d_open_t fuse_device_open; +static d_close_t fuse_device_close; +static d_poll_t fuse_device_poll; +static d_read_t fuse_device_read; +static d_write_t fuse_device_write; + +static struct cdevsw fuse_device_cdevsw = { + .d_open = fuse_device_open, + .d_close = fuse_device_close, + .d_name = "fuse", + .d_poll = fuse_device_poll, + .d_read = fuse_device_read, + .d_write = fuse_device_write, + .d_version = D_VERSION, + .d_flags = D_NEEDMINOR, +}; + +/**************************** + * + * >>> Fuse device op defs + * + 
****************************/ + +static void +fdata_dtor(void *arg) +{ + struct fuse_data *fdata; + + fdata = arg; + fdata_trydestroy(fdata); +} + +/* + * Resources are set up on a per-open basis + */ +static int +fuse_device_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +{ + struct fuse_data *fdata; + int error; + + DEBUG("device %p\n", dev); + + fdata = fdata_alloc(dev, td->td_ucred); + error = devfs_set_cdevpriv(fdata, fdata_dtor); + if (error != 0) + fdata_trydestroy(fdata); + else + DEBUG("%s: device opened by thread %d.\n", dev->si_name, + td->td_tid); + return (error); +} + +static int +fuse_device_close(struct cdev *dev, int fflag, int devtype, struct thread *td) +{ + struct fuse_data *data; + struct fuse_ticket *tick; + int error; + + error = devfs_get_cdevpriv((void **)&data); + if (error != 0) + return (error); + if (!data) + panic("no fuse data upon fuse device close"); + fdata_set_dead(data); + + FUSE_LOCK(); + fuse_lck_mtx_lock(data->aw_mtx); + /* wakup poll()ers */ + selwakeuppri(&data->ks_rsel, PZERO + 1); + /* Don't let syscall handlers wait in vain */ + while ((tick = fuse_aw_pop(data))) { + fuse_lck_mtx_lock(tick->tk_aw_mtx); + fticket_set_answered(tick); + tick->tk_aw_errno = ENOTCONN; + wakeup(tick); + fuse_lck_mtx_unlock(tick->tk_aw_mtx); + FUSE_ASSERT_AW_DONE(tick); + fuse_ticket_drop(tick); + } + fuse_lck_mtx_unlock(data->aw_mtx); + FUSE_UNLOCK(); + + DEBUG("%s: device closed by thread %d.\n", dev->si_name, td->td_tid); + return (0); +} + +int +fuse_device_poll(struct cdev *dev, int events, struct thread *td) +{ + struct fuse_data *data; + int error, revents = 0; + + error = devfs_get_cdevpriv((void **)&data); + if (error != 0) + return (events & + (POLLHUP|POLLIN|POLLRDNORM|POLLOUT|POLLWRNORM)); + + if (events & (POLLIN | POLLRDNORM)) { + fuse_lck_mtx_lock(data->ms_mtx); + if (fdata_get_dead(data) || STAILQ_FIRST(&data->ms_head)) + revents |= events & (POLLIN | POLLRDNORM); + else + selrecord(td, &data->ks_rsel); + 
fuse_lck_mtx_unlock(data->ms_mtx); + } + if (events & (POLLOUT | POLLWRNORM)) { + revents |= events & (POLLOUT | POLLWRNORM); + } + return (revents); +} + +/* + * fuse_device_read hangs on the queue of VFS messages. + * When it's notified that there is a new one, it picks that and + * passes up to the daemon + */ +int +fuse_device_read(struct cdev *dev, struct uio *uio, int ioflag) +{ + int err; + struct fuse_data *data; + struct fuse_ticket *tick; + void *buf[] = {NULL, NULL, NULL}; + int buflen[3]; + int i; + + DEBUG("fuse device being read on thread %d\n", uio->uio_td->td_tid); + + err = devfs_get_cdevpriv((void **)&data); + if (err != 0) + return (err); + + fuse_lck_mtx_lock(data->ms_mtx); +again: + if (fdata_get_dead(data)) { + DEBUG2G("we know early on that reader should be kicked so we don't wait for news\n"); + fuse_lck_mtx_unlock(data->ms_mtx); + return (ENODEV); + } + if (!(tick = fuse_ms_pop(data))) { + /* check if we may block */ + if (ioflag & O_NONBLOCK) { + /* get outa here soon */ + fuse_lck_mtx_unlock(data->ms_mtx); + return (EAGAIN); + } else { + err = msleep(data, &data->ms_mtx, PCATCH, "fu_msg", 0); + if (err != 0) { + fuse_lck_mtx_unlock(data->ms_mtx); + return (fdata_get_dead(data) ? ENODEV : err); + } + tick = fuse_ms_pop(data); + } + } + if (!tick) { + /* + * We can get here if fuse daemon suddenly terminates, + * eg, by being hit by a SIGKILL + * -- and some other cases, too, tho not totally clear, when + * (cv_signal/wakeup_one signals the whole process ?) 
+ */ + DEBUG("no message on thread #%d\n", uio->uio_td->td_tid); + goto again; + } + fuse_lck_mtx_unlock(data->ms_mtx); + + if (fdata_get_dead(data)) { + /* + * somebody somewhere -- eg., umount routine -- + * wants this liaison finished off + */ + DEBUG2G("reader is to be sacked\n"); + if (tick) { + DEBUG2G("weird -- \"kick\" is set tho there is message\n"); + FUSE_ASSERT_MS_DONE(tick); + fuse_ticket_drop(tick); + } + return (ENODEV); /* This should make the daemon get off + * of us */ + } + DEBUG("message got on thread #%d\n", uio->uio_td->td_tid); + + /* A message with a non-zero payload size must carry a payload buffer. */ + KASSERT(tick->tk_ms_bufdata || tick->tk_ms_bufsize == 0, + ("null buf pointer with positive size")); + + switch (tick->tk_ms_type) { + case FT_M_FIOV: + buf[0] = tick->tk_ms_fiov.base; + buflen[0] = tick->tk_ms_fiov.len; + break; + case FT_M_BUF: + buf[0] = tick->tk_ms_fiov.base; + buflen[0] = tick->tk_ms_fiov.len; + buf[1] = tick->tk_ms_bufdata; + buflen[1] = tick->tk_ms_bufsize; + break; + default: + panic("unknown message type for fuse_ticket %p", tick); + } + + for (i = 0; buf[i]; i++) { + /* + * Why not ban mercilessly stupid daemons who can't keep up + * with us? (There is not much use of a partial read here...) + */ + /* + * XXX note that in such cases Linux FUSE throws EIO at the + * syscall invoker and stands back to the message queue. The + * rationale should be made clear (and possibly adopt that + * behaviour). Keeping the current scheme at least makes + * fallacy as loud as possible... 
+ */ + if (uio->uio_resid < buflen[i]) { + fdata_set_dead(data); + DEBUG2G("daemon is stupid, kick it off...\n"); + err = ENODEV; + break; + } + err = uiomove(buf[i], buflen[i], uio); + if (err) + break; + } + + FUSE_ASSERT_MS_DONE(tick); + fuse_ticket_drop(tick); + + return (err); +} + +static __inline int +fuse_ohead_audit(struct fuse_out_header *ohead, struct uio *uio) +{ + DEBUG("Out header -- len: %i, error: %i, unique: %llu; iovecs: %d\n", + ohead->len, ohead->error, (unsigned long long)ohead->unique, + uio->uio_iovcnt); + + if (uio->uio_resid + sizeof(struct fuse_out_header) != ohead->len) { + DEBUG("Format error: body size differs from size claimed by header\n"); + return (EINVAL); + } + if (uio->uio_resid && ohead->error) { + DEBUG("Format error: non zero error but message had a body\n"); + return (EINVAL); + } + /* Sanitize the linuxism of negative errnos */ + ohead->error = -(ohead->error); + + return (0); +} + +/* + * fuse_device_write first reads the header sent by the daemon. + * If that's OK, looks up ticket/callback node by the unique id seen in header. + * If the callback node contains a handler function, the uio is passed over + * that. + */ +static int +fuse_device_write(struct cdev *dev, struct uio *uio, int ioflag) +{ + struct fuse_out_header ohead; + int err = 0; + struct fuse_data *data; + struct fuse_ticket *tick, *x_tick; + int found = 0; + + DEBUG("resid: %zd, iovcnt: %d, thread: %d\n", + uio->uio_resid, uio->uio_iovcnt, uio->uio_td->td_tid); + + err = devfs_get_cdevpriv((void **)&data); + if (err != 0) + return (err); + + if (uio->uio_resid < sizeof(struct fuse_out_header)) { + DEBUG("got less than a header!\n"); + fdata_set_dead(data); + return (EINVAL); + } + if ((err = uiomove(&ohead, sizeof(struct fuse_out_header), uio)) != 0) + return (err); + + /* + * We check header information (which is redundant) and compare it + * with what we see. 
If we see some inconsistency we discard the + * whole answer and proceed on as if it had never existed. In + * particular, no pretender will be woken up, regardless the + * "unique" value in the header. + */ + if ((err = fuse_ohead_audit(&ohead, uio))) { + fdata_set_dead(data); + return (err); + } + /* Pass stuff over to callback if there is one installed */ + + /* Looking for ticket with the unique id of header */ + fuse_lck_mtx_lock(data->aw_mtx); + TAILQ_FOREACH_SAFE(tick, &data->aw_head, tk_aw_link, + x_tick) { + DEBUG("bumped into callback #%llu\n", + (unsigned long long)tick->tk_unique); + if (tick->tk_unique == ohead.unique) { + found = 1; + fuse_aw_remove(tick); + break; + } + } + fuse_lck_mtx_unlock(data->aw_mtx); + + if (found) { + if (tick->tk_aw_handler) { + /* + * We found a callback with proper handler. In this + * case the out header will be 0wnd by the callback, + * so the fun of freeing that is left for her. + * (Then, by all chance, she'll just get that's done + * via ticket_drop(), so no manual mucking + * around...) + */ + DEBUG("pass ticket to a callback\n"); + memcpy(&tick->tk_aw_ohead, &ohead, sizeof(ohead)); + err = tick->tk_aw_handler(tick, uio); + } else { + /* pretender doesn't wanna do anything with answer */ + DEBUG("stuff devalidated, so we drop it\n"); + } + + /* + * As aw_mtx was not held during the callback execution the + * ticket may have been inserted again. However, this is safe + * because fuse_ticket_drop() will deal with refcount anyway. + */ + fuse_ticket_drop(tick); + } else { + /* no callback at all! 
*/ + DEBUG("erhm, no handler for this response\n"); + err = EINVAL; + } + + return (err); +} + +int +fuse_device_init(void) +{ + + fuse_dev = make_dev(&fuse_device_cdevsw, 0, UID_ROOT, GID_OPERATOR, + S_IRUSR | S_IWUSR | S_IRGRP | S_IWGRP, "fuse"); + if (fuse_dev == NULL) + return (ENOMEM); + return (0); +} + +void +fuse_device_destroy(void) +{ + + MPASS(fuse_dev != NULL); + destroy_dev(fuse_dev); +} diff --git a/sys/fs/fuse/fuse_file.c b/sys/fs/fuse/fuse_file.c new file mode 100644 index 0000000..dda9149 --- /dev/null +++ b/sys/fs/fuse/fuse_file.c @@ -0,0 +1,260 @@ +/* + * Copyright (c) 2007-2009 Google Inc. and Amit Singh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/module.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/uio.h> +#include <sys/malloc.h> +#include <sys/queue.h> +#include <sys/lock.h> +#include <sys/sx.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/sysctl.h> + +#include "fuse.h" +#include "fuse_file.h" +#include "fuse_internal.h" +#include "fuse_ipc.h" +#include "fuse_node.h" + +#define FUSE_DEBUG_MODULE FILE +#include "fuse_debug.h" + +static int fuse_fh_count = 0; + +SYSCTL_INT(_vfs_fuse, OID_AUTO, filehandle_count, CTLFLAG_RD, + &fuse_fh_count, 0, ""); + +int +fuse_filehandle_open(struct vnode *vp, + fufh_type_t fufh_type, + struct fuse_filehandle **fufhp, + struct thread *td, + struct ucred *cred) +{ + struct fuse_dispatcher fdi; + struct fuse_open_in *foi; + struct fuse_open_out *foo; + + int err = 0; + int isdir = 0; + int oflags = 0; + int op = FUSE_OPEN; + + fuse_trace_printf("fuse_filehandle_open(vp=%p, fufh_type=%d)\n", + vp, fufh_type); + + if (fuse_filehandle_valid(vp, fufh_type)) { + panic("FUSE: filehandle_open called despite valid fufh (type=%d)", + fufh_type); + /* NOTREACHED */ + } + /* + * Note that this means we are effectively FILTERING OUT open() flags. + */ + oflags = fuse_filehandle_xlate_to_oflags(fufh_type); + + if (vnode_isdir(vp)) { + isdir = 1; + op = FUSE_OPENDIR; + if (fufh_type != FUFH_RDONLY) { + printf("FUSE:non-rdonly fh requested for a directory?\n"); + fufh_type = FUFH_RDONLY; + } + } + fdisp_init(&fdi, sizeof(*foi)); + fdisp_make_vp(&fdi, op, vp, td, cred); + + foi = fdi.indata; + foi->flags = oflags; + + if ((err = fdisp_wait_answ(&fdi))) { + debug_printf("OUCH ... 
daemon didn't give fh (err = %d)\n", err); + if (err == ENOENT) { + fuse_internal_vnode_disappear(vp); + } + goto out; + } + foo = fdi.answ; + + fuse_filehandle_init(vp, fufh_type, fufhp, foo->fh); + fuse_vnode_open(vp, foo->open_flags, td); + +out: + fdisp_destroy(&fdi); + return err; +} + +int +fuse_filehandle_close(struct vnode *vp, + fufh_type_t fufh_type, + struct thread *td, + struct ucred *cred) +{ + struct fuse_dispatcher fdi; + struct fuse_release_in *fri; + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_filehandle *fufh = NULL; + + int err = 0; + int isdir = 0; + int op = FUSE_RELEASE; + + fuse_trace_printf("fuse_filehandle_put(vp=%p, fufh_type=%d)\n", + vp, fufh_type); + + fufh = &(fvdat->fufh[fufh_type]); + if (!FUFH_IS_VALID(fufh)) { + panic("FUSE: filehandle_put called on invalid fufh (type=%d)", + fufh_type); + /* NOTREACHED */ + } + if (fuse_isdeadfs(vp)) { + goto out; + } + if (vnode_isdir(vp)) { + op = FUSE_RELEASEDIR; + isdir = 1; + } + fdisp_init(&fdi, sizeof(*fri)); + fdisp_make_vp(&fdi, op, vp, td, cred); + fri = fdi.indata; + fri->fh = fufh->fh_id; + fri->flags = fuse_filehandle_xlate_to_oflags(fufh_type); + + err = fdisp_wait_answ(&fdi); + fdisp_destroy(&fdi); + +out: + atomic_subtract_acq_int(&fuse_fh_count, 1); + fufh->fh_id = (uint64_t)-1; + fufh->fh_type = FUFH_INVALID; + fuse_invalidate_attr(vp); + + return err; +} + +int +fuse_filehandle_valid(struct vnode *vp, fufh_type_t fufh_type) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_filehandle *fufh; + + fufh = &(fvdat->fufh[fufh_type]); + return FUFH_IS_VALID(fufh); +} + +int +fuse_filehandle_get(struct vnode *vp, fufh_type_t fufh_type, + struct fuse_filehandle **fufhp) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_filehandle *fufh; + + fufh = &(fvdat->fufh[fufh_type]); + if (!FUFH_IS_VALID(fufh)) + return EBADF; + if (fufhp != NULL) + *fufhp = fufh; + return 0; +} + +int +fuse_filehandle_getrw(struct vnode *vp, fufh_type_t fufh_type, + struct 
fuse_filehandle **fufhp) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_filehandle *fufh; + + fufh = &(fvdat->fufh[fufh_type]); + if (!FUFH_IS_VALID(fufh)) { + fufh_type = FUFH_RDWR; + } + return fuse_filehandle_get(vp, fufh_type, fufhp); +} + +void +fuse_filehandle_init(struct vnode *vp, + fufh_type_t fufh_type, + struct fuse_filehandle **fufhp, + uint64_t fh_id) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_filehandle *fufh; + + DEBUG("id=%jd type=%d\n", (intmax_t)fh_id, fufh_type); + fufh = &(fvdat->fufh[fufh_type]); + MPASS(!FUFH_IS_VALID(fufh)); + fufh->fh_id = fh_id; + fufh->fh_type = fufh_type; + if (!FUFH_IS_VALID(fufh)) { + panic("FUSE: init: invalid filehandle id (type=%d)", fufh_type); + } + if (fufhp != NULL) + *fufhp = fufh; + + atomic_add_acq_int(&fuse_fh_count, 1); +} diff --git a/sys/fs/fuse/fuse_file.h b/sys/fs/fuse/fuse_file.h new file mode 100644 index 0000000..7d605ee --- /dev/null +++ b/sys/fs/fuse/fuse_file.h @@ -0,0 +1,153 @@ +/* + * Copyright (c) 2007-2009 Google Inc. and Amit Singh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _FUSE_FILE_H_ +#define _FUSE_FILE_H_ + +#include <sys/types.h> +#include <sys/fcntl.h> +#include <sys/stat.h> +#include <sys/mman.h> +#include <sys/vnode.h> + +typedef enum fufh_type { + FUFH_INVALID = -1, + FUFH_RDONLY = 0, + FUFH_WRONLY = 1, + FUFH_RDWR = 2, + FUFH_MAXTYPE = 3, +} fufh_type_t; + +struct fuse_filehandle { + uint64_t fh_id; + fufh_type_t fh_type; +}; + +#define FUFH_IS_VALID(f) ((f)->fh_type != FUFH_INVALID) + +static __inline__ +fufh_type_t +fuse_filehandle_xlate_from_mmap(int fflags) +{ + if (fflags & (PROT_READ | PROT_WRITE)) { + return FUFH_RDWR; + } else if (fflags & (PROT_WRITE)) { + return FUFH_WRONLY; + } else if ((fflags & PROT_READ) || (fflags & PROT_EXEC)) { + return FUFH_RDONLY; + } else { + return FUFH_INVALID; + } +} + +static __inline__ +fufh_type_t +fuse_filehandle_xlate_from_fflags(int fflags) +{ + if ((fflags & FREAD) && (fflags & FWRITE)) { + return FUFH_RDWR; + } else if (fflags & (FWRITE)) { + return FUFH_WRONLY; + } else if (fflags & (FREAD)) { + return FUFH_RDONLY; + } else { + panic("FUSE: What kind of a flag is this (%x)?", fflags); + } +} + +static __inline__ +int +fuse_filehandle_xlate_to_oflags(fufh_type_t type) +{ + int oflags = -1; + + switch (type) { + + case FUFH_RDONLY: + oflags = O_RDONLY; + break; + + case FUFH_WRONLY: + oflags = O_WRONLY; + break; + + case FUFH_RDWR: + oflags = O_RDWR; + break; + + default: + break; + } + + return oflags; +} + +int 
fuse_filehandle_valid(struct vnode *vp, fufh_type_t fufh_type); +int fuse_filehandle_get(struct vnode *vp, fufh_type_t fufh_type, + struct fuse_filehandle **fufhp); +int fuse_filehandle_getrw(struct vnode *vp, fufh_type_t fufh_type, + struct fuse_filehandle **fufhp); + +void fuse_filehandle_init(struct vnode *vp, fufh_type_t fufh_type, + struct fuse_filehandle **fufhp, uint64_t fh_id); +int fuse_filehandle_open(struct vnode *vp, fufh_type_t fufh_type, + struct fuse_filehandle **fufhp, struct thread *td, + struct ucred *cred); +int fuse_filehandle_close(struct vnode *vp, fufh_type_t fufh_type, + struct thread *td, struct ucred *cred); + +#endif /* _FUSE_FILE_H_ */ diff --git a/sys/fs/fuse/fuse_internal.c b/sys/fs/fuse/fuse_internal.c new file mode 100644 index 0000000..96496c7 --- /dev/null +++ b/sys/fs/fuse/fuse_internal.c @@ -0,0 +1,696 @@ +/* + * Copyright (c) 2007-2009 Google Inc. and Amit Singh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/module.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/uio.h> +#include <sys/malloc.h> +#include <sys/queue.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sx.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/stat.h> +#include <sys/unistd.h> +#include <sys/filedesc.h> +#include <sys/file.h> +#include <sys/fcntl.h> +#include <sys/dirent.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/sysctl.h> +#include <sys/priv.h> + +#include "fuse.h" +#include "fuse_file.h" +#include "fuse_internal.h" +#include "fuse_ipc.h" +#include "fuse_node.h" +#include "fuse_file.h" +#include "fuse_param.h" + +#define FUSE_DEBUG_MODULE INTERNAL +#include "fuse_debug.h" + +#ifdef ZERO_PAD_INCOMPLETE_BUFS +static int isbzero(void *buf, size_t len); + +#endif + +/* access */ + +int +fuse_internal_access(struct vnode *vp, + mode_t mode, + struct fuse_access_param *facp, + struct thread *td, + struct ucred *cred) +{ + int err = 0; + uint32_t mask = 0; + int dataflags; + int vtype; + struct mount *mp; + struct fuse_dispatcher fdi; + struct fuse_access_in *fai; + struct fuse_data *data; + + /* NOT YET DONE */ + /* + * If this vnop gives you trouble, just return 0 here for a lazy + * kludge. + */ + /* return 0;*/ + + fuse_trace_printf_func(); + + mp = vnode_mount(vp); + vtype = vnode_vtype(vp); + + data = fuse_get_mpdata(mp); + dataflags = data->dataflags; + + if ((mode & VWRITE) && vfs_isrdonly(mp)) { + return EACCES; + } + /* Unless explicitly permitted, deny everyone except the fs owner. 
*/ + if (vnode_isvroot(vp) && !(facp->facc_flags & FACCESS_NOCHECKSPY)) { + if (!(dataflags & FSESS_DAEMON_CAN_SPY)) { + int denied = fuse_match_cred(data->daemoncred, + cred); + + if (denied) { + return EPERM; + } + } + facp->facc_flags |= FACCESS_NOCHECKSPY; + } + if (!(facp->facc_flags & FACCESS_DO_ACCESS)) { + return 0; + } + if (((vtype == VREG) && (mode & VEXEC))) { +#ifdef NEED_MOUNT_ARGUMENT_FOR_THIS + /* Let the kernel handle this through open / close heuristics.*/ + return ENOTSUP; +#else + /* Let the kernel handle this. */ + return 0; +#endif + } + if (!fsess_isimpl(mp, FUSE_ACCESS)) { + /* Let the kernel handle this. */ + return 0; + } + if (dataflags & FSESS_DEFAULT_PERMISSIONS) { + /* Let the kernel handle this. */ + return 0; + } + if ((mode & VADMIN) != 0) { + err = priv_check_cred(cred, PRIV_VFS_ADMIN, 0); + if (err) { + return err; + } + } + if ((mode & (VWRITE | VAPPEND | VADMIN)) != 0) { + mask |= W_OK; + } + if ((mode & VREAD) != 0) { + mask |= R_OK; + } + if ((mode & VEXEC) != 0) { + mask |= X_OK; + } + bzero(&fdi, sizeof(fdi)); + + fdisp_init(&fdi, sizeof(*fai)); + fdisp_make_vp(&fdi, FUSE_ACCESS, vp, td, cred); + + fai = fdi.indata; + fai->mask = F_OK; + fai->mask |= mask; + + err = fdisp_wait_answ(&fdi); + fdisp_destroy(&fdi); + + if (err == ENOSYS) { + fsess_set_notimpl(mp, FUSE_ACCESS); + err = 0; + } + return err; +} + +/* fsync */ + +int +fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio) +{ + fuse_trace_printf_func(); + + if (tick->tk_aw_ohead.error == ENOSYS) { + fsess_set_notimpl(tick->tk_data->mp, fticket_opcode(tick)); + } + return 0; +} + +int +fuse_internal_fsync(struct vnode *vp, + struct thread *td, + struct ucred *cred, + struct fuse_filehandle *fufh) +{ + int op = FUSE_FSYNC; + struct fuse_fsync_in *ffsi; + struct fuse_dispatcher fdi; + + fuse_trace_printf_func(); + + if (vnode_isdir(vp)) { + op = FUSE_FSYNCDIR; + } + fdisp_init(&fdi, sizeof(*ffsi)); + fdisp_make_vp(&fdi, op, vp, td, cred); + ffsi = 
fdi.indata; + ffsi->fh = fufh->fh_id; + + ffsi->fsync_flags = 1; /* datasync */ + + fuse_insert_callback(fdi.tick, fuse_internal_fsync_callback); + fuse_insert_message(fdi.tick); + + fdisp_destroy(&fdi); + + return 0; + +} + +/* readdir */ + +int +fuse_internal_readdir(struct vnode *vp, + struct uio *uio, + struct fuse_filehandle *fufh, + struct fuse_iov *cookediov) +{ + int err = 0; + struct fuse_dispatcher fdi; + struct fuse_read_in *fri; + + if (uio_resid(uio) == 0) { + return 0; + } + fdisp_init(&fdi, 0); + + /* + * Note that we DO NOT have a UIO_SYSSPACE here (so no need for p2p + * I/O). + */ + + while (uio_resid(uio) > 0) { + + fdi.iosize = sizeof(*fri); + fdisp_make_vp(&fdi, FUSE_READDIR, vp, NULL, NULL); + + fri = fdi.indata; + fri->fh = fufh->fh_id; + fri->offset = uio_offset(uio); + fri->size = min(uio_resid(uio), FUSE_DEFAULT_IOSIZE); + /* mp->max_read */ + + if ((err = fdisp_wait_answ(&fdi))) { + break; + } + if ((err = fuse_internal_readdir_processdata(uio, fri->size, fdi.answ, + fdi.iosize, cookediov))) { + break; + } + } + + fdisp_destroy(&fdi); + return ((err == -1) ? 0 : err); +} + +int +fuse_internal_readdir_processdata(struct uio *uio, + size_t reqsize, + void *buf, + size_t bufsize, + void *param) +{ + int err = 0; + int cou = 0; + int bytesavail; + size_t freclen; + + struct dirent *de; + struct fuse_dirent *fudge; + struct fuse_iov *cookediov = param; + + if (bufsize < FUSE_NAME_OFFSET) { + return -1; + } + for (;;) { + + if (bufsize < FUSE_NAME_OFFSET) { + err = -1; + break; + } + fudge = (struct fuse_dirent *)buf; + freclen = FUSE_DIRENT_SIZE(fudge); + + cou++; + + if (bufsize < freclen) { + err = ((cou == 1) ? 
-1 : 0); + break; + } +#ifdef ZERO_PAD_INCOMPLETE_BUFS + if (isbzero(buf, FUSE_NAME_OFFSET)) { + err = -1; + break; + } +#endif + + if (!fudge->namelen || fudge->namelen > MAXNAMLEN) { + err = EINVAL; + break; + } + bytesavail = GENERIC_DIRSIZ((struct pseudo_dirent *) + &fudge->namelen); + + if (bytesavail > uio_resid(uio)) { + err = -1; + break; + } + fiov_refresh(cookediov); + fiov_adjust(cookediov, bytesavail); + + de = (struct dirent *)cookediov->base; + de->d_fileno = fudge->ino; /* XXX: truncation */ + de->d_reclen = bytesavail; + de->d_type = fudge->type; + de->d_namlen = fudge->namelen; + memcpy((char *)cookediov->base + sizeof(struct dirent) - + MAXNAMLEN - 1, + (char *)buf + FUSE_NAME_OFFSET, fudge->namelen); + ((char *)cookediov->base)[bytesavail] = '\0'; + + err = uiomove(cookediov->base, cookediov->len, uio); + if (err) { + break; + } + buf = (char *)buf + freclen; + bufsize -= freclen; + uio_setoffset(uio, fudge->off); + } + + return err; +} + +/* remove */ + +#ifdef XXXIP +static int +fuse_internal_remove_callback(struct vnode *vp, void *cargs) +{ + struct vattr *vap; + uint64_t target_nlink; + + vap = VTOVA(vp); + + target_nlink = *(uint64_t *)cargs; + + /* somewhat lame "heuristics", but you got better ideas? 
*/ + if ((vap->va_nlink == target_nlink) && vnode_isreg(vp)) { + fuse_invalidate_attr(vp); + } + return 0; +} + +#endif + +#define INVALIDATE_CACHED_VATTRS_UPON_UNLINK 1 +int +fuse_internal_remove(struct vnode *dvp, + struct vnode *vp, + struct componentname *cnp, + enum fuse_opcode op) +{ + struct fuse_dispatcher fdi; + + struct vattr *vap = VTOVA(vp); + +#if INVALIDATE_CACHED_VATTRS_UPON_UNLINK + int need_invalidate = 0; + uint64_t target_nlink = 0; + +#endif + int err = 0; + + debug_printf("dvp=%p, cnp=%p, op=%d\n", vp, cnp, op); + + fdisp_init(&fdi, cnp->cn_namelen + 1); + fdisp_make_vp(&fdi, op, dvp, cnp->cn_thread, cnp->cn_cred); + + memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); + ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; + +#if INVALIDATE_CACHED_VATTRS_UPON_UNLINK + if (vap->va_nlink > 1) { + need_invalidate = 1; + target_nlink = vap->va_nlink; + } +#endif + + err = fdisp_wait_answ(&fdi); + fdisp_destroy(&fdi); + + fuse_invalidate_attr(dvp); + fuse_invalidate_attr(vp); + +#ifdef XXXIP + /* + * XXX: INVALIDATE_CACHED_VATTRS_UPON_UNLINK + * + * Consider the case where vap->va_nlink > 1 for the entity being + * removed. In our world, other in-memory vnodes that share a link + * count each with this one may not know right way that this one just + * got deleted. We should let them know, say, through a vnode_iterate() + * here and a callback that does fuse_invalidate_attr(vp) on each + * relevant vnode. 
+ */ + if (need_invalidate && !err) { + vnode_iterate(vnode_mount(vp), 0, fuse_internal_remove_callback, + (void *)&target_nlink); + } +#endif + + return err; +} + +/* rename */ + +int +fuse_internal_rename(struct vnode *fdvp, + struct componentname *fcnp, + struct vnode *tdvp, + struct componentname *tcnp) +{ + struct fuse_dispatcher fdi; + struct fuse_rename_in *fri; + int err = 0; + + fdisp_init(&fdi, sizeof(*fri) + fcnp->cn_namelen + tcnp->cn_namelen + 2); + fdisp_make_vp(&fdi, FUSE_RENAME, fdvp, tcnp->cn_thread, tcnp->cn_cred); + + fri = fdi.indata; + fri->newdir = VTOI(tdvp); + memcpy((char *)fdi.indata + sizeof(*fri), fcnp->cn_nameptr, + fcnp->cn_namelen); + ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen] = '\0'; + memcpy((char *)fdi.indata + sizeof(*fri) + fcnp->cn_namelen + 1, + tcnp->cn_nameptr, tcnp->cn_namelen); + ((char *)fdi.indata)[sizeof(*fri) + fcnp->cn_namelen + + tcnp->cn_namelen + 1] = '\0'; + + err = fdisp_wait_answ(&fdi); + fdisp_destroy(&fdi); + + fuse_invalidate_attr(fdvp); + if (tdvp != fdvp) { + fuse_invalidate_attr(tdvp); + } + return err; +} + +/* strategy */ + +/* entity creation */ + +void +fuse_internal_newentry_makerequest(struct mount *mp, + uint64_t dnid, + struct componentname *cnp, + enum fuse_opcode op, + void *buf, + size_t bufsize, + struct fuse_dispatcher *fdip) +{ + debug_printf("fdip=%p\n", fdip); + + fdip->iosize = bufsize + cnp->cn_namelen + 1; + + fdisp_make(fdip, op, mp, dnid, cnp->cn_thread, cnp->cn_cred); + memcpy(fdip->indata, buf, bufsize); + memcpy((char *)fdip->indata + bufsize, cnp->cn_nameptr, cnp->cn_namelen); + ((char *)fdip->indata)[bufsize + cnp->cn_namelen] = '\0'; +} + +int +fuse_internal_newentry_core(struct vnode *dvp, + struct vnode **vpp, + struct componentname *cnp, + enum vtype vtyp, + struct fuse_dispatcher *fdip) +{ + int err = 0; + struct fuse_entry_out *feo; + struct mount *mp = vnode_mount(dvp); + + if ((err = fdisp_wait_answ(fdip))) { + return err; + } + feo = fdip->answ; + + if ((err = 
fuse_internal_checkentry(feo, vtyp))) { + return err; + } + err = fuse_vnode_get(mp, feo->nodeid, dvp, vpp, cnp, vtyp); + if (err) { + fuse_internal_forget_send(mp, cnp->cn_thread, cnp->cn_cred, + feo->nodeid, 1); + return err; + } + cache_attrs(*vpp, feo); + + return err; +} + +int +fuse_internal_newentry(struct vnode *dvp, + struct vnode **vpp, + struct componentname *cnp, + enum fuse_opcode op, + void *buf, + size_t bufsize, + enum vtype vtype) +{ + int err; + struct fuse_dispatcher fdi; + struct mount *mp = vnode_mount(dvp); + + fdisp_init(&fdi, 0); + fuse_internal_newentry_makerequest(mp, VTOI(dvp), cnp, op, buf, + bufsize, &fdi); + err = fuse_internal_newentry_core(dvp, vpp, cnp, vtype, &fdi); + fdisp_destroy(&fdi); + fuse_invalidate_attr(dvp); + + return err; +} + +/* entity destruction */ + +int +fuse_internal_forget_callback(struct fuse_ticket *ftick, struct uio *uio) +{ + fuse_internal_forget_send(ftick->tk_data->mp, curthread, NULL, + ((struct fuse_in_header *)ftick->tk_ms_fiov.base)->nodeid, 1); + + return 0; +} + +void +fuse_internal_forget_send(struct mount *mp, + struct thread *td, + struct ucred *cred, + uint64_t nodeid, + uint64_t nlookup) +{ + + struct fuse_dispatcher fdi; + struct fuse_forget_in *ffi; + + debug_printf("mp=%p, nodeid=%ju, nlookup=%ju\n", + mp, (uintmax_t)nodeid, (uintmax_t)nlookup); + + /* + * KASSERT(nlookup > 0, ("zero-times forget for vp #%llu", + * (long long unsigned) nodeid)); + */ + + fdisp_init(&fdi, sizeof(*ffi)); + fdisp_make(&fdi, FUSE_FORGET, mp, nodeid, td, cred); + + ffi = fdi.indata; + ffi->nlookup = nlookup; + + fuse_insert_message(fdi.tick); + fdisp_destroy(&fdi); +} + +void +fuse_internal_vnode_disappear(struct vnode *vp) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + + ASSERT_VOP_ELOCKED(vp, "fuse_internal_vnode_disappear"); + fvdat->flag |= FN_REVOKED; + cache_purge(vp); +} + +/* fuse start/stop */ + +int +fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio) +{ + int err = 0; + struct 
fuse_data *data = tick->tk_data; + struct fuse_init_out *fiio; + + if ((err = tick->tk_aw_ohead.error)) { + goto out; + } + if ((err = fticket_pull(tick, uio))) { + goto out; + } + fiio = fticket_resp(tick)->base; + + /* XXX: Do we want to check anything further besides this? */ + if (fiio->major < 7) { + debug_printf("userpace version too low\n"); + err = EPROTONOSUPPORT; + goto out; + } + data->fuse_libabi_major = fiio->major; + data->fuse_libabi_minor = fiio->minor; + + if (fuse_libabi_geq(data, 7, 5)) { + if (fticket_resp(tick)->len == sizeof(struct fuse_init_out)) { + data->max_write = fiio->max_write; + } else { + err = EINVAL; + } + } else { + /* Old fix values */ + data->max_write = 4096; + } + +out: + if (err) { + fdata_set_dead(data); + } + FUSE_LOCK(); + data->dataflags |= FSESS_INITED; + wakeup(&data->ticketer); + FUSE_UNLOCK(); + + return 0; +} + +void +fuse_internal_send_init(struct fuse_data *data, struct thread *td) +{ + struct fuse_init_in *fiii; + struct fuse_dispatcher fdi; + + fdisp_init(&fdi, sizeof(*fiii)); + fdisp_make(&fdi, FUSE_INIT, data->mp, 0, td, NULL); + fiii = fdi.indata; + fiii->major = FUSE_KERNEL_VERSION; + fiii->minor = FUSE_KERNEL_MINOR_VERSION; + fiii->max_readahead = FUSE_DEFAULT_IOSIZE * 16; + fiii->flags = 0; + + fuse_insert_callback(fdi.tick, fuse_internal_init_callback); + fuse_insert_message(fdi.tick); + fdisp_destroy(&fdi); +} + +#ifdef ZERO_PAD_INCOMPLETE_BUFS +static int +isbzero(void *buf, size_t len) +{ + int i; + + for (i = 0; i < len; i++) { + if (((char *)buf)[i]) + return (0); + } + + return (1); +} + +#endif diff --git a/sys/fs/fuse/fuse_internal.h b/sys/fs/fuse/fuse_internal.h new file mode 100644 index 0000000..9cf20e9 --- /dev/null +++ b/sys/fs/fuse/fuse_internal.h @@ -0,0 +1,393 @@ +/* + * Copyright (c) 2007-2009 Google Inc. and Amit Singh + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. 
Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _FUSE_INTERNAL_H_ +#define _FUSE_INTERNAL_H_ + +#include <sys/types.h> +#include <sys/uio.h> +#include <sys/stat.h> +#include <sys/vnode.h> + +#include "fuse_ipc.h" +#include "fuse_node.h" + +static __inline int +vfs_isrdonly(struct mount *mp) +{ + return ((mp->mnt_flag & MNT_RDONLY) != 0 ? 1 : 0); +} + +static __inline struct mount * +vnode_mount(struct vnode *vp) +{ + return (vp->v_mount); +} + +static __inline int +vnode_mountedhere(struct vnode *vp) +{ + return (vp->v_mountedhere != NULL ? 1 : 0); +} + +static __inline enum vtype +vnode_vtype(struct vnode *vp) +{ + return (vp->v_type); +} + +static __inline int +vnode_isvroot(struct vnode *vp) +{ + return ((vp->v_vflag & VV_ROOT) != 0 ? 1 : 0); +} + +static __inline int +vnode_isreg(struct vnode *vp) +{ + return (vp->v_type == VREG ? 1 : 0); +} + +static __inline int +vnode_isdir(struct vnode *vp) +{ + return (vp->v_type == VDIR ? 
1 : 0); +} + +static __inline int +vnode_islnk(struct vnode *vp) +{ + return (vp->v_type == VLNK ? 1 : 0); +} + +static __inline ssize_t +uio_resid(struct uio *uio) +{ + return (uio->uio_resid); +} + +static __inline off_t +uio_offset(struct uio *uio) +{ + return (uio->uio_offset); +} + +static __inline void +uio_setoffset(struct uio *uio, off_t offset) +{ + uio->uio_offset = offset; +} + +static __inline void +uio_setresid(struct uio *uio, ssize_t resid) +{ + uio->uio_resid = resid; +} + +/* time */ + +#define fuse_timespec_add(vvp, uvp) \ + do { \ + (vvp)->tv_sec += (uvp)->tv_sec; \ + (vvp)->tv_nsec += (uvp)->tv_nsec; \ + if ((vvp)->tv_nsec >= 1000000000) { \ + (vvp)->tv_sec++; \ + (vvp)->tv_nsec -= 1000000000; \ + } \ + } while (0) + +#define fuse_timespec_cmp(tvp, uvp, cmp) \ + (((tvp)->tv_sec == (uvp)->tv_sec) ? \ + ((tvp)->tv_nsec cmp (uvp)->tv_nsec) : \ + ((tvp)->tv_sec cmp (uvp)->tv_sec)) + +/* miscellaneous */ + +static __inline__ +int +fuse_isdeadfs(struct vnode *vp) +{ + struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); + + return (data->dataflags & FSESS_DEAD); +} + +static __inline__ +int +fuse_iosize(struct vnode *vp) +{ + return vp->v_mount->mnt_stat.f_iosize; +} + +/* access */ + +#define FVP_ACCESS_NOOP 0x01 + +#define FACCESS_VA_VALID 0x01 +#define FACCESS_DO_ACCESS 0x02 +#define FACCESS_STICKY 0x04 +#define FACCESS_CHOWN 0x08 +#define FACCESS_NOCHECKSPY 0x10 +#define FACCESS_SETGID 0x12 + +#define FACCESS_XQUERIES FACCESS_STICKY | FACCESS_CHOWN | FACCESS_SETGID + +struct fuse_access_param { + uid_t xuid; + gid_t xgid; + uint32_t facc_flags; +}; + +static __inline int +fuse_match_cred(struct ucred *basecred, struct ucred *usercred) +{ + if (basecred->cr_uid == usercred->cr_uid && + basecred->cr_uid == usercred->cr_ruid && + basecred->cr_uid == usercred->cr_svuid && + basecred->cr_groups[0] == usercred->cr_groups[0] && + basecred->cr_groups[0] == usercred->cr_rgid && + basecred->cr_groups[0] == usercred->cr_svgid) + return 0; + + return 
EPERM; +} + +int +fuse_internal_access(struct vnode *vp, + mode_t mode, + struct fuse_access_param *facp, + struct thread *td, + struct ucred *cred); + +/* attributes */ + +static __inline +void +fuse_internal_attr_fat2vat(struct mount *mp, + struct fuse_attr *fat, + struct vattr *vap) +{ + DEBUGX(FUSE_DEBUG_INTERNAL, + "node #%ju, mode 0%o\n", (uintmax_t)fat->ino, fat->mode); + + vattr_null(vap); + + vap->va_fsid = mp->mnt_stat.f_fsid.val[0]; + vap->va_fileid = fat->ino; /* XXX cast from 64 bits to 32 */ + vap->va_mode = fat->mode & ~S_IFMT; + vap->va_nlink = fat->nlink; + vap->va_uid = fat->uid; + vap->va_gid = fat->gid; + vap->va_rdev = fat->rdev; + vap->va_size = fat->size; + vap->va_atime.tv_sec = fat->atime; /* XXX on some platforms cast from 64 bits to 32 */ + vap->va_atime.tv_nsec = fat->atimensec; + vap->va_mtime.tv_sec = fat->mtime; + vap->va_mtime.tv_nsec = fat->mtimensec; + vap->va_ctime.tv_sec = fat->ctime; + vap->va_ctime.tv_nsec = fat->ctimensec; + vap->va_blocksize = PAGE_SIZE; + vap->va_type = IFTOVT(fat->mode); + +#if (S_BLKSIZE == 512) + /* Optimize this case */ + vap->va_bytes = fat->blocks << 9; +#else + vap->va_bytes = fat->blocks * S_BLKSIZE; +#endif + + vap->va_flags = 0; +} + + +#define cache_attrs(vp, fuse_out) do { \ + struct timespec uptsp_ ## __func__; \ + \ + VTOFUD(vp)->cached_attrs_valid.tv_sec = (fuse_out)->attr_valid; \ + VTOFUD(vp)->cached_attrs_valid.tv_nsec = (fuse_out)->attr_valid_nsec; \ + nanouptime(&uptsp_ ## __func__); \ + \ + fuse_timespec_add(&VTOFUD(vp)->cached_attrs_valid, &uptsp_ ## __func__); \ + \ + fuse_internal_attr_fat2vat(vnode_mount(vp), &(fuse_out)->attr, VTOVA(vp)); \ +} while (0) + +/* fsync */ + +int +fuse_internal_fsync(struct vnode *vp, + struct thread *td, + struct ucred *cred, + struct fuse_filehandle *fufh); + +int +fuse_internal_fsync_callback(struct fuse_ticket *tick, struct uio *uio); + +/* readdir */ + +struct pseudo_dirent { + uint32_t d_namlen; +}; + +int +fuse_internal_readdir(struct vnode *vp, + 
struct uio *uio, + struct fuse_filehandle *fufh, + struct fuse_iov *cookediov); + +int +fuse_internal_readdir_processdata(struct uio *uio, + size_t reqsize, + void *buf, + size_t bufsize, + void *param); + +/* remove */ + +int +fuse_internal_remove(struct vnode *dvp, + struct vnode *vp, + struct componentname *cnp, + enum fuse_opcode op); + +/* rename */ + +int +fuse_internal_rename(struct vnode *fdvp, + struct componentname *fcnp, + struct vnode *tdvp, + struct componentname *tcnp); +/* revoke */ + +void +fuse_internal_vnode_disappear(struct vnode *vp); + +/* strategy */ + +/* entity creation */ + +static __inline +int +fuse_internal_checkentry(struct fuse_entry_out *feo, enum vtype vtyp) +{ + DEBUGX(FUSE_DEBUG_INTERNAL, + "feo=%p, vtype=%d\n", feo, vtyp); + + if (vtyp != IFTOVT(feo->attr.mode)) { + DEBUGX(FUSE_DEBUG_INTERNAL, + "EINVAL -- %x != %x\n", vtyp, IFTOVT(feo->attr.mode)); + return EINVAL; + } + + if (feo->nodeid == FUSE_NULL_ID) { + DEBUGX(FUSE_DEBUG_INTERNAL, + "EINVAL -- feo->nodeid is NULL\n"); + return EINVAL; + } + + if (feo->nodeid == FUSE_ROOT_ID) { + DEBUGX(FUSE_DEBUG_INTERNAL, + "EINVAL -- feo->nodeid is FUSE_ROOT_ID\n"); + return EINVAL; + } + + return 0; +} + +int +fuse_internal_newentry(struct vnode *dvp, + struct vnode **vpp, + struct componentname *cnp, + enum fuse_opcode op, + void *buf, + size_t bufsize, + enum vtype vtyp); + +void +fuse_internal_newentry_makerequest(struct mount *mp, + uint64_t dnid, + struct componentname *cnp, + enum fuse_opcode op, + void *buf, + size_t bufsize, + struct fuse_dispatcher *fdip); + +int +fuse_internal_newentry_core(struct vnode *dvp, + struct vnode **vpp, + struct componentname *cnp, + enum vtype vtyp, + struct fuse_dispatcher *fdip); + +/* entity destruction */ + +int +fuse_internal_forget_callback(struct fuse_ticket *tick, struct uio *uio); + +void +fuse_internal_forget_send(struct mount *mp, + struct thread *td, + struct ucred *cred, + uint64_t nodeid, + uint64_t nlookup); + +/* fuse start/stop */ + 
+int fuse_internal_init_callback(struct fuse_ticket *tick, struct uio *uio); +void fuse_internal_send_init(struct fuse_data *data, struct thread *td); + +#endif /* _FUSE_INTERNAL_H_ */ diff --git a/sys/fs/fuse/fuse_io.c b/sys/fs/fuse/fuse_io.c new file mode 100644 index 0000000..7735329 --- /dev/null +++ b/sys/fs/fuse/fuse_io.c @@ -0,0 +1,811 @@ +/* + * Copyright (c) 2007-2009 Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. 
+ * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
 */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/module.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/unistd.h>
#include <sys/filedesc.h>
#include <sys/file.h>
#include <sys/fcntl.h>
#include <sys/bio.h>
#include <sys/buf.h>
#include <sys/sysctl.h>

#include <vm/vm.h>
#include <vm/vm_extern.h>
#include <vm/pmap.h>
#include <vm/vm_map.h>
#include <vm/vm_page.h>
#include <vm/vm_object.h>
#include <vm/vm_pager.h>
#include <vm/vnode_pager.h>
#include <vm/vm_object.h>

#include "fuse.h"
#include "fuse_file.h"
#include "fuse_node.h"
#include "fuse_internal.h"
#include "fuse_ipc.h"
#include "fuse_io.h"

#define FUSE_DEBUG_MODULE IO
#include "fuse_debug.h"


static int
fuse_read_directbackend(struct vnode *vp, struct uio *uio,
    struct ucred *cred, struct fuse_filehandle *fufh);
static int
fuse_read_biobackend(struct vnode *vp, struct uio *uio,
    struct ucred *cred, struct fuse_filehandle *fufh);
static int
fuse_write_directbackend(struct vnode *vp, struct uio *uio,
    struct ucred *cred, struct fuse_filehandle *fufh);
static int
fuse_write_biobackend(struct vnode *vp, struct uio *uio,
    struct ucred *cred, struct fuse_filehandle *fufh);

/*
 * Entry point for read/write I/O on a regular file.
 *
 * Looks up a file handle suitable for the direction of "uio" and routes
 * the transfer to the direct or the buffered backend.  Direct I/O is used
 * when IO_DIRECT is set in "ioflag" or when data caching is disabled for
 * the mount.  After a direct write the vnode's cached attributes are
 * invalidated.
 *
 * Returns 0 or an errno; fails early with the lookup error when no file
 * handle is available.  Panics on a uio direction other than read/write.
 */
int
fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag,
    struct ucred *cred)
{
	struct fuse_filehandle *fufh;
	int err, directio;

	MPASS(vp->v_type == VREG);

	err = fuse_filehandle_getrw(vp,
	    (uio->uio_rw == UIO_READ) ? FUFH_RDONLY : FUFH_WRONLY, &fufh);
	if (err) {
		printf("FUSE: io dispatch: filehandles are closed\n");
		return err;
	}
	/*
	 * Ideally, when the daemon asks for direct io at open time, the
	 * standard file flag should be set according to this, so that would
	 * just change the default mode, which later on could be changed via
	 * fcntl(2).
	 * But this doesn't work, the O_DIRECT flag gets cleared at some point
	 * (don't know where). So to make any use of the Fuse direct_io option,
	 * we hardwire it into the file's private data (similarly to Linux,
	 * btw.).
	 */
	directio = (ioflag & IO_DIRECT) || !fsess_opt_datacache(vnode_mount(vp));

	switch (uio->uio_rw) {
	case UIO_READ:
		if (directio) {
			DEBUG("direct read of vnode %ju via file handle %ju\n",
			    (uintmax_t)VTOILLU(vp), (uintmax_t)fufh->fh_id);
			err = fuse_read_directbackend(vp, uio, cred, fufh);
		} else {
			DEBUG("buffered read of vnode %ju\n",
			    (uintmax_t)VTOILLU(vp));
			err = fuse_read_biobackend(vp, uio, cred, fufh);
		}
		break;
	case UIO_WRITE:
		if (directio) {
			DEBUG("direct write of vnode %ju via file handle %ju\n",
			    (uintmax_t)VTOILLU(vp), (uintmax_t)fufh->fh_id);
			err = fuse_write_directbackend(vp, uio, cred, fufh);
			/* The daemon may have changed size/times behind our back. */
			fuse_invalidate_attr(vp);
		} else {
			DEBUG("buffered write of vnode %ju\n",
			    (uintmax_t)VTOILLU(vp));
			err = fuse_write_biobackend(vp, uio, cred, fufh);
		}
		break;
	default:
		panic("uninterpreted mode passed to fuse_io_dispatch");
	}

	return (err);
}

/*
 * Buffered read: satisfy "uio" from buffer-cache blocks of the mount's
 * I/O size, reading a block through fuse_io_strategy() when it is not
 * already cached.
 *
 * Returns 0 on success (including a zero-length request), EINVAL for a
 * negative offset, ENXIO when the session is dead, EINTR when getblk()
 * yields no buffer, or the error from the strategy routine or uiomove().
 * "cred" and "fufh" are not referenced in this function.
 */
static int
fuse_read_biobackend(struct vnode *vp, struct uio *uio,
    struct ucred *cred, struct fuse_filehandle *fufh)
{
	struct buf *bp;
	daddr_t lbn;
	int bcount;
	int err = 0, n = 0, on = 0;
	off_t filesize;

	const int biosize = fuse_iosize(vp);

	DEBUG("resid=%zx offset=%jx fsize=%jx\n",
	    uio->uio_resid, uio->uio_offset, VTOFUD(vp)->filesize);

	if (uio->uio_resid == 0)
		return (0);
	if (uio->uio_offset < 0)
		return (EINVAL);

	bcount = MIN(MAXBSIZE, biosize);
	/* The file size is sampled once, before the loop. */
	filesize = VTOFUD(vp)->filesize;

	do {
		if (fuse_isdeadfs(vp)) {
			err = ENXIO;
			break;
		}
		/*
		 * Block number and offset within the block.
		 * NOTE(review): the mask form of "on" agrees with the division
		 * above only when biosize is a power of two -- confirm.
		 */
		lbn = uio->uio_offset / biosize;
		on = uio->uio_offset & (biosize - 1);

		DEBUG2G("biosize %d, lbn %d, on %d\n", biosize, (int)lbn, on);

		/*
		 * Obtain the buffer cache block.  Figure out the buffer size
		 * when we are at EOF.  If we are modifying the size of the
		 * buffer based on an EOF condition we need to hold
		 * nfs_rslock() through obtaining the buffer to prevent
		 * a potential writer-appender from messing with n_size.
		 * Otherwise we may accidentally truncate the buffer and
		 * lose dirty data.
		 *
		 * Note that bcount is *not* DEV_BSIZE aligned.
		 *
		 * NOTE(review): the nfs_rslock()/n_size wording reads as if
		 * carried over from the NFS client; no such lock is taken
		 * in the code below.
		 */
		if ((off_t)lbn * biosize >= filesize) {
			bcount = 0;
		} else if ((off_t)(lbn + 1) * biosize > filesize) {
			bcount = filesize - (off_t)lbn *biosize;
		}
		bp = getblk(vp, lbn, bcount, PCATCH, 0, 0);

		if (!bp)
			return (EINTR);

		/*
		 * If B_CACHE is not set, we must issue the read.  If this
		 * fails, we return an error.
		 */

		if ((bp->b_flags & B_CACHE) == 0) {
			bp->b_iocmd = BIO_READ;
			vfs_busy_pages(bp, 0);
			err = fuse_io_strategy(vp, bp);
			if (err) {
				brelse(bp);
				return (err);
			}
		}
		/*
		 * on is the offset into the current bp.  Figure out how many
		 * bytes we can copy out of the bp.  Note that bcount is
		 * NOT DEV_BSIZE aligned.
		 *
		 * Then figure out how many bytes we can copy into the uio.
		 */

		n = 0;
		if (on < bcount)
			n = MIN((unsigned)(bcount - on), uio->uio_resid);
		if (n > 0) {
			DEBUG2G("feeding buffeater with %d bytes of buffer %p,"
			    " saying %d was asked for\n",
			    n, bp->b_data + on, n + (int)bp->b_resid);
			err = uiomove(bp->b_data + on, n, uio);
		}
		brelse(bp);
		DEBUG2G("end of turn, err %d, uio->uio_resid %zd, n %d\n",
		    err, uio->uio_resid, n);
		/* n == 0 means nothing could be copied from this block: stop. */
	} while (err == 0 && uio->uio_resid > 0 && n > 0);

	return (err);
}

static int
fuse_read_directbackend(struct vnode *vp, struct uio *uio,
    struct ucred *cred, struct fuse_filehandle *fufh)
{
	struct fuse_dispatcher fdi;
	struct fuse_read_in *fri;
	int err = 0;

	if (uio->uio_resid == 0)
		return (0);

	fdisp_init(&fdi, 0);

	/*
	 * XXX In "normal" case we use an intermediate kernel buffer for
	 * transmitting data from daemon's context to ours. Eventually, we should
	 * get rid of this. Anyway, if the target uio lives in sysspace (we are
	 * called from pageops), and the input data doesn't need kernel-side
	 * processing (we are not called from readdir) we can already invoke
	 * an optimized, "peer-to-peer" I/O routine.
+ */ + while (uio->uio_resid > 0) { + fdi.iosize = sizeof(*fri); + fdisp_make_vp(&fdi, FUSE_READ, vp, uio->uio_td, cred); + fri = fdi.indata; + fri->fh = fufh->fh_id; + fri->offset = uio->uio_offset; + fri->size = MIN(uio->uio_resid, + fuse_get_mpdata(vp->v_mount)->max_read); + + DEBUG2G("fri->fh %ju, fri->offset %ju, fri->size %ju\n", + (uintmax_t)fri->fh, (uintmax_t)fri->offset, + (uintmax_t)fri->size); + + if ((err = fdisp_wait_answ(&fdi))) + goto out; + + DEBUG2G("complete: got iosize=%d, requested fri.size=%zd; " + "resid=%zd offset=%ju\n", + fri->size, fdi.iosize, uio->uio_resid, + (uintmax_t)uio->uio_offset); + + if ((err = uiomove(fdi.answ, MIN(fri->size, fdi.iosize), uio))) + break; + if (fdi.iosize < fri->size) + break; + } + +out: + fdisp_destroy(&fdi); + return (err); +} + +static int +fuse_write_directbackend(struct vnode *vp, struct uio *uio, + struct ucred *cred, struct fuse_filehandle *fufh) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_write_in *fwi; + struct fuse_dispatcher fdi; + size_t chunksize; + int diff; + int err = 0; + + if (!uio->uio_resid) + return (0); + + fdisp_init(&fdi, 0); + + while (uio->uio_resid > 0) { + chunksize = MIN(uio->uio_resid, + fuse_get_mpdata(vp->v_mount)->max_write); + + fdi.iosize = sizeof(*fwi) + chunksize; + fdisp_make_vp(&fdi, FUSE_WRITE, vp, uio->uio_td, cred); + + fwi = fdi.indata; + fwi->fh = fufh->fh_id; + fwi->offset = uio->uio_offset; + fwi->size = chunksize; + + if ((err = uiomove((char *)fdi.indata + sizeof(*fwi), + chunksize, uio))) + break; + + if ((err = fdisp_wait_answ(&fdi))) + break; + + diff = chunksize - ((struct fuse_write_out *)fdi.answ)->size; + if (diff < 0) { + err = EINVAL; + break; + } + uio->uio_resid += diff; + uio->uio_offset -= diff; + if (uio->uio_offset > fvdat->filesize) + fuse_vnode_setsize(vp, cred, uio->uio_offset); + } + + fdisp_destroy(&fdi); + + return (err); +} + +static int +fuse_write_biobackend(struct vnode *vp, struct uio *uio, + struct ucred *cred, struct 
fuse_filehandle *fufh) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct buf *bp; + daddr_t lbn; + int bcount; + int n, on, err = 0; + + const int biosize = fuse_iosize(vp); + + KASSERT(uio->uio_rw == UIO_WRITE, ("ncl_write mode")); + DEBUG("resid=%zx offset=%jx fsize=%jx\n", + uio->uio_resid, uio->uio_offset, fvdat->filesize); + if (vp->v_type != VREG) + return (EIO); + if (uio->uio_offset < 0) + return (EINVAL); + if (uio->uio_resid == 0) + return (0); + + /* + * Find all of this file's B_NEEDCOMMIT buffers. If our writes + * would exceed the local maximum per-file write commit size when + * combined with those, we must decide whether to flush, + * go synchronous, or return err. We don't bother checking + * IO_UNIT -- we just make all writes atomic anyway, as there's + * no point optimizing for something that really won't ever happen. + */ + do { + if (fuse_isdeadfs(vp)) { + err = ENXIO; + break; + } + lbn = uio->uio_offset / biosize; + on = uio->uio_offset & (biosize - 1); + n = MIN((unsigned)(biosize - on), uio->uio_resid); + + DEBUG2G("lbn %ju, on %d, n %d, uio offset %ju, uio resid %zd\n", + (uintmax_t)lbn, on, n, + (uintmax_t)uio->uio_offset, uio->uio_resid); + +again: + /* + * Handle direct append and file extension cases, calculate + * unaligned buffer size. + */ + if (uio->uio_offset == fvdat->filesize && n) { + /* + * Get the buffer (in its pre-append state to maintain + * B_CACHE if it was previously set). Resize the + * nfsnode after we have locked the buffer to prevent + * readers from reading garbage. 
+ */ + bcount = on; + DEBUG("getting block from OS, bcount %d\n", bcount); + bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); + + if (bp != NULL) { + long save; + + err = fuse_vnode_setsize(vp, cred, + uio->uio_offset + n); + if (err) { + brelse(bp); + break; + } + save = bp->b_flags & B_CACHE; + bcount += n; + allocbuf(bp, bcount); + bp->b_flags |= save; + } + } else { + /* + * Obtain the locked cache block first, and then + * adjust the file's size as appropriate. + */ + bcount = on + n; + if ((off_t)lbn * biosize + bcount < fvdat->filesize) { + if ((off_t)(lbn + 1) * biosize < fvdat->filesize) + bcount = biosize; + else + bcount = fvdat->filesize - + (off_t)lbn *biosize; + } + DEBUG("getting block from OS, bcount %d\n", bcount); + bp = getblk(vp, lbn, bcount, PCATCH, 0, 0); + if (bp && uio->uio_offset + n > fvdat->filesize) { + err = fuse_vnode_setsize(vp, cred, + uio->uio_offset + n); + if (err) { + brelse(bp); + break; + } + } + } + + if (!bp) { + err = EINTR; + break; + } + /* + * Issue a READ if B_CACHE is not set. In special-append + * mode, B_CACHE is based on the buffer prior to the write + * op and is typically set, avoiding the read. If a read + * is required in special append mode, the server will + * probably send us a short-read since we extended the file + * on our end, resulting in b_resid == 0 and, thusly, + * B_CACHE getting set. + * + * We can also avoid issuing the read if the write covers + * the entire buffer. We have to make sure the buffer state + * is reasonable in this case since we will not be initiating + * I/O. See the comments in kern/vfs_bio.c's getblk() for + * more information. + * + * B_CACHE may also be set due to the buffer being cached + * normally. 
+ */ + + if (on == 0 && n == bcount) { + bp->b_flags |= B_CACHE; + bp->b_flags &= ~B_INVAL; + bp->b_ioflags &= ~BIO_ERROR; + } + if ((bp->b_flags & B_CACHE) == 0) { + bp->b_iocmd = BIO_READ; + vfs_busy_pages(bp, 0); + fuse_io_strategy(vp, bp); + if ((err = bp->b_error)) { + brelse(bp); + break; + } + } + if (bp->b_wcred == NOCRED) + bp->b_wcred = crhold(cred); + + /* + * If dirtyend exceeds file size, chop it down. This should + * not normally occur but there is an append race where it + * might occur XXX, so we log it. + * + * If the chopping creates a reverse-indexed or degenerate + * situation with dirtyoff/end, we 0 both of them. + */ + + if (bp->b_dirtyend > bcount) { + DEBUG("FUSE append race @%lx:%d\n", + (long)bp->b_blkno * biosize, + bp->b_dirtyend - bcount); + bp->b_dirtyend = bcount; + } + if (bp->b_dirtyoff >= bp->b_dirtyend) + bp->b_dirtyoff = bp->b_dirtyend = 0; + + /* + * If the new write will leave a contiguous dirty + * area, just update the b_dirtyoff and b_dirtyend, + * otherwise force a write rpc of the old dirty area. + * + * While it is possible to merge discontiguous writes due to + * our having a B_CACHE buffer ( and thus valid read data + * for the hole), we don't because it could lead to + * significant cache coherency problems with multiple clients, + * especially if locking is implemented later on. + * + * as an optimization we could theoretically maintain + * a linked list of discontinuous areas, but we would still + * have to commit them separately so there isn't much + * advantage to it except perhaps a bit of asynchronization. + */ + + if (bp->b_dirtyend > 0 && + (on > bp->b_dirtyend || (on + n) < bp->b_dirtyoff)) { + /* + * Yes, we mean it. Write out everything to "storage" + * immediatly, without hesitation. (Apart from other + * reasons: the only way to know if a write is valid + * if its actually written out.) 
+ */ + bwrite(bp); + if (bp->b_error == EINTR) { + err = EINTR; + break; + } + goto again; + } + err = uiomove((char *)bp->b_data + on, n, uio); + + /* + * Since this block is being modified, it must be written + * again and not just committed. Since write clustering does + * not work for the stage 1 data write, only the stage 2 + * commit rpc, we have to clear B_CLUSTEROK as well. + */ + bp->b_flags &= ~(B_NEEDCOMMIT | B_CLUSTEROK); + + if (err) { + bp->b_ioflags |= BIO_ERROR; + bp->b_error = err; + brelse(bp); + break; + } + /* + * Only update dirtyoff/dirtyend if not a degenerate + * condition. + */ + if (n) { + if (bp->b_dirtyend > 0) { + bp->b_dirtyoff = MIN(on, bp->b_dirtyoff); + bp->b_dirtyend = MAX((on + n), bp->b_dirtyend); + } else { + bp->b_dirtyoff = on; + bp->b_dirtyend = on + n; + } + vfs_bio_set_valid(bp, on, n); + } + err = bwrite(bp); + if (err) + break; + } while (uio->uio_resid > 0 && n > 0); + + if (fuse_sync_resize && (fvdat->flag & FN_SIZECHANGE) != 0) + fuse_vnode_savesize(vp, cred); + + return (err); +} + +int +fuse_io_strategy(struct vnode *vp, struct buf *bp) +{ + struct fuse_filehandle *fufh; + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct ucred *cred; + struct uio *uiop; + struct uio uio; + struct iovec io; + int error = 0; + + const int biosize = fuse_iosize(vp); + + MPASS(vp->v_type == VREG); + MPASS(bp->b_iocmd == BIO_READ || bp->b_iocmd == BIO_WRITE); + DEBUG("inode=%ju offset=%jd resid=%ld\n", + (uintmax_t)VTOI(vp), (intmax_t)(((off_t)bp->b_blkno) * biosize), + bp->b_bcount); + + error = fuse_filehandle_getrw(vp, + (bp->b_iocmd == BIO_READ) ? FUFH_RDONLY : FUFH_WRONLY, &fufh); + if (error) { + printf("FUSE: strategy: filehandles are closed\n"); + bp->b_ioflags |= BIO_ERROR; + bp->b_error = error; + return (error); + } + cred = bp->b_iocmd == BIO_READ ? 
bp->b_rcred : bp->b_wcred; + + uiop = &uio; + uiop->uio_iov = &io; + uiop->uio_iovcnt = 1; + uiop->uio_segflg = UIO_SYSSPACE; + uiop->uio_td = curthread; + + /* + * clear BIO_ERROR and B_INVAL state prior to initiating the I/O. We + * do this here so we do not have to do it in all the code that + * calls us. + */ + bp->b_flags &= ~B_INVAL; + bp->b_ioflags &= ~BIO_ERROR; + + KASSERT(!(bp->b_flags & B_DONE), + ("fuse_io_strategy: bp %p already marked done", bp)); + if (bp->b_iocmd == BIO_READ) { + io.iov_len = uiop->uio_resid = bp->b_bcount; + io.iov_base = bp->b_data; + uiop->uio_rw = UIO_READ; + + uiop->uio_offset = ((off_t)bp->b_blkno) * biosize; + error = fuse_read_directbackend(vp, uiop, cred, fufh); + + if ((!error && uiop->uio_resid) || + (fsess_opt_brokenio(vnode_mount(vp)) && error == EIO && + uiop->uio_offset < fvdat->filesize && fvdat->filesize > 0 && + uiop->uio_offset >= fvdat->cached_attrs.va_size)) { + /* + * If we had a short read with no error, we must have + * hit a file hole. We should zero-fill the remainder. + * This can also occur if the server hits the file EOF. + * + * Holes used to be able to occur due to pending + * writes, but that is not possible any longer. 
+ */ + int nread = bp->b_bcount - uiop->uio_resid; + int left = uiop->uio_resid; + + if (error != 0) { + printf("FUSE: Fix broken io: offset %ju, " + " resid %zd, file size %ju/%ju\n", + (uintmax_t)uiop->uio_offset, + uiop->uio_resid, fvdat->filesize, + fvdat->cached_attrs.va_size); + error = 0; + } + if (left > 0) + bzero((char *)bp->b_data + nread, left); + uiop->uio_resid = 0; + } + if (error) { + bp->b_ioflags |= BIO_ERROR; + bp->b_error = error; + } + } else { + /* + * If we only need to commit, try to commit + */ + if (bp->b_flags & B_NEEDCOMMIT) { + DEBUG("write: B_NEEDCOMMIT flags set\n"); + } + /* + * Setup for actual write + */ + if ((off_t)bp->b_blkno * biosize + bp->b_dirtyend > + fvdat->filesize) + bp->b_dirtyend = fvdat->filesize - + (off_t)bp->b_blkno * biosize; + + if (bp->b_dirtyend > bp->b_dirtyoff) { + io.iov_len = uiop->uio_resid = bp->b_dirtyend + - bp->b_dirtyoff; + uiop->uio_offset = (off_t)bp->b_blkno * biosize + + bp->b_dirtyoff; + io.iov_base = (char *)bp->b_data + bp->b_dirtyoff; + uiop->uio_rw = UIO_WRITE; + + error = fuse_write_directbackend(vp, uiop, cred, fufh); + + if (error == EINTR || error == ETIMEDOUT + || (!error && (bp->b_flags & B_NEEDCOMMIT))) { + + bp->b_flags &= ~(B_INVAL | B_NOCACHE); + if ((bp->b_flags & B_PAGING) == 0) { + bdirty(bp); + bp->b_flags &= ~B_DONE; + } + if ((error == EINTR || error == ETIMEDOUT) && + (bp->b_flags & B_ASYNC) == 0) + bp->b_flags |= B_EINTR; + } else { + if (error) { + bp->b_ioflags |= BIO_ERROR; + bp->b_flags |= B_INVAL; + bp->b_error = error; + } + bp->b_dirtyoff = bp->b_dirtyend = 0; + } + } else { + bp->b_resid = 0; + bufdone(bp); + return (0); + } + } + bp->b_resid = uiop->uio_resid; + bufdone(bp); + return (error); +} + +int +fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td) +{ + struct vop_fsync_args a = { + .a_vp = vp, + .a_waitfor = waitfor, + .a_td = td, + }; + + return (vop_stdfsync(&a)); +} + +/* + * Flush and invalidate all dirty buffers. 
If another process is already + * doing the flush, just wait for completion. + */ +int +fuse_io_invalbuf(struct vnode *vp, struct thread *td) +{ + struct fuse_vnode_data *fvdat = VTOFUD(vp); + int error = 0; + + if (vp->v_iflag & VI_DOOMED) + return 0; + + ASSERT_VOP_ELOCKED(vp, "fuse_io_invalbuf"); + + while (fvdat->flag & FN_FLUSHINPROG) { + struct proc *p = td->td_proc; + + if (vp->v_mount->mnt_kern_flag & MNTK_UNMOUNTF) + return EIO; + fvdat->flag |= FN_FLUSHWANT; + tsleep(&fvdat->flag, PRIBIO + 2, "fusevinv", 2 * hz); + error = 0; + if (p != NULL) { + PROC_LOCK(p); + if (SIGNOTEMPTY(p->p_siglist) || + SIGNOTEMPTY(td->td_siglist)) + error = EINTR; + PROC_UNLOCK(p); + } + if (error == EINTR) + return EINTR; + } + fvdat->flag |= FN_FLUSHINPROG; + + if (vp->v_bufobj.bo_object != NULL) { + VM_OBJECT_LOCK(vp->v_bufobj.bo_object); + vm_object_page_clean(vp->v_bufobj.bo_object, 0, 0, OBJPC_SYNC); + VM_OBJECT_UNLOCK(vp->v_bufobj.bo_object); + } + error = vinvalbuf(vp, V_SAVE, PCATCH, 0); + while (error) { + if (error == ERESTART || error == EINTR) { + fvdat->flag &= ~FN_FLUSHINPROG; + if (fvdat->flag & FN_FLUSHWANT) { + fvdat->flag &= ~FN_FLUSHWANT; + wakeup(&fvdat->flag); + } + return EINTR; + } + error = vinvalbuf(vp, V_SAVE, PCATCH, 0); + } + fvdat->flag &= ~FN_FLUSHINPROG; + if (fvdat->flag & FN_FLUSHWANT) { + fvdat->flag &= ~FN_FLUSHWANT; + wakeup(&fvdat->flag); + } + return (error); +} diff --git a/sys/fs/fuse/fuse_io.h b/sys/fs/fuse/fuse_io.h new file mode 100644 index 0000000..b56e14a --- /dev/null +++ b/sys/fs/fuse/fuse_io.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2007-2009 Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef _FUSE_IO_H_ +#define _FUSE_IO_H_ + +int fuse_io_dispatch(struct vnode *vp, struct uio *uio, int ioflag, + struct ucred *cred); +int fuse_io_strategy(struct vnode *vp, struct buf *bp); +int fuse_io_flushbuf(struct vnode *vp, int waitfor, struct thread *td); +int fuse_io_invalbuf(struct vnode *vp, struct thread *td); + +#endif /* _FUSE_IO_H_ */ diff --git a/sys/fs/fuse/fuse_ipc.c b/sys/fs/fuse/fuse_ipc.c new file mode 100644 index 0000000..52d69b5 --- /dev/null +++ b/sys/fs/fuse/fuse_ipc.c @@ -0,0 +1,904 @@ +/* + * Copyright (c) 2007-2009 Google Inc. and Amit Singh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. 
nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/module.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/uio.h> +#include <sys/malloc.h> +#include <sys/queue.h> +#include <sys/lock.h> +#include <sys/sx.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/signalvar.h> +#include <sys/syscallsubr.h> +#include <sys/sysctl.h> +#include <vm/uma.h> + +#include "fuse.h" +#include "fuse_node.h" +#include "fuse_ipc.h" +#include "fuse_internal.h" + +#define FUSE_DEBUG_MODULE IPC +#include "fuse_debug.h" + +static struct fuse_ticket *fticket_alloc(struct fuse_data *data); +static void fticket_refresh(struct fuse_ticket *ftick); +static void fticket_destroy(struct fuse_ticket *ftick); +static int fticket_wait_answer(struct fuse_ticket *ftick); +static __inline__ int +fticket_aw_pull_uio(struct fuse_ticket *ftick, + struct uio *uio); + +static int fuse_body_audit(struct fuse_ticket *ftick, size_t blen); +static __inline__ void +fuse_setup_ihead(struct fuse_in_header *ihead, + struct fuse_ticket *ftick, + uint64_t nid, + enum fuse_opcode op, + size_t blen, + pid_t pid, + struct ucred *cred); + +static fuse_handler_t fuse_standard_handler; + +SYSCTL_NODE(_vfs, OID_AUTO, fuse, CTLFLAG_RW, 0, "FUSE tunables"); +SYSCTL_STRING(_vfs_fuse, OID_AUTO, version, CTLFLAG_RD, + 
FUSE_FREEBSD_VERSION, 0, "fuse-freebsd version"); +static int fuse_ticket_count = 0; + +SYSCTL_INT(_vfs_fuse, OID_AUTO, ticket_count, CTLFLAG_RW, + &fuse_ticket_count, 0, "number of allocated tickets"); +static long fuse_iov_permanent_bufsize = 1 << 19; + +SYSCTL_LONG(_vfs_fuse, OID_AUTO, iov_permanent_bufsize, CTLFLAG_RW, + &fuse_iov_permanent_bufsize, 0, + "limit for permanently stored buffer size for fuse_iovs"); +static int fuse_iov_credit = 16; + +SYSCTL_INT(_vfs_fuse, OID_AUTO, iov_credit, CTLFLAG_RW, + &fuse_iov_credit, 0, + "how many times is an oversized fuse_iov tolerated"); + +MALLOC_DEFINE(M_FUSEMSG, "fuse_msgbuf", "fuse message buffer"); +static uma_zone_t ticket_zone; + +static void +fuse_block_sigs(sigset_t *oldset) +{ + sigset_t newset; + + SIGFILLSET(newset); + SIGDELSET(newset, SIGKILL); + if (kern_sigprocmask(curthread, SIG_BLOCK, &newset, oldset, 0)) + panic("%s: Invalid operation for kern_sigprocmask()", + __func__); +} + +static void +fuse_restore_sigs(sigset_t *oldset) +{ + + if (kern_sigprocmask(curthread, SIG_SETMASK, oldset, NULL, 0)) + panic("%s: Invalid operation for kern_sigprocmask()", + __func__); +} + +void +fiov_init(struct fuse_iov *fiov, size_t size) +{ + uint32_t msize = FU_AT_LEAST(size); + + debug_printf("fiov=%p, size=%zd\n", fiov, size); + + fiov->len = 0; + + fiov->base = malloc(msize, M_FUSEMSG, M_WAITOK | M_ZERO); + + fiov->allocated_size = msize; + fiov->credit = fuse_iov_credit; +} + +void +fiov_teardown(struct fuse_iov *fiov) +{ + debug_printf("fiov=%p\n", fiov); + + MPASS(fiov->base != NULL); + free(fiov->base, M_FUSEMSG); +} + +void +fiov_adjust(struct fuse_iov *fiov, size_t size) +{ + debug_printf("fiov=%p, size=%zd\n", fiov, size); + + if (fiov->allocated_size < size || + (fuse_iov_permanent_bufsize >= 0 && + fiov->allocated_size - size > fuse_iov_permanent_bufsize && + --fiov->credit < 0)) { + + fiov->base = realloc(fiov->base, FU_AT_LEAST(size), M_FUSEMSG, + M_WAITOK | M_ZERO); + if (!fiov->base) { + panic("FUSE: 
realloc failed"); + } + fiov->allocated_size = FU_AT_LEAST(size); + fiov->credit = fuse_iov_credit; + } + fiov->len = size; +} + +void +fiov_refresh(struct fuse_iov *fiov) +{ + debug_printf("fiov=%p\n", fiov); + + bzero(fiov->base, fiov->len); + fiov_adjust(fiov, 0); +} + +static int +fticket_ctor(void *mem, int size, void *arg, int flags) +{ + struct fuse_ticket *ftick = mem; + struct fuse_data *data = arg; + + debug_printf("ftick=%p data=%p\n", ftick, data); + + FUSE_ASSERT_MS_DONE(ftick); + FUSE_ASSERT_AW_DONE(ftick); + + ftick->tk_data = data; + + if (ftick->tk_unique != 0) + fticket_refresh(ftick); + + /* May be truncated to 32 bits */ + ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1); + if (ftick->tk_unique == 0) + ftick->tk_unique = atomic_fetchadd_long(&data->ticketer, 1); + + refcount_init(&ftick->tk_refcount, 1); + atomic_add_acq_int(&fuse_ticket_count, 1); + + return 0; +} + +static void +fticket_dtor(void *mem, int size, void *arg) +{ + struct fuse_ticket *ftick = mem; + + debug_printf("ftick=%p\n", ftick); + + FUSE_ASSERT_MS_DONE(ftick); + FUSE_ASSERT_AW_DONE(ftick); + + atomic_subtract_acq_int(&fuse_ticket_count, 1); +} + +static int +fticket_init(void *mem, int size, int flags) +{ + struct fuse_ticket *ftick = mem; + + DEBUG("ftick=%p\n", ftick); + + bzero(ftick, sizeof(struct fuse_ticket)); + + fiov_init(&ftick->tk_ms_fiov, sizeof(struct fuse_in_header)); + ftick->tk_ms_type = FT_M_FIOV; + + mtx_init(&ftick->tk_aw_mtx, "fuse answer delivery mutex", NULL, MTX_DEF); + fiov_init(&ftick->tk_aw_fiov, 0); + ftick->tk_aw_type = FT_A_FIOV; + + return 0; +} + +static void +fticket_fini(void *mem, int size) +{ + struct fuse_ticket *ftick = mem; + + DEBUG("ftick=%p\n", ftick); + + fiov_teardown(&ftick->tk_ms_fiov); + fiov_teardown(&ftick->tk_aw_fiov); + mtx_destroy(&ftick->tk_aw_mtx); +} + +static __inline struct fuse_ticket * +fticket_alloc(struct fuse_data *data) +{ + return uma_zalloc_arg(ticket_zone, data, M_WAITOK); +} + +static __inline void 
+fticket_destroy(struct fuse_ticket *ftick) +{ + return uma_zfree(ticket_zone, ftick); +} + +static __inline__ +void +fticket_refresh(struct fuse_ticket *ftick) +{ + debug_printf("ftick=%p\n", ftick); + + FUSE_ASSERT_MS_DONE(ftick); + FUSE_ASSERT_AW_DONE(ftick); + + fiov_refresh(&ftick->tk_ms_fiov); + ftick->tk_ms_bufdata = NULL; + ftick->tk_ms_bufsize = 0; + ftick->tk_ms_type = FT_M_FIOV; + + bzero(&ftick->tk_aw_ohead, sizeof(struct fuse_out_header)); + + fiov_refresh(&ftick->tk_aw_fiov); + ftick->tk_aw_errno = 0; + ftick->tk_aw_bufdata = NULL; + ftick->tk_aw_bufsize = 0; + ftick->tk_aw_type = FT_A_FIOV; + + ftick->tk_flag = 0; +} + +static int +fticket_wait_answer(struct fuse_ticket *ftick) +{ + sigset_t tset; + int err = 0; + struct fuse_data *data; + + debug_printf("ftick=%p\n", ftick); + fuse_lck_mtx_lock(ftick->tk_aw_mtx); + + if (fticket_answered(ftick)) { + goto out; + } + data = ftick->tk_data; + + if (fdata_get_dead(data)) { + err = ENOTCONN; + fticket_set_answered(ftick); + goto out; + } + fuse_block_sigs(&tset); + err = msleep(ftick, &ftick->tk_aw_mtx, PCATCH, "fu_ans", + data->daemon_timeout * hz); + fuse_restore_sigs(&tset); + if (err == EAGAIN) { /* same as EWOULDBLOCK */ +#ifdef XXXIP /* die conditionally */ + if (!fdata_get_dead(data)) { + fdata_set_dead(data); + } +#endif + err = ETIMEDOUT; + fticket_set_answered(ftick); + } +out: + if (!(err || fticket_answered(ftick))) { + debug_printf("FUSE: requester was woken up but still no answer"); + err = ENXIO; + } + fuse_lck_mtx_unlock(ftick->tk_aw_mtx); + + return err; +} + +static __inline__ +int +fticket_aw_pull_uio(struct fuse_ticket *ftick, struct uio *uio) +{ + int err = 0; + size_t len = uio_resid(uio); + + debug_printf("ftick=%p, uio=%p\n", ftick, uio); + + if (len) { + switch (ftick->tk_aw_type) { + case FT_A_FIOV: + fiov_adjust(fticket_resp(ftick), len); + err = uiomove(fticket_resp(ftick)->base, len, uio); + if (err) { + debug_printf("FUSE: FT_A_FIOV: error is %d" + " (%p, %zd, %p)\n", + err, 
fticket_resp(ftick)->base, + len, uio); + } + break; + + case FT_A_BUF: + ftick->tk_aw_bufsize = len; + err = uiomove(ftick->tk_aw_bufdata, len, uio); + if (err) { + debug_printf("FUSE: FT_A_BUF: error is %d" + " (%p, %zd, %p)\n", + err, ftick->tk_aw_bufdata, len, uio); + } + break; + + default: + panic("FUSE: unknown answer type for ticket %p", ftick); + } + } + return err; +} + +int +fticket_pull(struct fuse_ticket *ftick, struct uio *uio) +{ + int err = 0; + + debug_printf("ftick=%p, uio=%p\n", ftick, uio); + + if (ftick->tk_aw_ohead.error) { + return 0; + } + err = fuse_body_audit(ftick, uio_resid(uio)); + if (!err) { + err = fticket_aw_pull_uio(ftick, uio); + } + return err; +} + +struct fuse_data * +fdata_alloc(struct cdev *fdev, struct ucred *cred) +{ + struct fuse_data *data; + + debug_printf("fdev=%p\n", fdev); + + data = malloc(sizeof(struct fuse_data), M_FUSEMSG, M_WAITOK | M_ZERO); + + data->fdev = fdev; + mtx_init(&data->ms_mtx, "fuse message list mutex", NULL, MTX_DEF); + STAILQ_INIT(&data->ms_head); + mtx_init(&data->aw_mtx, "fuse answer list mutex", NULL, MTX_DEF); + TAILQ_INIT(&data->aw_head); + data->daemoncred = crhold(cred); + data->daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT; + sx_init(&data->rename_lock, "fuse rename lock"); + data->ref = 1; + + return data; +} + +void +fdata_trydestroy(struct fuse_data *data) +{ + DEBUG("data=%p data.mp=%p data.fdev=%p data.flags=%04x\n", + data, data->mp, data->fdev, data->dataflags); + + DEBUG("destroy: data=%p\n", data); + data->ref--; + MPASS(data->ref >= 0); + if (data->ref != 0) + return; + + /* Driving off stage all that stuff thrown at device... 
*/ + mtx_destroy(&data->ms_mtx); + mtx_destroy(&data->aw_mtx); + sx_destroy(&data->rename_lock); + + crfree(data->daemoncred); + + free(data, M_FUSEMSG); +} + +void +fdata_set_dead(struct fuse_data *data) +{ + debug_printf("data=%p\n", data); + + FUSE_LOCK(); + if (fdata_get_dead(data)) { + FUSE_UNLOCK(); + return; + } + fuse_lck_mtx_lock(data->ms_mtx); + data->dataflags |= FSESS_DEAD; + wakeup_one(data); + selwakeuppri(&data->ks_rsel, PZERO + 1); + wakeup(&data->ticketer); + fuse_lck_mtx_unlock(data->ms_mtx); + FUSE_UNLOCK(); +} + +struct fuse_ticket * +fuse_ticket_fetch(struct fuse_data *data) +{ + int err = 0; + struct fuse_ticket *ftick; + + debug_printf("data=%p\n", data); + + ftick = fticket_alloc(data); + + if (!(data->dataflags & FSESS_INITED)) { + /* Sleep until get answer for INIT messsage */ + FUSE_LOCK(); + if (!(data->dataflags & FSESS_INITED) && data->ticketer > 2) { + err = msleep(&data->ticketer, &fuse_mtx, PCATCH | PDROP, + "fu_ini", 0); + if (err) + fdata_set_dead(data); + } else + FUSE_UNLOCK(); + } + return ftick; +} + +int +fuse_ticket_drop(struct fuse_ticket *ftick) +{ + int die; + + die = refcount_release(&ftick->tk_refcount); + debug_printf("ftick=%p refcount=%d\n", ftick, ftick->tk_refcount); + if (die) + fticket_destroy(ftick); + + return die; +} + +void +fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t * handler) +{ + debug_printf("ftick=%p, handler=%p data=%p\n", ftick, ftick->tk_data, + handler); + + if (fdata_get_dead(ftick->tk_data)) { + return; + } + ftick->tk_aw_handler = handler; + + fuse_lck_mtx_lock(ftick->tk_data->aw_mtx); + fuse_aw_push(ftick); + fuse_lck_mtx_unlock(ftick->tk_data->aw_mtx); +} + +void +fuse_insert_message(struct fuse_ticket *ftick) +{ + debug_printf("ftick=%p\n", ftick); + + if (ftick->tk_flag & FT_DIRTY) { + panic("FUSE: ticket reused without being refreshed"); + } + ftick->tk_flag |= FT_DIRTY; + + if (fdata_get_dead(ftick->tk_data)) { + return; + } + fuse_lck_mtx_lock(ftick->tk_data->ms_mtx); + 
fuse_ms_push(ftick); + wakeup_one(ftick->tk_data); + selwakeuppri(&ftick->tk_data->ks_rsel, PZERO + 1); + fuse_lck_mtx_unlock(ftick->tk_data->ms_mtx); +} + +static int +fuse_body_audit(struct fuse_ticket *ftick, size_t blen) +{ + int err = 0; + enum fuse_opcode opcode; + + debug_printf("ftick=%p, blen = %zu\n", ftick, blen); + + opcode = fticket_opcode(ftick); + + switch (opcode) { + case FUSE_LOOKUP: + err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; + break; + + case FUSE_FORGET: + panic("FUSE: a handler has been intalled for FUSE_FORGET"); + break; + + case FUSE_GETATTR: + err = (blen == sizeof(struct fuse_attr_out)) ? 0 : EINVAL; + break; + + case FUSE_SETATTR: + err = (blen == sizeof(struct fuse_attr_out)) ? 0 : EINVAL; + break; + + case FUSE_READLINK: + err = (PAGE_SIZE >= blen) ? 0 : EINVAL; + break; + + case FUSE_SYMLINK: + err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; + break; + + case FUSE_MKNOD: + err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; + break; + + case FUSE_MKDIR: + err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; + break; + + case FUSE_UNLINK: + err = (blen == 0) ? 0 : EINVAL; + break; + + case FUSE_RMDIR: + err = (blen == 0) ? 0 : EINVAL; + break; + + case FUSE_RENAME: + err = (blen == 0) ? 0 : EINVAL; + break; + + case FUSE_LINK: + err = (blen == sizeof(struct fuse_entry_out)) ? 0 : EINVAL; + break; + + case FUSE_OPEN: + err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL; + break; + + case FUSE_READ: + err = (((struct fuse_read_in *)( + (char *)ftick->tk_ms_fiov.base + + sizeof(struct fuse_in_header) + ))->size >= blen) ? 0 : EINVAL; + break; + + case FUSE_WRITE: + err = (blen == sizeof(struct fuse_write_out)) ? 0 : EINVAL; + break; + + case FUSE_STATFS: + if (fuse_libabi_geq(ftick->tk_data, 7, 4)) { + err = (blen == sizeof(struct fuse_statfs_out)) ? + 0 : EINVAL; + } else { + err = (blen == FUSE_COMPAT_STATFS_SIZE) ? 0 : EINVAL; + } + break; + + case FUSE_RELEASE: + err = (blen == 0) ? 
0 : EINVAL; + break; + + case FUSE_FSYNC: + err = (blen == 0) ? 0 : EINVAL; + break; + + case FUSE_SETXATTR: + panic("FUSE_SETXATTR implementor has forgotten to define a" + " response body format check"); + break; + + case FUSE_GETXATTR: + panic("FUSE_GETXATTR implementor has forgotten to define a" + " response body format check"); + break; + + case FUSE_LISTXATTR: + panic("FUSE_LISTXATTR implementor has forgotten to define a" + " response body format check"); + break; + + case FUSE_REMOVEXATTR: + panic("FUSE_REMOVEXATTR implementor has forgotten to define a" + " response body format check"); + break; + + case FUSE_FLUSH: + err = (blen == 0) ? 0 : EINVAL; + break; + + case FUSE_INIT: + if (blen == sizeof(struct fuse_init_out) || blen == 8) { + err = 0; + } else { + err = EINVAL; + } + break; + + case FUSE_OPENDIR: + err = (blen == sizeof(struct fuse_open_out)) ? 0 : EINVAL; + break; + + case FUSE_READDIR: + err = (((struct fuse_read_in *)( + (char *)ftick->tk_ms_fiov.base + + sizeof(struct fuse_in_header) + ))->size >= blen) ? 0 : EINVAL; + break; + + case FUSE_RELEASEDIR: + err = (blen == 0) ? 0 : EINVAL; + break; + + case FUSE_FSYNCDIR: + err = (blen == 0) ? 0 : EINVAL; + break; + + case FUSE_GETLK: + panic("FUSE: no response body format check for FUSE_GETLK"); + break; + + case FUSE_SETLK: + panic("FUSE: no response body format check for FUSE_SETLK"); + break; + + case FUSE_SETLKW: + panic("FUSE: no response body format check for FUSE_SETLKW"); + break; + + case FUSE_ACCESS: + err = (blen == 0) ? 0 : EINVAL; + break; + + case FUSE_CREATE: + err = (blen == sizeof(struct fuse_entry_out) + + sizeof(struct fuse_open_out)) ? 0 : EINVAL; + break; + + case FUSE_DESTROY: + err = (blen == 0) ? 
0 : EINVAL; + break; + + default: + panic("FUSE: opcodes out of sync (%d)\n", opcode); + } + + return err; +} + +static void +fuse_setup_ihead(struct fuse_in_header *ihead, + struct fuse_ticket *ftick, + uint64_t nid, + enum fuse_opcode op, + size_t blen, + pid_t pid, + struct ucred *cred) +{ + ihead->len = sizeof(*ihead) + blen; + ihead->unique = ftick->tk_unique; + ihead->nodeid = nid; + ihead->opcode = op; + + debug_printf("ihead=%p, ftick=%p, nid=%ju, op=%d, blen=%zu\n", + ihead, ftick, (uintmax_t)nid, op, blen); + + ihead->pid = pid; + ihead->uid = cred->cr_uid; + ihead->gid = cred->cr_rgid; +} + +/* + * fuse_standard_handler just pulls indata and wakes up pretender. + * Doesn't try to interpret data, that's left for the pretender. + * Though might do a basic size verification before the pull-in takes place + */ + +static int +fuse_standard_handler(struct fuse_ticket *ftick, struct uio *uio) +{ + int err = 0; + + debug_printf("ftick=%p, uio=%p\n", ftick, uio); + + err = fticket_pull(ftick, uio); + + fuse_lck_mtx_lock(ftick->tk_aw_mtx); + + if (!fticket_answered(ftick)) { + fticket_set_answered(ftick); + ftick->tk_aw_errno = err; + wakeup(ftick); + } + fuse_lck_mtx_unlock(ftick->tk_aw_mtx); + + return err; +} + +void +fdisp_make_pid(struct fuse_dispatcher *fdip, + enum fuse_opcode op, + struct mount *mp, + uint64_t nid, + pid_t pid, + struct ucred *cred) +{ + struct fuse_data *data = fuse_get_mpdata(mp); + + debug_printf("fdip=%p, op=%d, mp=%p, nid=%ju\n", + fdip, op, mp, (uintmax_t)nid); + + if (fdip->tick) { + fticket_refresh(fdip->tick); + } else { + fdip->tick = fuse_ticket_fetch(data); + } + + FUSE_DIMALLOC(&fdip->tick->tk_ms_fiov, fdip->finh, + fdip->indata, fdip->iosize); + + fuse_setup_ihead(fdip->finh, fdip->tick, nid, op, fdip->iosize, pid, cred); +} + +void +fdisp_make(struct fuse_dispatcher *fdip, + enum fuse_opcode op, + struct mount *mp, + uint64_t nid, + struct thread *td, + struct ucred *cred) +{ + RECTIFY_TDCR(td, cred); + + return 
fdisp_make_pid(fdip, op, mp, nid, td->td_proc->p_pid, cred); +} + +void +fdisp_make_vp(struct fuse_dispatcher *fdip, + enum fuse_opcode op, + struct vnode *vp, + struct thread *td, + struct ucred *cred) +{ + debug_printf("fdip=%p, op=%d, vp=%p\n", fdip, op, vp); + RECTIFY_TDCR(td, cred); + return fdisp_make_pid(fdip, op, vnode_mount(vp), VTOI(vp), + td->td_proc->p_pid, cred); +} + +int +fdisp_wait_answ(struct fuse_dispatcher *fdip) +{ + int err = 0; + + fdip->answ_stat = 0; + fuse_insert_callback(fdip->tick, fuse_standard_handler); + fuse_insert_message(fdip->tick); + + if ((err = fticket_wait_answer(fdip->tick))) { + debug_printf("IPC: interrupted, err = %d\n", err); + + fuse_lck_mtx_lock(fdip->tick->tk_aw_mtx); + + if (fticket_answered(fdip->tick)) { + /* + * Just between noticing the interrupt and getting here, + * the standard handler has completed his job. + * So we drop the ticket and exit as usual. + */ + debug_printf("IPC: already answered\n"); + fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx); + goto out; + } else { + /* + * So we were faster than the standard handler. + * Then by setting the answered flag we get *him* + * to drop the ticket. + */ + debug_printf("IPC: setting to answered\n"); + fticket_set_answered(fdip->tick); + fuse_lck_mtx_unlock(fdip->tick->tk_aw_mtx); + return err; + } + } + debug_printf("IPC: not interrupted, err = %d\n", err); + + if (fdip->tick->tk_aw_errno) { + debug_printf("IPC: explicit EIO-ing, tk_aw_errno = %d\n", + fdip->tick->tk_aw_errno); + err = EIO; + goto out; + } + if ((err = fdip->tick->tk_aw_ohead.error)) { + debug_printf("IPC: setting status to %d\n", + fdip->tick->tk_aw_ohead.error); + /* + * This means a "proper" fuse syscall error. + * We record this value so the caller will + * be able to know it's not a boring messaging + * failure, if she wishes so (and if not, she can + * just simply propagate the return value of this routine). 
+ * [XXX Maybe a bitflag would do the job too, + * if other flags needed, this will be converted thusly.] + */ + fdip->answ_stat = err; + goto out; + } + fdip->answ = fticket_resp(fdip->tick)->base; + fdip->iosize = fticket_resp(fdip->tick)->len; + + debug_printf("IPC: all is well\n"); + + return 0; + +out: + debug_printf("IPC: dropping ticket, err = %d\n", err); + + return err; +} + +void +fuse_ipc_init(void) +{ + ticket_zone = uma_zcreate("fuse_ticket", sizeof(struct fuse_ticket), + fticket_ctor, fticket_dtor, fticket_init, fticket_fini, + UMA_ALIGN_PTR, 0); +} + +void +fuse_ipc_destroy(void) +{ + uma_zdestroy(ticket_zone); +} diff --git a/sys/fs/fuse/fuse_ipc.h b/sys/fs/fuse/fuse_ipc.h new file mode 100644 index 0000000..cd08969 --- /dev/null +++ b/sys/fs/fuse/fuse_ipc.h @@ -0,0 +1,423 @@ +/* + * Copyright (c) 2007-2009 Google Inc. and Amit Singh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _FUSE_IPC_H_ +#define _FUSE_IPC_H_ + +#include <sys/param.h> +#include <sys/refcount.h> + +struct fuse_iov { + void *base; + size_t len; + size_t allocated_size; + int credit; +}; + +void fiov_init(struct fuse_iov *fiov, size_t size); +void fiov_teardown(struct fuse_iov *fiov); +void fiov_refresh(struct fuse_iov *fiov); +void fiov_adjust(struct fuse_iov *fiov, size_t size); + +#define FUSE_DIMALLOC(fiov, spc1, spc2, amnt) \ +do { \ + fiov_adjust(fiov, (sizeof(*(spc1)) + (amnt))); \ + (spc1) = (fiov)->base; \ + (spc2) = (char *)(fiov)->base + (sizeof(*(spc1))); \ +} while (0) + +#define FU_AT_LEAST(siz) max((siz), 160) + +#define FUSE_ASSERT_AW_DONE(ftick) \ + KASSERT((ftick)->tk_aw_link.tqe_next == NULL && \ + (ftick)->tk_aw_link.tqe_prev == NULL, \ + ("FUSE: ticket still on answer delivery list %p", (ftick))) \ + +#define FUSE_ASSERT_MS_DONE(ftick) \ + KASSERT((ftick)->tk_ms_link.stqe_next == NULL, \ + ("FUSE: ticket still on message list %p", (ftick))) + +struct fuse_ticket; +struct fuse_data; + +typedef int fuse_handler_t(struct fuse_ticket *ftick, struct uio *uio); + +struct fuse_ticket { + /* fields giving the identity of the ticket */ + uint64_t tk_unique; + struct fuse_data *tk_data; + int tk_flag; + u_int tk_refcount; + + /* fields for initiating an upgoing message */ + struct fuse_iov tk_ms_fiov; + void *tk_ms_bufdata; + size_t tk_ms_bufsize; + enum { FT_M_FIOV, FT_M_BUF } tk_ms_type; + STAILQ_ENTRY(fuse_ticket) tk_ms_link; + + /* fields for handling answers coming from userspace */ + struct fuse_iov tk_aw_fiov; + void *tk_aw_bufdata; + size_t tk_aw_bufsize; + enum { FT_A_FIOV, FT_A_BUF } tk_aw_type; + + struct fuse_out_header tk_aw_ohead; + int tk_aw_errno; + struct mtx tk_aw_mtx; + fuse_handler_t *tk_aw_handler; + TAILQ_ENTRY(fuse_ticket) tk_aw_link; +}; + +#define FT_ANSW 0x01 /* request of ticket has already been answered */ +#define FT_DIRTY 0x04 /* ticket has been used */ + +static __inline__ +struct fuse_iov * 
+fticket_resp(struct fuse_ticket *ftick) +{ + return (&ftick->tk_aw_fiov); +} + +static __inline__ +int +fticket_answered(struct fuse_ticket *ftick) +{ + DEBUGX(FUSE_DEBUG_IPC, "-> ftick=%p\n", ftick); + mtx_assert(&ftick->tk_aw_mtx, MA_OWNED); + return (ftick->tk_flag & FT_ANSW); +} + +static __inline__ +void +fticket_set_answered(struct fuse_ticket *ftick) +{ + DEBUGX(FUSE_DEBUG_IPC, "-> ftick=%p\n", ftick); + mtx_assert(&ftick->tk_aw_mtx, MA_OWNED); + ftick->tk_flag |= FT_ANSW; +} + +static __inline__ +enum fuse_opcode +fticket_opcode(struct fuse_ticket *ftick) +{ + DEBUGX(FUSE_DEBUG_IPC, "-> ftick=%p\n", ftick); + return (((struct fuse_in_header *)(ftick->tk_ms_fiov.base))->opcode); +} + +int fticket_pull(struct fuse_ticket *ftick, struct uio *uio); + +enum mountpri { FM_NOMOUNTED, FM_PRIMARY, FM_SECONDARY }; + +/* + * The data representing a FUSE session. + */ +struct fuse_data { + struct cdev *fdev; + struct mount *mp; + struct vnode *vroot; + struct ucred *daemoncred; + int dataflags; + int ref; + + struct mtx ms_mtx; + STAILQ_HEAD(, fuse_ticket) ms_head; + + struct mtx aw_mtx; + TAILQ_HEAD(, fuse_ticket) aw_head; + + u_long ticketer; + + struct sx rename_lock; + + uint32_t fuse_libabi_major; + uint32_t fuse_libabi_minor; + + uint32_t max_write; + uint32_t max_read; + uint32_t subtype; + char volname[MAXPATHLEN]; + + struct selinfo ks_rsel; + + int daemon_timeout; + uint64_t notimpl; +}; + +#define FSESS_DEAD 0x0001 /* session is to be closed */ +#define FSESS_UNUSED0 0x0002 /* unused */ +#define FSESS_INITED 0x0004 /* session has been inited */ +#define FSESS_DAEMON_CAN_SPY 0x0010 /* let non-owners access this fs */ + /* (and being observed by the daemon) */ +#define FSESS_PUSH_SYMLINKS_IN 0x0020 /* prefix absolute symlinks with mp */ +#define FSESS_DEFAULT_PERMISSIONS 0x0040 /* kernel does permission checking */ +#define FSESS_NO_ATTRCACHE 0x0080 /* no attribute caching */ +#define FSESS_NO_READAHEAD 0x0100 /* no readaheads */ +#define FSESS_NO_DATACACHE 
0x0200 /* disable buffer cache */ +#define FSESS_NO_NAMECACHE 0x0400 /* disable name cache */ +#define FSESS_NO_MMAP 0x0800 /* disable mmap */ +#define FSESS_BROKENIO 0x1000 /* fix broken io */ + +extern int fuse_data_cache_enable; +extern int fuse_data_cache_invalidate; +extern int fuse_mmap_enable; +extern int fuse_sync_resize; +extern int fuse_fix_broken_io; + +static __inline__ +struct fuse_data * +fuse_get_mpdata(struct mount *mp) +{ + return mp->mnt_data; +} + +static __inline int +fsess_isimpl(struct mount *mp, int opcode) +{ + struct fuse_data *data = fuse_get_mpdata(mp); + + return (data->notimpl & (1ULL << opcode)) == 0; + +} +static __inline void +fsess_set_notimpl(struct mount *mp, int opcode) +{ + struct fuse_data *data = fuse_get_mpdata(mp); + + data->notimpl |= (1ULL << opcode); +} + +static __inline int +fsess_opt_datacache(struct mount *mp) +{ + struct fuse_data *data = fuse_get_mpdata(mp); + + return (fuse_data_cache_enable || + (data->dataflags & FSESS_NO_DATACACHE) == 0); +} + +static __inline int +fsess_opt_mmap(struct mount *mp) +{ + struct fuse_data *data = fuse_get_mpdata(mp); + + if (!(fuse_mmap_enable && fuse_data_cache_enable)) + return 0; + return ((data->dataflags & (FSESS_NO_DATACACHE | FSESS_NO_MMAP)) == 0); +} + +static __inline int +fsess_opt_brokenio(struct mount *mp) +{ + struct fuse_data *data = fuse_get_mpdata(mp); + + return (fuse_fix_broken_io || (data->dataflags & FSESS_BROKENIO)); +} + +static __inline__ +void +fuse_ms_push(struct fuse_ticket *ftick) +{ + DEBUGX(FUSE_DEBUG_IPC, "ftick=%p refcount=%d\n", + ftick, ftick->tk_refcount + 1); + mtx_assert(&ftick->tk_data->ms_mtx, MA_OWNED); + refcount_acquire(&ftick->tk_refcount); + STAILQ_INSERT_TAIL(&ftick->tk_data->ms_head, ftick, tk_ms_link); +} + +static __inline__ +struct fuse_ticket * +fuse_ms_pop(struct fuse_data *data) +{ + struct fuse_ticket *ftick = NULL; + + mtx_assert(&data->ms_mtx, MA_OWNED); + + if ((ftick = STAILQ_FIRST(&data->ms_head))) { + 
STAILQ_REMOVE_HEAD(&data->ms_head, tk_ms_link); +#ifdef INVARIANTS + ftick->tk_ms_link.stqe_next = NULL; +#endif + } + DEBUGX(FUSE_DEBUG_IPC, "ftick=%p refcount=%d\n", + ftick, ftick ? ftick->tk_refcount : -1); + + return ftick; +} + +static __inline__ +void +fuse_aw_push(struct fuse_ticket *ftick) +{ + DEBUGX(FUSE_DEBUG_IPC, "ftick=%p refcount=%d\n", + ftick, ftick->tk_refcount + 1); + mtx_assert(&ftick->tk_data->aw_mtx, MA_OWNED); + refcount_acquire(&ftick->tk_refcount); + TAILQ_INSERT_TAIL(&ftick->tk_data->aw_head, ftick, tk_aw_link); +} + +static __inline__ +void +fuse_aw_remove(struct fuse_ticket *ftick) +{ + DEBUGX(FUSE_DEBUG_IPC, "ftick=%p refcount=%d\n", + ftick, ftick->tk_refcount); + mtx_assert(&ftick->tk_data->aw_mtx, MA_OWNED); + TAILQ_REMOVE(&ftick->tk_data->aw_head, ftick, tk_aw_link); +#ifdef INVARIANTS + ftick->tk_aw_link.tqe_next = NULL; + ftick->tk_aw_link.tqe_prev = NULL; +#endif +} + +static __inline__ +struct fuse_ticket * +fuse_aw_pop(struct fuse_data *data) +{ + struct fuse_ticket *ftick = NULL; + + mtx_assert(&data->aw_mtx, MA_OWNED); + + if ((ftick = TAILQ_FIRST(&data->aw_head))) { + fuse_aw_remove(ftick); + } + DEBUGX(FUSE_DEBUG_IPC, "ftick=%p refcount=%d\n", + ftick, ftick ? 
ftick->tk_refcount : -1); + + return ftick; +} + +struct fuse_ticket *fuse_ticket_fetch(struct fuse_data *data); +int fuse_ticket_drop(struct fuse_ticket *ftick); +void fuse_insert_callback(struct fuse_ticket *ftick, fuse_handler_t *handler); +void fuse_insert_message(struct fuse_ticket *ftick); + +static __inline__ +int +fuse_libabi_geq(struct fuse_data *data, uint32_t abi_maj, uint32_t abi_min) +{ + return (data->fuse_libabi_major > abi_maj || + (data->fuse_libabi_major == abi_maj && data->fuse_libabi_minor >= abi_min)); +} + +struct fuse_data *fdata_alloc(struct cdev *dev, struct ucred *cred); +void fdata_trydestroy(struct fuse_data *data); +void fdata_set_dead(struct fuse_data *data); + +static __inline__ +int +fdata_get_dead(struct fuse_data *data) +{ + return (data->dataflags & FSESS_DEAD); +} + +struct fuse_dispatcher { + + struct fuse_ticket *tick; + struct fuse_in_header *finh; + + void *indata; + size_t iosize; + uint64_t nodeid; + int answ_stat; + void *answ; +}; + +static __inline__ +void +fdisp_init(struct fuse_dispatcher *fdisp, size_t iosize) +{ + DEBUGX(FUSE_DEBUG_IPC, "-> fdisp=%p, iosize=%zx\n", fdisp, iosize); + fdisp->iosize = iosize; + fdisp->tick = NULL; +} + +static __inline__ +void +fdisp_destroy(struct fuse_dispatcher *fdisp) +{ + DEBUGX(FUSE_DEBUG_IPC, "-> fdisp=%p, ftick=%p\n", fdisp, fdisp->tick); + fuse_ticket_drop(fdisp->tick); +#ifdef INVARIANTS + fdisp->tick = NULL; +#endif +} + +void fdisp_make(struct fuse_dispatcher *fdip, enum fuse_opcode op, + struct mount *mp, uint64_t nid, struct thread *td, + struct ucred *cred); + +void fdisp_make_pid(struct fuse_dispatcher *fdip, enum fuse_opcode op, + struct mount *mp, uint64_t nid, pid_t pid, + struct ucred *cred); + +void fdisp_make_vp(struct fuse_dispatcher *fdip, enum fuse_opcode op, + struct vnode *vp, struct thread *td, struct ucred *cred); + +int fdisp_wait_answ(struct fuse_dispatcher *fdip); + +static __inline__ +int +fdisp_simple_putget_vp(struct fuse_dispatcher *fdip, enum 
fuse_opcode op, + struct vnode *vp, struct thread *td, struct ucred *cred) +{ + DEBUGX(FUSE_DEBUG_IPC, "-> fdip=%p, opcode=%d, vp=%p\n", fdip, op, vp); + fdisp_make_vp(fdip, op, vp, td, cred); + return fdisp_wait_answ(fdip); +} + +#endif /* _FUSE_IPC_H_ */ diff --git a/sys/fs/fuse/fuse_kernel.h b/sys/fs/fuse/fuse_kernel.h new file mode 100644 index 0000000..07cd4a9 --- /dev/null +++ b/sys/fs/fuse/fuse_kernel.h @@ -0,0 +1,373 @@ +/*- + * This file defines the kernel interface of FUSE + * Copyright (C) 2001-2007 Miklos Szeredi <miklos@szeredi.hu> + * + * This program can be distributed under the terms of the GNU GPL. + * See the file COPYING. + * + * This -- and only this -- header file may also be distributed under + * the terms of the BSD Licence as follows: + * + * Copyright (C) 2001-2007 Miklos Szeredi. All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. 
IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */ + +#ifndef linux +#include <sys/types.h> +#define __u64 uint64_t +#define __u32 uint32_t +#define __s32 int32_t +#else +#include <asm/types.h> +#include <linux/major.h> +#endif + +/** Version number of this interface */ +#define FUSE_KERNEL_VERSION 7 + +/** Minor version number of this interface */ +#define FUSE_KERNEL_MINOR_VERSION 8 + +/** The node ID of the root inode */ +#define FUSE_ROOT_ID 1 + +/** The major number of the fuse character device */ +#define FUSE_MAJOR MISC_MAJOR + +/** The minor number of the fuse character device */ +#define FUSE_MINOR 229 + +/* Make sure all structures are padded to 64bit boundary, so 32bit + userspace works under 64bit kernels */ + +struct fuse_attr { + __u64 ino; + __u64 size; + __u64 blocks; + __u64 atime; + __u64 mtime; + __u64 ctime; + __u32 atimensec; + __u32 mtimensec; + __u32 ctimensec; + __u32 mode; + __u32 nlink; + __u32 uid; + __u32 gid; + __u32 rdev; +}; + +struct fuse_kstatfs { + __u64 blocks; + __u64 bfree; + __u64 bavail; + __u64 files; + __u64 ffree; + __u32 bsize; + __u32 namelen; + __u32 frsize; + __u32 padding; + __u32 spare[6]; +}; + +struct fuse_file_lock { + __u64 start; + __u64 end; + __u32 type; + __u32 pid; /* tgid */ +}; + +/** + * Bitmasks for fuse_setattr_in.valid + */ +#define FATTR_MODE (1 << 0) +#define FATTR_UID (1 << 1) +#define FATTR_GID (1 << 2) +#define FATTR_SIZE (1 << 3) +#define FATTR_ATIME (1 << 4) +#define FATTR_MTIME (1 << 5) +#define FATTR_FH 
(1 << 6) + +/** + * Flags returned by the OPEN request + * + * FOPEN_DIRECT_IO: bypass page cache for this open file + * FOPEN_KEEP_CACHE: don't invalidate the data cache on open + */ +#define FOPEN_DIRECT_IO (1 << 0) +#define FOPEN_KEEP_CACHE (1 << 1) + +/** + * INIT request/reply flags + */ +#define FUSE_ASYNC_READ (1 << 0) +#define FUSE_POSIX_LOCKS (1 << 1) + +/** + * Release flags + */ +#define FUSE_RELEASE_FLUSH (1 << 0) + +enum fuse_opcode { + FUSE_LOOKUP = 1, + FUSE_FORGET = 2, /* no reply */ + FUSE_GETATTR = 3, + FUSE_SETATTR = 4, + FUSE_READLINK = 5, + FUSE_SYMLINK = 6, + FUSE_MKNOD = 8, + FUSE_MKDIR = 9, + FUSE_UNLINK = 10, + FUSE_RMDIR = 11, + FUSE_RENAME = 12, + FUSE_LINK = 13, + FUSE_OPEN = 14, + FUSE_READ = 15, + FUSE_WRITE = 16, + FUSE_STATFS = 17, + FUSE_RELEASE = 18, + FUSE_FSYNC = 20, + FUSE_SETXATTR = 21, + FUSE_GETXATTR = 22, + FUSE_LISTXATTR = 23, + FUSE_REMOVEXATTR = 24, + FUSE_FLUSH = 25, + FUSE_INIT = 26, + FUSE_OPENDIR = 27, + FUSE_READDIR = 28, + FUSE_RELEASEDIR = 29, + FUSE_FSYNCDIR = 30, + FUSE_GETLK = 31, + FUSE_SETLK = 32, + FUSE_SETLKW = 33, + FUSE_ACCESS = 34, + FUSE_CREATE = 35, + FUSE_INTERRUPT = 36, + FUSE_BMAP = 37, + FUSE_DESTROY = 38, +}; + +/* The read buffer is required to be at least 8k, but may be much larger */ +#define FUSE_MIN_READ_BUFFER 8192 + +struct fuse_entry_out { + __u64 nodeid; /* Inode ID */ + __u64 generation; /* Inode generation: nodeid:gen must + be unique for the fs's lifetime */ + __u64 entry_valid; /* Cache timeout for the name */ + __u64 attr_valid; /* Cache timeout for the attributes */ + __u32 entry_valid_nsec; + __u32 attr_valid_nsec; + struct fuse_attr attr; +}; + +struct fuse_forget_in { + __u64 nlookup; +}; + +struct fuse_attr_out { + __u64 attr_valid; /* Cache timeout for the attributes */ + __u32 attr_valid_nsec; + __u32 dummy; + struct fuse_attr attr; +}; + +struct fuse_mkdir_in { + __u32 mode; + __u32 padding; +}; + +struct fuse_rename_in { + __u64 newdir; +}; + +struct fuse_link_in { + __u64 
oldnodeid; +}; + +struct fuse_setattr_in { + __u32 valid; + __u32 padding; + __u64 fh; + __u64 size; + __u64 unused1; + __u64 atime; + __u64 mtime; + __u64 unused2; + __u32 atimensec; + __u32 mtimensec; + __u32 unused3; + __u32 mode; + __u32 unused4; + __u32 uid; + __u32 gid; + __u32 unused5; +}; + +struct fuse_open_in { + __u32 flags; + __u32 mode; +}; + +struct fuse_open_out { + __u64 fh; + __u32 open_flags; + __u32 padding; +}; + +struct fuse_release_in { + __u64 fh; + __u32 flags; + __u32 release_flags; + __u64 lock_owner; +}; + +struct fuse_flush_in { + __u64 fh; + __u32 unused; + __u32 padding; + __u64 lock_owner; +}; + +struct fuse_read_in { + __u64 fh; + __u64 offset; + __u32 size; + __u32 padding; +}; + +struct fuse_write_in { + __u64 fh; + __u64 offset; + __u32 size; + __u32 write_flags; +}; + +struct fuse_write_out { + __u32 size; + __u32 padding; +}; + +#define FUSE_COMPAT_STATFS_SIZE 48 + +struct fuse_statfs_out { + struct fuse_kstatfs st; +}; + +struct fuse_fsync_in { + __u64 fh; + __u32 fsync_flags; + __u32 padding; +}; + +struct fuse_setxattr_in { + __u32 size; + __u32 flags; +}; + +struct fuse_getxattr_in { + __u32 size; + __u32 padding; +}; + +struct fuse_getxattr_out { + __u32 size; + __u32 padding; +}; + +struct fuse_lk_in { + __u64 fh; + __u64 owner; + struct fuse_file_lock lk; +}; + +struct fuse_lk_out { + struct fuse_file_lock lk; +}; + +struct fuse_access_in { + __u32 mask; + __u32 padding; +}; + +struct fuse_init_in { + __u32 major; + __u32 minor; + __u32 max_readahead; + __u32 flags; +}; + +struct fuse_init_out { + __u32 major; + __u32 minor; + __u32 max_readahead; + __u32 flags; + __u32 unused; + __u32 max_write; +}; + +struct fuse_interrupt_in { + __u64 unique; +}; + +struct fuse_bmap_in { + __u64 block; + __u32 blocksize; + __u32 padding; +}; + +struct fuse_bmap_out { + __u64 block; +}; + +struct fuse_in_header { + __u32 len; + __u32 opcode; + __u64 unique; + __u64 nodeid; + __u32 uid; + __u32 gid; + __u32 pid; + __u32 padding; +}; + 
+struct fuse_out_header { + __u32 len; + __s32 error; + __u64 unique; +}; + +struct fuse_dirent { + __u64 ino; + __u64 off; + __u32 namelen; + __u32 type; + char name[0]; +}; + +#define FUSE_NAME_OFFSET offsetof(struct fuse_dirent, name) +#define FUSE_DIRENT_ALIGN(x) (((x) + sizeof(__u64) - 1) & ~(sizeof(__u64) - 1)) +#define FUSE_DIRENT_SIZE(d) \ + FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + (d)->namelen) diff --git a/sys/fs/fuse/fuse_main.c b/sys/fs/fuse/fuse_main.c new file mode 100644 index 0000000..bf73a45f --- /dev/null +++ b/sys/fs/fuse/fuse_main.c @@ -0,0 +1,162 @@ +/* + * Copyright (c) 2007-2009 Google Inc. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/module.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/mount.h>
#include <sys/vnode.h>
#include <sys/stat.h>
#include <sys/file.h>
#include <sys/buf.h>
#include <sys/sysctl.h>

#include "fuse.h"

/* Module load/unload helpers, defined below. */
static void fuse_bringdown(eventhandler_tag eh_tag);
static int fuse_loader(struct module *m, int what, void *arg);

/* Initialized by fuse_loader() on MOD_LOAD, destroyed by fuse_bringdown(). */
struct mtx fuse_mtx;

extern struct vfsops fuse_vfsops;
extern struct cdevsw fuse_cdevsw;
extern struct vop_vector fuse_vnops;
extern int fuse_pbuf_freecnt;

/*
 * File system registration record passed to vfs_modevent() by fuse_loader();
 * the file system is registered under the name "fusefs".
 */
static struct vfsconf fuse_vfsconf = {
	.vfc_version = VFS_VERSION,
	.vfc_name = "fusefs",
	.vfc_vfsops = &fuse_vfsops,
	.vfc_typenum = -1,
	.vfc_flags = VFCF_SYNTHETIC
};

/*
 * Read-only sysctls that export the compile-time FUSE ABI version numbers.
 * No backing variable is supplied (pointer argument is 0); the constant is
 * passed as the value argument instead.
 */
SYSCTL_INT(_vfs_fuse, OID_AUTO, kernelabi_major, CTLFLAG_RD,
    0, FUSE_KERNEL_VERSION, "FUSE kernel abi major version");
SYSCTL_INT(_vfs_fuse, OID_AUTO, kernelabi_minor, CTLFLAG_RD,
    0, FUSE_KERNEL_MINOR_VERSION, "FUSE kernel abi minor version");

/******************************
 *
 * >>> Module management stuff
 *
 ******************************/

/*
 * Tear down everything fuse_loader() set up on MOD_LOAD: the IPC layer, the
 * device layer and finally fuse_mtx.  The eh_tag argument is not used by the
 * body.
 */
static void
fuse_bringdown(eventhandler_tag eh_tag)
{

	fuse_ipc_destroy();
	fuse_device_destroy();
	mtx_destroy(&fuse_mtx);
}

/*
 * Module event handler.
 *
 * MOD_LOAD:   size fuse_pbuf_freecnt from nswbuf, create fuse_mtx, bring up
 *             the device and IPC layers, then register the file system via
 *             vfs_modevent().  A fuse_device_init() failure only needs the
 *             mutex destroyed; a vfs_modevent() failure undoes everything
 *             through fuse_bringdown().
 * MOD_UNLOAD: unregister the file system first; if vfs_modevent() refuses,
 *             return its error and leave the module state intact.
 * Any other event yields EINVAL.
 *
 * Returns 0 on success or the errno of the step that failed.
 */
static int
fuse_loader(struct module *m, int what, void *arg)
{
	/* Never assigned anything but NULL here; only forwarded to fuse_bringdown(). */
	static eventhandler_tag eh_tag = NULL;
	int err = 0;

	switch (what) {
	case MOD_LOAD:			/* kldload */
		fuse_pbuf_freecnt = nswbuf / 2 + 1;
		mtx_init(&fuse_mtx, "fuse_mtx", NULL, MTX_DEF);
		err = fuse_device_init();
		if (err) {
			/* Only the mutex exists at this point. */
			mtx_destroy(&fuse_mtx);
			return (err);
		}
		fuse_ipc_init();

		/* vfs_modevent ignores its first arg */
		if ((err = vfs_modevent(NULL, what, &fuse_vfsconf)))
			fuse_bringdown(eh_tag);
		else
			printf("fuse-freebsd: version %s, FUSE ABI %d.%d\n",
			    FUSE_FREEBSD_VERSION,
			    FUSE_KERNEL_VERSION, FUSE_KERNEL_MINOR_VERSION);

		break;
	case MOD_UNLOAD:
		/* Unregister before tearing anything down. */
		if ((err = vfs_modevent(NULL, what, &fuse_vfsconf)))
			return (err);
		fuse_bringdown(eh_tag);
		break;
	default:
		return (EINVAL);
	}

	return (err);
}

/* Registering the module */

static moduledata_t fuse_moddata = {
	"fuse",
	fuse_loader,
	&fuse_vfsconf
};

DECLARE_MODULE(fuse, fuse_moddata, SI_SUB_VFS, SI_ORDER_MIDDLE);
MODULE_VERSION(fuse, 1);
diff --git a/sys/fs/fuse/fuse_node.c b/sys/fs/fuse/fuse_node.c new file mode 100644 index 0000000..3838ab7 --- /dev/null +++ b/sys/fs/fuse/fuse_node.c @@ -0,0 +1,384 @@ +/* + * Copyright (c) 2007-2009 Google Inc. and Amit Singh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */

#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");

#include <sys/types.h>
#include <sys/module.h>
#include <sys/systm.h>
#include <sys/errno.h>
#include <sys/param.h>
#include <sys/kernel.h>
#include <sys/conf.h>
#include <sys/uio.h>
#include <sys/malloc.h>
#include <sys/queue.h>
#include <sys/lock.h>
#include <sys/sx.h>
#include <sys/mutex.h>
#include <sys/proc.h>
#include <sys/vnode.h>
#include <sys/namei.h>
#include <sys/mount.h>
#include <sys/sysctl.h>
#include <sys/fcntl.h>
#include <sys/fnv_hash.h>
#include <sys/priv.h>
#include <security/mac/mac_framework.h>
#include <vm/vm.h>
#include <vm/vm_extern.h>

#include "fuse.h"
#include "fuse_node.h"
#include "fuse_internal.h"
#include "fuse_io.h"
#include "fuse_ipc.h"

#define FUSE_DEBUG_MODULE VNOPS
#include "fuse_debug.h"

/* Malloc type for the per-vnode struct fuse_vnode_data. */
MALLOC_DEFINE(M_FUSEVN, "fuse_vnode", "fuse vnode private data");

/* Incremented in fuse_vnode_init(), decremented in fuse_vnode_destroy(). */
static int fuse_node_count = 0;

SYSCTL_INT(_vfs_fuse, OID_AUTO, node_count, CTLFLAG_RD,
    &fuse_node_count, 0, "");

/*
 * Run-time tunables exported read/write under _vfs_fuse.  Only
 * fuse_refresh_size is consumed in this file (see fuse_vnode_refreshsize());
 * the others are non-static and presumably read by other FUSE sources --
 * confirm against their users.
 */
int	fuse_data_cache_enable = 1;

SYSCTL_INT(_vfs_fuse, OID_AUTO, data_cache_enable, CTLFLAG_RW,
    &fuse_data_cache_enable, 0, "");

int	fuse_data_cache_invalidate = 0;

SYSCTL_INT(_vfs_fuse, OID_AUTO, data_cache_invalidate, CTLFLAG_RW,
    &fuse_data_cache_invalidate, 0, "");

int	fuse_mmap_enable = 1;

SYSCTL_INT(_vfs_fuse, OID_AUTO, mmap_enable, CTLFLAG_RW,
    &fuse_mmap_enable, 0, "");

int	fuse_refresh_size = 0;

SYSCTL_INT(_vfs_fuse, OID_AUTO, refresh_size, CTLFLAG_RW,
    &fuse_refresh_size, 0, "");

int	fuse_sync_resize = 1;

SYSCTL_INT(_vfs_fuse, OID_AUTO, sync_resize, CTLFLAG_RW,
    &fuse_sync_resize, 0, "");

int	fuse_fix_broken_io = 0;

SYSCTL_INT(_vfs_fuse, OID_AUTO, fix_broken_io, CTLFLAG_RW,
    &fuse_fix_broken_io, 0, "");

/*
 * Bind freshly allocated private data to a new vnode: record the node id,
 * flag the vnode as the root when nodeid is FUSE_ROOT_ID, set its type,
 * mark every file handle slot invalid and account for the node in
 * fuse_node_count.
 */
static void
fuse_vnode_init(struct vnode *vp, struct fuse_vnode_data *fvdat,
    uint64_t nodeid, enum vtype vtyp)
{
	int i;

	fvdat->nid = nodeid;
	if (nodeid == FUSE_ROOT_ID) {
		vp->v_vflag |= VV_ROOT;
	}
	vp->v_type = vtyp;
	vp->v_data = fvdat;

	for (i = 0; i < FUFH_MAXTYPE; i++)
		fvdat->fufh[i].fh_type = FUFH_INVALID;

	atomic_add_acq_int(&fuse_node_count, 1);
}

/*
 * Detach and free the private data of vp and drop it from
 * fuse_node_count.
 */
void
fuse_vnode_destroy(struct vnode *vp)
{
	struct fuse_vnode_data *fvdat = vp->v_data;

	vp->v_data = NULL;
	free(fvdat, M_FUSEVN);

	atomic_subtract_acq_int(&fuse_node_count, 1);
}

/*
 * Comparison callback handed to vfs_hash_get()/vfs_hash_insert() in
 * fuse_vnode_alloc().  nidp points at a uint64_t node id; the result is 0
 * when vp carries that id and non-zero otherwise.
 */
static int
fuse_vnode_cmp(struct vnode *vp, void *nidp)
{
	return (VTOI(vp) != *((uint64_t *)nidp));
}

/* Reduce a 64-bit node id to a 32-bit hash value with fnv_32_buf(). */
static uint32_t __inline
fuse_vnode_hash(uint64_t id)
{
	return (fnv_32_buf(&id, sizeof(id), FNV1_32_INIT));
}

/*
 * Look up the vnode for nodeid on mount mp, creating it if the hash has no
 * entry yet.
 *
 * vtyp must not be VNON (EINVAL).  On success *vpp is the vnode found in the
 * hash, the newly created vnode, or the vnode vfs_hash_insert() reported
 * through vp2.  On failure the errno of the failing step is returned.
 */
static int
fuse_vnode_alloc(struct mount *mp,
    struct thread *td,
    uint64_t nodeid,
    enum vtype vtyp,
    struct vnode **vpp)
{
	struct fuse_vnode_data *fvdat;
	struct vnode *vp2;
	int err = 0;

	DEBUG("been asked for vno #%ju\n", (uintmax_t)nodeid);

	if (vtyp == VNON) {
		return EINVAL;
	}
	*vpp = NULL;
	err = vfs_hash_get(mp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE, td, vpp,
	    fuse_vnode_cmp, &nodeid);
	if (err)
		return (err);

	if (*vpp) {
		/* Already known: it must agree on type and carry private data. */
		MPASS((*vpp)->v_type == vtyp && (*vpp)->v_data != NULL);
		DEBUG("vnode taken from hash\n");
		return (0);
	}
	fvdat = malloc(sizeof(*fvdat), M_FUSEVN, M_WAITOK | M_ZERO);
	err = getnewvnode("fuse", mp, &fuse_vnops, vpp);
	if (err) {
		free(fvdat, M_FUSEVN);
		return (err);
	}
	/* Take the vnode lock before the vnode is initialized and published. */
	lockmgr((*vpp)->v_vnlock, LK_EXCLUSIVE, NULL);
	fuse_vnode_init(*vpp, fvdat, nodeid, vtyp);
	err = insmntque(*vpp, mp);
	ASSERT_VOP_ELOCKED(*vpp, "fuse_vnode_alloc");
	if (err) {
		free(fvdat, M_FUSEVN);
		*vpp = NULL;
		return (err);
	}
	err = vfs_hash_insert(*vpp, fuse_vnode_hash(nodeid), LK_EXCLUSIVE,
	    td, &vp2, fuse_vnode_cmp, &nodeid);
	if (err)
		return (err);
	if (vp2 != NULL) {
		/* The hash handed back a different vnode for this id: use it. */
		*vpp = vp2;
		return (0);
	}

	ASSERT_VOP_ELOCKED(*vpp, "fuse_vnode_alloc");

	return (0);
}

/*
 * Obtain the vnode for nodeid and perform the per-lookup bookkeeping.
 *
 * mp     mount the node belongs to
 * nodeid FUSE node id
 * dvp    parent directory vnode, or NULL
 * vpp    receives the vnode on success
 * cnp    component name of the lookup, or NULL (curthread is then used as
 *        the calling thread)
 * vtyp   expected vnode type
 *
 * When dvp is given, the parent is recorded through fuse_vnode_setparent();
 * when cnp additionally requests MAKEENTRY, the name is entered into the
 * name cache.  Returns 0 or the error from fuse_vnode_alloc().
 */
int
fuse_vnode_get(struct mount *mp,
    uint64_t nodeid,
    struct vnode *dvp,
    struct vnode **vpp,
    struct componentname *cnp,
    enum vtype vtyp)
{
	struct thread *td = (cnp != NULL ? cnp->cn_thread : curthread);
	int err = 0;

	debug_printf("dvp=%p\n", dvp);

	err = fuse_vnode_alloc(mp, td, nodeid, vtyp, vpp);
	if (err) {
		return err;
	}
	if (dvp != NULL) {
		/*
		 * NOTE(review): these assertions dereference cnp without a
		 * NULL check although the rest of the function treats cnp as
		 * optional -- confirm that no caller passes dvp != NULL
		 * together with cnp == NULL.
		 */
		MPASS((cnp->cn_flags & ISDOTDOT) == 0);
		MPASS(!(cnp->cn_namelen == 1 && cnp->cn_nameptr[0] == '.'));
		fuse_vnode_setparent(*vpp, dvp);
	}
	if (dvp != NULL && cnp != NULL && (cnp->cn_flags & MAKEENTRY) != 0) {
		ASSERT_VOP_LOCKED(*vpp, "fuse_vnode_get");
		ASSERT_VOP_LOCKED(dvp, "fuse_vnode_get");
		cache_enter(dvp, *vpp, cnp);
	}

	/*
	 * In userland, libfuse uses cached lookups for dot and dotdot entries,
	 * thus it does not really bump the nlookup counter for forget.
	 * Follow the same semantics and avoid bumping it, in order to keep
	 * the nlookup counters consistent.
	 */
	if (cnp == NULL || ((cnp->cn_flags & ISDOTDOT) == 0 &&
	    (cnp->cn_namelen != 1 || cnp->cn_nameptr[0] != '.')))
		VTOFUD(*vpp)->nlookup++;

	return 0;
}

/*
 * Per-open hook: for regular files make sure a VM object is attached to
 * the vnode.  fuse_open_flags is accepted but not examined by the body.
 */
void
fuse_vnode_open(struct vnode *vp, int32_t fuse_open_flags, struct thread *td)
{
	/*
	 * This function is called for every vnode open.
	 * The fuse_open_flags to be merged may be 0.
	 *
	 * XXXIP: Handle FOPEN_DIRECT_IO and FOPEN_KEEP_CACHE
	 */

	if (vnode_vtype(vp) == VREG) {
		/* XXXIP prevent getattr, by using cached node size */
		vnode_create_vobject(vp, 0, td);
	}
}

/*
 * Report whether the cached attributes of vp are still usable: the current
 * uptime is compared against the cached_attrs_valid timestamp with "<=".
 */
int
fuse_isvalid_attr(struct vnode *vp)
{
	struct fuse_vnode_data *fvdat = VTOFUD(vp);
	struct timespec uptsp;

	nanouptime(&uptsp);
	return fuse_timespec_cmp(&uptsp, &fvdat->cached_attrs_valid, <=);
}

/*
 * Push the locally recorded file size (fvdat->filesize) to the daemon with
 * a FUSE_SETATTR request.
 *
 * cred may be NULL, in which case the credentials of curthread are used.
 * Returns EBADF on a dead file system, EISDIR for directories, EROFS on a
 * read-only mount, otherwise the result of the request.  FN_SIZECHANGE is
 * cleared only on success; the cached attributes are invalidated either way.
 */
int
fuse_vnode_savesize(struct vnode *vp, struct ucred *cred)
{
	struct fuse_vnode_data *fvdat = VTOFUD(vp);
	struct thread *td = curthread;
	struct fuse_filehandle *fufh = NULL;
	struct fuse_dispatcher fdi;
	struct fuse_setattr_in *fsai;
	int err = 0;

	DEBUG("inode=%ju size=%ju\n", (uintmax_t)VTOI(vp),
	    (uintmax_t)fvdat->filesize);
	/* NOTE(review): the assertion label names fuse_io_extend, not this function. */
	ASSERT_VOP_ELOCKED(vp, "fuse_io_extend");

	if (fuse_isdeadfs(vp)) {
		return EBADF;
	}
	if (vnode_vtype(vp) == VDIR) {
		return EISDIR;
	}
	if (vfs_isrdonly(vnode_mount(vp))) {
		return EROFS;
	}
	if (cred == NULL) {
		cred = td->td_ucred;
	}
	fdisp_init(&fdi, sizeof(*fsai));
	fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred);
	fsai = fdi.indata;
	fsai->valid = 0;

	/* Truncate to a new value. */
	fsai->size = fvdat->filesize;
	fsai->valid |= FATTR_SIZE;

	/* Attach a file handle to the request when one is available. */
	fuse_filehandle_getrw(vp, FUFH_WRONLY, &fufh);
	if (fufh) {
		fsai->fh = fufh->fh_id;
		fsai->valid |= FATTR_FH;
	}
	err = fdisp_wait_answ(&fdi);
	fdisp_destroy(&fdi);
	if (err == 0)
		fvdat->flag &= ~FN_SIZECHANGE;

	fuse_invalidate_attr(vp);

	return err;
}

/*
 * Re-fetch the attributes of vp unless that is unnecessary.  Nothing is
 * done when a local size change is still pending (FN_SIZECHANGE), when the
 * fuse_refresh_size tunable is off and a non-zero size is already known, or
 * when the cached attributes are still valid.  The VOP_GETATTR() result is
 * ignored; presumably the getattr path updates fvdat->filesize -- confirm.
 */
void
fuse_vnode_refreshsize(struct vnode *vp, struct ucred *cred)
{

	struct fuse_vnode_data *fvdat = VTOFUD(vp);
	struct vattr va;

	if ((fvdat->flag & FN_SIZECHANGE) != 0 ||
	    (fuse_refresh_size == 0 && fvdat->filesize != 0) ||
	    fuse_isvalid_attr(vp))
		return;

	VOP_GETATTR(vp, &va, cred);
	DEBUG("refreshed file size: %jd\n", (intmax_t)VTOFUD(vp)->filesize);
}

/*
 * Record a new file size locally.  The size is stored in the private data
 * and flagged FN_SIZECHANGE; when the file shrinks, buffers beyond the new
 * end are truncated; the pager is told about the new size and the cached
 * attributes are invalidated.  Returns the vtruncbuf() result, or 0 when no
 * truncation was needed.
 */
int
fuse_vnode_setsize(struct vnode *vp, struct ucred *cred, off_t newsize)
{
	struct fuse_vnode_data *fvdat = VTOFUD(vp);
	off_t oldsize;
	int err = 0;

	DEBUG("inode=%ju oldsize=%ju newsize=%ju\n",
	    (uintmax_t)VTOI(vp), (uintmax_t)fvdat->filesize,
	    (uintmax_t)newsize);
	ASSERT_VOP_ELOCKED(vp, "fuse_vnode_setsize");

	oldsize = fvdat->filesize;
	fvdat->filesize = newsize;
	fvdat->flag |= FN_SIZECHANGE;

	if (newsize < oldsize) {
		err = vtruncbuf(vp, cred, newsize, fuse_iosize(vp));
	}
	vnode_pager_setsize(vp, newsize);
	fuse_invalidate_attr(vp);

	return err;
}
diff --git a/sys/fs/fuse/fuse_node.h b/sys/fs/fuse/fuse_node.h new file mode 100644 index 0000000..45b15a4 --- /dev/null +++ b/sys/fs/fuse/fuse_node.h @@ -0,0 +1,142 @@ +/* + * Copyright (c) 2007-2009 Google Inc. and Amit Singh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + * + * $FreeBSD$ + */

#ifndef _FUSE_NODE_H_
#define _FUSE_NODE_H_

#include <sys/types.h>
#include <sys/mutex.h>

#include "fuse_file.h"

/* Bits for fuse_vnode_data.flag. */
#define FN_REVOKED           0x00000020
#define FN_FLUSHINPROG       0x00000040
#define FN_FLUSHWANT         0x00000080
#define FN_SIZECHANGE        0x00000100

/* Per-vnode private data, reachable from a vnode through VTOFUD(). */
struct fuse_vnode_data {
	/** self **/
	uint64_t	nid;

	/** parent **/
	/* XXXIP very likely to be stale, it's not updated in rename() */
	uint64_t	parent_nid;

	/** I/O **/
	struct fuse_filehandle	fufh[FUFH_MAXTYPE];

	/** flags **/
	uint32_t	flag;

	/** meta **/
	struct timespec	cached_attrs_valid;
	struct vattr	cached_attrs;
	off_t		filesize;
	uint64_t	nlookup;
	enum vtype	vtype;
};

/*
 * Accessors from a vnode to its FUSE private data:
 *   VTOFUD  - the struct fuse_vnode_data itself
 *   VTOI    - the node id
 *   VTOVA   - pointer to the cached attributes
 *   VTOILLU - the node id, or 0 when the vnode has no private data
 */
#define VTOFUD(vp) \
	((struct fuse_vnode_data *)((vp)->v_data))
#define VTOI(vp)    (VTOFUD(vp)->nid)
#define VTOVA(vp)   (&(VTOFUD(vp)->cached_attrs))
#define VTOILLU(vp) ((uint64_t)(VTOFUD(vp) ? VTOI(vp) : 0))

#define FUSE_NULL_ID 0

extern struct vop_vector fuse_vnops;

/*
 * Mark the cached attributes of vp as stale by zeroing the validity
 * timestamp.  A vnode without private data is left alone.
 */
static __inline__
void
fuse_invalidate_attr(struct vnode *vp)
{
	if (VTOFUD(vp)) {
		bzero(&VTOFUD(vp)->cached_attrs_valid, sizeof(struct timespec));
	}
}

/*
 * Remember the node id of the parent directory dvp in vp's private data.
 * Only done when vp is itself a directory and dvp is not NULL.
 */
static __inline void
fuse_vnode_setparent(struct vnode *vp, struct vnode *dvp)
{
	if (dvp != NULL && vp->v_type == VDIR) {
		MPASS(dvp->v_type == VDIR);
		VTOFUD(vp)->parent_nid = VTOI(dvp);
	}
}

/* Non-zero while the cached attributes of vp have not expired. */
int fuse_isvalid_attr(struct vnode *vp);

/* Free the private data attached to vp. */
void fuse_vnode_destroy(struct vnode *vp);

/* Find or create the vnode for nodeid; dvp and cnp may be NULL. */
int fuse_vnode_get(struct mount *mp,
                   uint64_t nodeid,
                   struct vnode *dvp,
                   struct vnode **vpp,
                   struct componentname *cnp,
                   enum vtype vtyp);

/* Per-open hook; attaches a VM object to regular files. */
void fuse_vnode_open(struct vnode *vp,
                     int32_t fuse_open_flags,
                     struct thread *td);

/* Re-fetch attributes when the cached size may be out of date. */
void fuse_vnode_refreshsize(struct vnode *vp, struct ucred *cred);

/* Send the locally recorded file size to the daemon (FUSE_SETATTR). */
int fuse_vnode_savesize(struct vnode *vp, struct ucred *cred);

/* Record a new file size locally and flag it as changed. */
int fuse_vnode_setsize(struct vnode *vp, struct ucred *cred, off_t newsize);

#endif /* _FUSE_NODE_H_ */
diff --git a/sys/fs/fuse/fuse_param.h b/sys/fs/fuse/fuse_param.h new file mode 100644 index 0000000..493c385 --- /dev/null +++ b/sys/fs/fuse/fuse_param.h @@ -0,0 +1,80 @@ +/* + * Copyright (c) 2007-2009 Google Inc. and Amit Singh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. 
+ * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * $FreeBSD$ + */

#ifndef _FUSE_PARAM_H_
#define _FUSE_PARAM_H_

/*
 * This is the prefix ("fuse" by default) of the name of a FUSE device node
 * in devfs. The suffix is the device number. "/dev/fuse0" is the first FUSE
 * device by default. If you change the prefix from the default to something
 * else, the user-space FUSE library will need to know about it too.
 */
#define FUSE_DEVICE_BASENAME               "fuse"

/*
 * This is the number of /dev/fuse<n> nodes we will create. <n> goes from
 * 0 to (FUSE_NDEVICES - 1).
 */
#define FUSE_NDEVICES                      16

/*
 * This is the default block size of the virtual storage devices that are
 * implicitly implemented by the FUSE kernel extension. This can be changed
 * on a per-mount basis (there's one such virtual device for each mount).
 */
#define FUSE_DEFAULT_BLOCKSIZE             4096

/*
 * This is the default I/O size used while accessing the virtual storage
 * devices. This can be changed on a per-mount basis.
 */
#define FUSE_DEFAULT_IOSIZE                4096

/*
 * NOTE(review): this guard tests KERNEL, not _KERNEL.  FreeBSD kernel
 * sources are normally built with _KERNEL defined, so the three definitions
 * below may never be visible -- confirm whether that is intended before
 * relying on them.
 */
#ifdef KERNEL

/*
 * This is the soft upper limit on the number of "request tickets" FUSE's
 * user-kernel IPC layer can have for a given mount. This can be modified
 * through the fuse.* sysctl interface.
 */
#define FUSE_DEFAULT_MAX_FREE_TICKETS      1024

#define FUSE_DEFAULT_IOV_PERMANENT_BUFSIZE (1L << 19)
#define FUSE_DEFAULT_IOV_CREDIT            16

#endif

/* Link count limit reported for FUSE file systems; mirrors LINK_MAX. */
#define FUSE_LINK_MAX                      LINK_MAX

#endif /* _FUSE_PARAM_H_ */
diff --git a/sys/fs/fuse/fuse_vfsops.c b/sys/fs/fuse/fuse_vfsops.c new file mode 100644 index 0000000..b8244ec --- /dev/null +++ b/sys/fs/fuse/fuse_vfsops.c @@ -0,0 +1,533 @@ +/* + * Copyright (c) 2007-2009 Google Inc. and Amit Singh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/module.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/capability.h> +#include <sys/conf.h> +#include <sys/filedesc.h> +#include <sys/uio.h> +#include <sys/malloc.h> +#include <sys/queue.h> +#include <sys/lock.h> +#include <sys/sx.h> +#include <sys/mutex.h> +#include <sys/proc.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/mount.h> +#include <sys/sysctl.h> +#include <sys/fcntl.h> + +#include "fuse.h" +#include "fuse_param.h" +#include "fuse_node.h" +#include "fuse_ipc.h" +#include "fuse_internal.h" + +#include <sys/priv.h> +#include <security/mac/mac_framework.h> + +#define FUSE_DEBUG_MODULE VFSOPS +#include "fuse_debug.h" + +/* This will do for privilege types for now */ +#ifndef PRIV_VFS_FUSE_ALLOWOTHER +#define PRIV_VFS_FUSE_ALLOWOTHER PRIV_VFS_MOUNT_NONUSER +#endif +#ifndef PRIV_VFS_FUSE_MOUNT_NONUSER +#define PRIV_VFS_FUSE_MOUNT_NONUSER PRIV_VFS_MOUNT_NONUSER +#endif +#ifndef PRIV_VFS_FUSE_SYNC_UNMOUNT +#define PRIV_VFS_FUSE_SYNC_UNMOUNT PRIV_VFS_MOUNT_NONUSER +#endif + +static vfs_mount_t fuse_vfsop_mount; +static vfs_unmount_t fuse_vfsop_unmount; +static vfs_root_t fuse_vfsop_root; +static vfs_statfs_t fuse_vfsop_statfs; + +struct vfsops fuse_vfsops = { + .vfs_mount = fuse_vfsop_mount, + .vfs_unmount = fuse_vfsop_unmount, + .vfs_root = fuse_vfsop_root, + .vfs_statfs = fuse_vfsop_statfs, +}; + +SYSCTL_INT(_vfs_fuse, OID_AUTO, init_backgrounded, CTLFLAG_RD, + 0, 1, "indicate async handshake"); +static int fuse_enforce_dev_perms = 0; + +SYSCTL_LONG(_vfs_fuse, OID_AUTO, enforce_dev_perms, CTLFLAG_RW, + &fuse_enforce_dev_perms, 0, + "enforce fuse device permissions for secondary mounts"); +static unsigned sync_unmount = 1; + +SYSCTL_UINT(_vfs_fuse, OID_AUTO, sync_unmount, CTLFLAG_RW, + &sync_unmount, 0, "specify when to use synchronous unmount"); + +MALLOC_DEFINE(M_FUSEVFS, 
"fuse_filesystem", "buffer for fuse vfs layer"); + +static int +fuse_getdevice(const char *fspec, struct thread *td, struct cdev **fdevp) +{ + struct nameidata nd, *ndp = &nd; + struct vnode *devvp; + struct cdev *fdev; + int err; + + /* + * Not an update, or updating the name: look up the name + * and verify that it refers to a sensible disk device. + */ + + NDINIT(ndp, LOOKUP, FOLLOW, UIO_SYSSPACE, fspec, td); + if ((err = namei(ndp)) != 0) + return err; + NDFREE(ndp, NDF_ONLY_PNBUF); + devvp = ndp->ni_vp; + + if (devvp->v_type != VCHR) { + vrele(devvp); + return ENXIO; + } + fdev = devvp->v_rdev; + dev_ref(fdev); + + if (fuse_enforce_dev_perms) { + /* + * Check if mounter can open the fuse device. + * + * This has significance only if we are doing a secondary mount + * which doesn't involve actually opening fuse devices, but we + * still want to enforce the permissions of the device (in + * order to keep control over the circle of fuse users). + * + * (In case of primary mounts, we are either the superuser so + * we can do anything anyway, or we can mount only if the + * device is already opened by us, ie. we are permitted to open + * the device.) 
+ */ +#if 0 +#ifdef MAC + err = mac_check_vnode_open(td->td_ucred, devvp, VREAD | VWRITE); + if (!err) +#endif +#endif /* 0 */ + err = VOP_ACCESS(devvp, VREAD | VWRITE, td->td_ucred, td); + if (err) { + vrele(devvp); + dev_rel(fdev); + return err; + } + } + /* + * according to coda code, no extra lock is needed -- + * although in sys/vnode.h this field is marked "v" + */ + vrele(devvp); + + if (!fdev->si_devsw || + strcmp("fuse", fdev->si_devsw->d_name)) { + dev_rel(fdev); + return ENXIO; + } + *fdevp = fdev; + + return 0; +} + +#define FUSE_FLAGOPT(fnam, fval) do { \ + vfs_flagopt(opts, #fnam, &mntopts, fval); \ + vfs_flagopt(opts, "__" #fnam, &__mntopts, fval); \ +} while (0) + +static int +fuse_vfsop_mount(struct mount *mp) +{ + int err; + + uint64_t mntopts, __mntopts; + int max_read_set; + uint32_t max_read; + int daemon_timeout; + int fd; + + size_t len; + + struct cdev *fdev; + struct fuse_data *data; + struct thread *td; + struct file *fp, *fptmp; + char *fspec, *subtype; + struct vfsoptlist *opts; + + subtype = NULL; + max_read_set = 0; + max_read = ~0; + err = 0; + mntopts = 0; + __mntopts = 0; + td = curthread; + + fuse_trace_printf_vfsop(); + + if (mp->mnt_flag & MNT_UPDATE) + return EOPNOTSUPP; + + mp->mnt_flag |= MNT_SYNCHRONOUS; + mp->mnt_data = NULL; + /* Get the new options passed to mount */ + opts = mp->mnt_optnew; + + if (!opts) + return EINVAL; + + /* `fspath' contains the mount point (eg. /mnt/fuse/sshfs); REQUIRED */ + if (!vfs_getopts(opts, "fspath", &err)) + return err; + + /* `from' contains the device name (eg. 
/dev/fuse0); REQUIRED */ + fspec = vfs_getopts(opts, "from", &err); + if (!fspec) + return err; + + /* `fd' contains the filedescriptor for this session; REQUIRED */ + if (vfs_scanopt(opts, "fd", "%d", &fd) != 1) + return EINVAL; + + err = fuse_getdevice(fspec, td, &fdev); + if (err != 0) + return err; + + /* + * With the help of underscored options the mount program + * can inform us from the flags it sets by default + */ + FUSE_FLAGOPT(allow_other, FSESS_DAEMON_CAN_SPY); + FUSE_FLAGOPT(push_symlinks_in, FSESS_PUSH_SYMLINKS_IN); + FUSE_FLAGOPT(default_permissions, FSESS_DEFAULT_PERMISSIONS); + FUSE_FLAGOPT(no_attrcache, FSESS_NO_ATTRCACHE); + FUSE_FLAGOPT(no_readahed, FSESS_NO_READAHEAD); + FUSE_FLAGOPT(no_datacache, FSESS_NO_DATACACHE); + FUSE_FLAGOPT(no_namecache, FSESS_NO_NAMECACHE); + FUSE_FLAGOPT(no_mmap, FSESS_NO_MMAP); + FUSE_FLAGOPT(brokenio, FSESS_BROKENIO); + + if (vfs_scanopt(opts, "max_read=", "%u", &max_read) == 1) + max_read_set = 1; + if (vfs_scanopt(opts, "timeout=", "%u", &daemon_timeout) == 1) { + if (daemon_timeout < FUSE_MIN_DAEMON_TIMEOUT) + daemon_timeout = FUSE_MIN_DAEMON_TIMEOUT; + else if (daemon_timeout > FUSE_MAX_DAEMON_TIMEOUT) + daemon_timeout = FUSE_MAX_DAEMON_TIMEOUT; + } else { + daemon_timeout = FUSE_DEFAULT_DAEMON_TIMEOUT; + } + subtype = vfs_getopts(opts, "subtype=", &err); + + DEBUG2G("mntopts 0x%jx\n", (uintmax_t)mntopts); + + err = fget(td, fd, CAP_READ, &fp); + if (err != 0) { + DEBUG("invalid or not opened device: data=%p\n", data); + goto out; + } + fptmp = td->td_fpop; + td->td_fpop = fp; + err = devfs_get_cdevpriv((void **)&data); + td->td_fpop = fptmp; + fdrop(fp, td); + FUSE_LOCK(); + if (err != 0 || data == NULL || data->mp != NULL) { + DEBUG("invalid or not opened device: data=%p data.mp=%p\n", + data, data != NULL ? 
data->mp : NULL); + err = ENXIO; + FUSE_UNLOCK(); + goto out; + } + if (fdata_get_dead(data)) { + DEBUG("device is dead during mount: data=%p\n", data); + err = ENOTCONN; + FUSE_UNLOCK(); + goto out; + } + /* Sanity + permission checks */ + if (!data->daemoncred) + panic("fuse daemon found, but identity unknown"); + if (mntopts & FSESS_DAEMON_CAN_SPY) + err = priv_check(td, PRIV_VFS_FUSE_ALLOWOTHER); + if (err == 0 && td->td_ucred->cr_uid != data->daemoncred->cr_uid) + /* are we allowed to do the first mount? */ + err = priv_check(td, PRIV_VFS_FUSE_MOUNT_NONUSER); + if (err) { + FUSE_UNLOCK(); + goto out; + } + /* We need this here as this slot is used by getnewvnode() */ + mp->mnt_stat.f_iosize = PAGE_SIZE; + mp->mnt_data = data; + data->ref++; + data->mp = mp; + data->dataflags |= mntopts; + data->max_read = max_read; + data->daemon_timeout = daemon_timeout; +#ifdef XXXIP + if (!priv_check(td, PRIV_VFS_FUSE_SYNC_UNMOUNT)) + data->dataflags |= FSESS_CAN_SYNC_UNMOUNT; +#endif + FUSE_UNLOCK(); + + vfs_getnewfsid(mp); + mp->mnt_flag |= MNT_LOCAL; + mp->mnt_kern_flag |= MNTK_MPSAFE; + if (subtype) { + strlcat(mp->mnt_stat.f_fstypename, ".", MFSNAMELEN); + strlcat(mp->mnt_stat.f_fstypename, subtype, MFSNAMELEN); + } + copystr(fspec, mp->mnt_stat.f_mntfromname, MNAMELEN - 1, &len); + bzero(mp->mnt_stat.f_mntfromname + len, MNAMELEN - len); + DEBUG2G("mp %p: %s\n", mp, mp->mnt_stat.f_mntfromname); + + /* Now handshaking with daemon */ + fuse_internal_send_init(data, td); + +out: + if (err) { + FUSE_LOCK(); + if (data->mp == mp) { + /* + * Destroy device only if we acquired reference to + * it + */ + DEBUG("mount failed, destroy device: data=%p mp=%p" + " err=%d\n", + data, mp, err); + data->mp = NULL; + fdata_trydestroy(data); + } + FUSE_UNLOCK(); + dev_rel(fdev); + } + return err; +} + +static int +fuse_vfsop_unmount(struct mount *mp, int mntflags) +{ + int err = 0; + int flags = 0; + + struct cdev *fdev; + struct fuse_data *data; + struct fuse_dispatcher fdi; + struct 
thread *td = curthread; + + fuse_trace_printf_vfsop(); + + if (mntflags & MNT_FORCE) { + flags |= FORCECLOSE; + } + data = fuse_get_mpdata(mp); + if (!data) { + panic("no private data for mount point?"); + } + /* There is 1 extra root vnode reference (mp->mnt_data). */ + FUSE_LOCK(); + if (data->vroot != NULL) { + struct vnode *vroot = data->vroot; + + data->vroot = NULL; + FUSE_UNLOCK(); + vrele(vroot); + } else + FUSE_UNLOCK(); + err = vflush(mp, 0, flags, td); + if (err) { + debug_printf("vflush failed"); + return err; + } + if (fdata_get_dead(data)) { + goto alreadydead; + } + fdisp_init(&fdi, 0); + fdisp_make(&fdi, FUSE_DESTROY, mp, 0, td, NULL); + + err = fdisp_wait_answ(&fdi); + fdisp_destroy(&fdi); + + fdata_set_dead(data); + +alreadydead: + FUSE_LOCK(); + data->mp = NULL; + fdev = data->fdev; + fdata_trydestroy(data); + FUSE_UNLOCK(); + + MNT_ILOCK(mp); + mp->mnt_data = NULL; + mp->mnt_flag &= ~MNT_LOCAL; + MNT_IUNLOCK(mp); + + dev_rel(fdev); + + return 0; +} + +static int +fuse_vfsop_root(struct mount *mp, int lkflags, struct vnode **vpp) +{ + struct fuse_data *data = fuse_get_mpdata(mp); + int err = 0; + + if (data->vroot != NULL) { + err = vget(data->vroot, lkflags, curthread); + if (err == 0) + *vpp = data->vroot; + } else { + err = fuse_vnode_get(mp, FUSE_ROOT_ID, NULL, vpp, NULL, VDIR); + if (err == 0) { + FUSE_LOCK(); + MPASS(data->vroot == NULL || data->vroot == *vpp); + if (data->vroot == NULL) { + DEBUG("new root vnode\n"); + data->vroot = *vpp; + FUSE_UNLOCK(); + vref(*vpp); + } else if (data->vroot != *vpp) { + DEBUG("root vnode race\n"); + FUSE_UNLOCK(); + VOP_UNLOCK(*vpp, 0); + vrele(*vpp); + vrecycle(*vpp); + *vpp = data->vroot; + } else + FUSE_UNLOCK(); + } + } + return err; +} + +static int +fuse_vfsop_statfs(struct mount *mp, struct statfs *sbp) +{ + struct fuse_dispatcher fdi; + int err = 0; + + struct fuse_statfs_out *fsfo; + struct fuse_data *data; + + DEBUG2G("mp %p: %s\n", mp, mp->mnt_stat.f_mntfromname); + data = 
fuse_get_mpdata(mp); + + if (!(data->dataflags & FSESS_INITED)) + goto fake; + + fdisp_init(&fdi, 0); + fdisp_make(&fdi, FUSE_STATFS, mp, FUSE_ROOT_ID, NULL, NULL); + err = fdisp_wait_answ(&fdi); + if (err) { + fdisp_destroy(&fdi); + if (err == ENOTCONN) { + /* + * We want to seem a legitimate fs even if the daemon + * is stiff dead... (so that, eg., we can still do path + * based unmounting after the daemon dies). + */ + goto fake; + } + return err; + } + fsfo = fdi.answ; + + sbp->f_blocks = fsfo->st.blocks; + sbp->f_bfree = fsfo->st.bfree; + sbp->f_bavail = fsfo->st.bavail; + sbp->f_files = fsfo->st.files; + sbp->f_ffree = fsfo->st.ffree; /* cast from uint64_t to int64_t */ + sbp->f_namemax = fsfo->st.namelen; + sbp->f_bsize = fsfo->st.frsize; /* cast from uint32_t to uint64_t */ + + DEBUG("fuse_statfs_out -- blocks: %llu, bfree: %llu, bavail: %llu, " + "fil es: %llu, ffree: %llu, bsize: %i, namelen: %i\n", + (unsigned long long)fsfo->st.blocks, + (unsigned long long)fsfo->st.bfree, + (unsigned long long)fsfo->st.bavail, + (unsigned long long)fsfo->st.files, + (unsigned long long)fsfo->st.ffree, fsfo->st.bsize, + fsfo->st.namelen); + + fdisp_destroy(&fdi); + return 0; + +fake: + sbp->f_blocks = 0; + sbp->f_bfree = 0; + sbp->f_bavail = 0; + sbp->f_files = 0; + sbp->f_ffree = 0; + sbp->f_namemax = 0; + sbp->f_bsize = FUSE_DEFAULT_BLOCKSIZE; + + return 0; +} diff --git a/sys/fs/fuse/fuse_vnops.c b/sys/fs/fuse/fuse_vnops.c new file mode 100644 index 0000000..172210f --- /dev/null +++ b/sys/fs/fuse/fuse_vnops.c @@ -0,0 +1,2037 @@ +/* + * Copyright (c) 2007-2009 Google Inc. and Amit Singh + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions are + * met: + * + * * Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. 
+ * * Redistributions in binary form must reproduce the above + * copyright notice, this list of conditions and the following disclaimer + * in the documentation and/or other materials provided with the + * distribution. + * * Neither the name of Google Inc. nor the names of its + * contributors may be used to endorse or promote products derived from + * this software without specific prior written permission. + * + * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS + * "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT + * LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR + * A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT + * OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, + * SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT + * LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE + * OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + * + * Copyright (C) 2005 Csaba Henk. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice, this list of conditions and the following disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. 
+ * + * THIS SOFTWARE IS PROVIDED BY AUTHOR AND CONTRIBUTORS ``AS IS'' AND + * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE + * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE + * ARE DISCLAIMED. IN NO EVENT SHALL AUTHOR OR CONTRIBUTORS BE LIABLE + * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL + * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS + * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) + * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT + * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY + * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF + * SUCH DAMAGE. + */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/types.h> +#include <sys/module.h> +#include <sys/systm.h> +#include <sys/errno.h> +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/uio.h> +#include <sys/malloc.h> +#include <sys/queue.h> +#include <sys/lock.h> +#include <sys/mutex.h> +#include <sys/sx.h> +#include <sys/proc.h> +#include <sys/mount.h> +#include <sys/vnode.h> +#include <sys/namei.h> +#include <sys/stat.h> +#include <sys/unistd.h> +#include <sys/filedesc.h> +#include <sys/file.h> +#include <sys/fcntl.h> +#include <sys/dirent.h> +#include <sys/bio.h> +#include <sys/buf.h> +#include <sys/sysctl.h> + +#include <vm/vm.h> +#include <vm/vm_extern.h> +#include <vm/pmap.h> +#include <vm/vm_map.h> +#include <vm/vm_page.h> +#include <vm/vm_param.h> +#include <vm/vm_object.h> +#include <vm/vm_pager.h> +#include <vm/vnode_pager.h> +#include <vm/vm_object.h> + +#include "fuse.h" +#include "fuse_file.h" +#include "fuse_internal.h" +#include "fuse_ipc.h" +#include "fuse_node.h" +#include "fuse_param.h" +#include "fuse_io.h" + +#include <sys/priv.h> + +#define FUSE_DEBUG_MODULE VNOPS +#include "fuse_debug.h" + +/* vnode ops */ +static vop_access_t 
fuse_vnop_access; +static vop_close_t fuse_vnop_close; +static vop_create_t fuse_vnop_create; +static vop_fsync_t fuse_vnop_fsync; +static vop_getattr_t fuse_vnop_getattr; +static vop_inactive_t fuse_vnop_inactive; +static vop_link_t fuse_vnop_link; +static vop_lookup_t fuse_vnop_lookup; +static vop_mkdir_t fuse_vnop_mkdir; +static vop_mknod_t fuse_vnop_mknod; +static vop_open_t fuse_vnop_open; +static vop_read_t fuse_vnop_read; +static vop_readdir_t fuse_vnop_readdir; +static vop_readlink_t fuse_vnop_readlink; +static vop_reclaim_t fuse_vnop_reclaim; +static vop_remove_t fuse_vnop_remove; +static vop_rename_t fuse_vnop_rename; +static vop_rmdir_t fuse_vnop_rmdir; +static vop_setattr_t fuse_vnop_setattr; +static vop_strategy_t fuse_vnop_strategy; +static vop_symlink_t fuse_vnop_symlink; +static vop_write_t fuse_vnop_write; +static vop_getpages_t fuse_vnop_getpages; +static vop_putpages_t fuse_vnop_putpages; +static vop_print_t fuse_vnop_print; + +struct vop_vector fuse_vnops = { + .vop_default = &default_vnodeops, + .vop_access = fuse_vnop_access, + .vop_close = fuse_vnop_close, + .vop_create = fuse_vnop_create, + .vop_fsync = fuse_vnop_fsync, + .vop_getattr = fuse_vnop_getattr, + .vop_inactive = fuse_vnop_inactive, + .vop_link = fuse_vnop_link, + .vop_lookup = fuse_vnop_lookup, + .vop_mkdir = fuse_vnop_mkdir, + .vop_mknod = fuse_vnop_mknod, + .vop_open = fuse_vnop_open, + .vop_pathconf = vop_stdpathconf, + .vop_read = fuse_vnop_read, + .vop_readdir = fuse_vnop_readdir, + .vop_readlink = fuse_vnop_readlink, + .vop_reclaim = fuse_vnop_reclaim, + .vop_remove = fuse_vnop_remove, + .vop_rename = fuse_vnop_rename, + .vop_rmdir = fuse_vnop_rmdir, + .vop_setattr = fuse_vnop_setattr, + .vop_strategy = fuse_vnop_strategy, + .vop_symlink = fuse_vnop_symlink, + .vop_write = fuse_vnop_write, + .vop_getpages = fuse_vnop_getpages, + .vop_putpages = fuse_vnop_putpages, + .vop_print = fuse_vnop_print, +}; + +static u_long fuse_lookup_cache_hits = 0; + +SYSCTL_ULONG(_vfs_fuse, 
OID_AUTO, lookup_cache_hits, CTLFLAG_RD, + &fuse_lookup_cache_hits, 0, ""); + +static u_long fuse_lookup_cache_misses = 0; + +SYSCTL_ULONG(_vfs_fuse, OID_AUTO, lookup_cache_misses, CTLFLAG_RD, + &fuse_lookup_cache_misses, 0, ""); + +int fuse_lookup_cache_enable = 1; + +SYSCTL_INT(_vfs_fuse, OID_AUTO, lookup_cache_enable, CTLFLAG_RW, + &fuse_lookup_cache_enable, 0, ""); + +/* + * XXX: This feature is highly experimental and can bring to instabilities, + * needs revisiting before to be enabled by default. + */ +static int fuse_reclaim_revoked = 0; + +SYSCTL_INT(_vfs_fuse, OID_AUTO, reclaim_revoked, CTLFLAG_RW, + &fuse_reclaim_revoked, 0, ""); + +int fuse_pbuf_freecnt = -1; + +#define fuse_vm_page_lock(m) vm_page_lock((m)); +#define fuse_vm_page_unlock(m) vm_page_unlock((m)); +#define fuse_vm_page_lock_queues() ((void)0) +#define fuse_vm_page_unlock_queues() ((void)0) + +/* + struct vnop_access_args { + struct vnode *a_vp; +#if VOP_ACCESS_TAKES_ACCMODE_T + accmode_t a_accmode; +#else + int a_mode; +#endif + struct ucred *a_cred; + struct thread *a_td; + }; +*/ +static int +fuse_vnop_access(struct vop_access_args *ap) +{ + struct vnode *vp = ap->a_vp; + int accmode = ap->a_accmode; + struct ucred *cred = ap->a_cred; + + struct fuse_access_param facp; + struct fuse_data *data = fuse_get_mpdata(vnode_mount(vp)); + + int err; + + DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp)); + + if (fuse_isdeadfs(vp)) { + if (vnode_isvroot(vp)) { + return 0; + } + return ENXIO; + } + if (!(data->dataflags & FSESS_INITED)) { + if (vnode_isvroot(vp)) { + if (priv_check_cred(cred, PRIV_VFS_ADMIN, 0) || + (fuse_match_cred(data->daemoncred, cred) == 0)) { + return 0; + } + } + return EBADF; + } + if (vnode_islnk(vp)) { + return 0; + } + bzero(&facp, sizeof(facp)); + + err = fuse_internal_access(vp, accmode, &facp, ap->a_td, ap->a_cred); + DEBUG2G("err=%d accmode=0x%x\n", err, accmode); + return err; +} + +/* + struct vnop_close_args { + struct vnode *a_vp; + int a_fflag; + struct ucred *a_cred; 
+ struct thread *a_td; + }; +*/ +static int +fuse_vnop_close(struct vop_close_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct ucred *cred = ap->a_cred; + int fflag = ap->a_fflag; + fufh_type_t fufh_type; + + fuse_trace_printf_vnop(); + + if (fuse_isdeadfs(vp)) { + return 0; + } + if (vnode_isdir(vp)) { + if (fuse_filehandle_valid(vp, FUFH_RDONLY)) { + fuse_filehandle_close(vp, FUFH_RDONLY, NULL, cred); + } + return 0; + } + if (fflag & IO_NDELAY) { + return 0; + } + fufh_type = fuse_filehandle_xlate_from_fflags(fflag); + + if (!fuse_filehandle_valid(vp, fufh_type)) { + int i; + + for (i = 0; i < FUFH_MAXTYPE; i++) + if (fuse_filehandle_valid(vp, i)) + break; + if (i == FUFH_MAXTYPE) + panic("FUSE: fufh type %d found to be invalid in close" + " (fflag=0x%x)\n", + fufh_type, fflag); + } + if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { + fuse_vnode_savesize(vp, cred); + } + return 0; +} + +/* + struct vnop_create_args { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + }; +*/ +static int +fuse_vnop_create(struct vop_create_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct vattr *vap = ap->a_vap; + struct thread *td = cnp->cn_thread; + struct ucred *cred = cnp->cn_cred; + + struct fuse_open_in *foi; + struct fuse_entry_out *feo; + struct fuse_dispatcher fdi; + struct fuse_dispatcher *fdip = &fdi; + + int err; + + struct mount *mp = vnode_mount(dvp); + uint64_t parentnid = VTOFUD(dvp)->nid; + mode_t mode = MAKEIMODE(vap->va_type, vap->va_mode); + uint64_t x_fh_id; + uint32_t x_open_flags; + + fuse_trace_printf_vnop(); + + if (fuse_isdeadfs(dvp)) { + return ENXIO; + } + bzero(&fdi, sizeof(fdi)); + + /* XXX: Will we ever want devices ? 
*/ + if ((vap->va_type != VREG)) { + MPASS(vap->va_type != VFIFO); + goto bringup; + } + debug_printf("parent nid = %ju, mode = %x\n", (uintmax_t)parentnid, + mode); + + fdisp_init(fdip, sizeof(*foi) + cnp->cn_namelen + 1); + if (!fsess_isimpl(mp, FUSE_CREATE)) { + debug_printf("eh, daemon doesn't implement create?\n"); + return (EINVAL); + } + fdisp_make(fdip, FUSE_CREATE, vnode_mount(dvp), parentnid, td, cred); + + foi = fdip->indata; + foi->mode = mode; + foi->flags = O_CREAT | O_RDWR; + + memcpy((char *)fdip->indata + sizeof(*foi), cnp->cn_nameptr, + cnp->cn_namelen); + ((char *)fdip->indata)[sizeof(*foi) + cnp->cn_namelen] = '\0'; + + err = fdisp_wait_answ(fdip); + + if (err == ENOSYS) { + debug_printf("create: got ENOSYS from daemon\n"); + fsess_set_notimpl(mp, FUSE_CREATE); + fdisp_destroy(fdip); + } else if (err) { + debug_printf("create: darn, got err=%d from daemon\n", err); + goto out; + } +bringup: + feo = fdip->answ; + + if ((err = fuse_internal_checkentry(feo, VREG))) { + goto out; + } + err = fuse_vnode_get(mp, feo->nodeid, dvp, vpp, cnp, VREG); + if (err) { + struct fuse_release_in *fri; + uint64_t nodeid = feo->nodeid; + uint64_t fh_id = ((struct fuse_open_out *)(feo + 1))->fh; + + fdisp_init(fdip, sizeof(*fri)); + fdisp_make(fdip, FUSE_RELEASE, mp, nodeid, td, cred); + fri = fdip->indata; + fri->fh = fh_id; + fri->flags = OFLAGS(mode); + fuse_insert_callback(fdip->tick, fuse_internal_forget_callback); + fuse_insert_message(fdip->tick); + return err; + } + ASSERT_VOP_ELOCKED(*vpp, "fuse_vnop_create"); + + fdip->answ = feo + 1; + + x_fh_id = ((struct fuse_open_out *)(feo + 1))->fh; + x_open_flags = ((struct fuse_open_out *)(feo + 1))->open_flags; + fuse_filehandle_init(*vpp, FUFH_RDWR, NULL, x_fh_id); + fuse_vnode_open(*vpp, x_open_flags, td); + cache_purge_negative(dvp); + +out: + fdisp_destroy(fdip); + return err; +} + +/* + * Our vnop_fsync roughly corresponds to the FUSE_FSYNC method. The Linux + * version of FUSE also has a FUSE_FLUSH method. 
+ * + * On Linux, fsync() synchronizes a file's complete in-core state with that + * on disk. The call is not supposed to return until the system has completed + * that action or until an error is detected. + * + * Linux also has an fdatasync() call that is similar to fsync() but is not + * required to update the metadata such as access time and modification time. + */ + +/* + struct vnop_fsync_args { + struct vnodeop_desc *a_desc; + struct vnode * a_vp; + struct ucred * a_cred; + int a_waitfor; + struct thread * a_td; + }; +*/ +static int +fuse_vnop_fsync(struct vop_fsync_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct thread *td = ap->a_td; + + struct fuse_filehandle *fufh; + struct fuse_vnode_data *fvdat = VTOFUD(vp); + + int type, err = 0; + + fuse_trace_printf_vnop(); + + if (fuse_isdeadfs(vp)) { + return 0; + } + if ((err = vop_stdfsync(ap))) + return err; + + if (!fsess_isimpl(vnode_mount(vp), + (vnode_vtype(vp) == VDIR ? FUSE_FSYNCDIR : FUSE_FSYNC))) { + goto out; + } + for (type = 0; type < FUFH_MAXTYPE; type++) { + fufh = &(fvdat->fufh[type]); + if (FUFH_IS_VALID(fufh)) { + fuse_internal_fsync(vp, td, NULL, fufh); + } + } + +out: + return 0; +} + +/* + struct vnop_getattr_args { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct thread *a_td; + }; +*/ +static int +fuse_vnop_getattr(struct vop_getattr_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + struct ucred *cred = ap->a_cred; + struct thread *td = curthread; + struct fuse_vnode_data *fvdat = VTOFUD(vp); + + int err = 0; + int dataflags; + struct fuse_dispatcher fdi; + + DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp)); + + dataflags = fuse_get_mpdata(vnode_mount(vp))->dataflags; + + /* Note that we are not bailing out on a dead file system just yet. 
*/ + + /* look for cached attributes */ + if (fuse_isvalid_attr(vp)) { + if (vap != VTOVA(vp)) { + memcpy(vap, VTOVA(vp), sizeof(*vap)); + } + if ((fvdat->flag & FN_SIZECHANGE) != 0) { + vap->va_size = fvdat->filesize; + } + debug_printf("return cached: inode=%ju\n", (uintmax_t)VTOI(vp)); + return 0; + } + if (!(dataflags & FSESS_INITED)) { + if (!vnode_isvroot(vp)) { + fdata_set_dead(fuse_get_mpdata(vnode_mount(vp))); + err = ENOTCONN; + debug_printf("fuse_getattr b: returning ENOTCONN\n"); + return err; + } else { + goto fake; + } + } + fdisp_init(&fdi, 0); + if ((err = fdisp_simple_putget_vp(&fdi, FUSE_GETATTR, vp, td, cred))) { + if ((err == ENOTCONN) && vnode_isvroot(vp)) { + /* see comment at similar place in fuse_statfs() */ + fdisp_destroy(&fdi); + goto fake; + } + if (err == ENOENT) { + fuse_internal_vnode_disappear(vp); + } + goto out; + } + cache_attrs(vp, (struct fuse_attr_out *)fdi.answ); + if (vap != VTOVA(vp)) { + memcpy(vap, VTOVA(vp), sizeof(*vap)); + } + if ((fvdat->flag & FN_SIZECHANGE) != 0) + vap->va_size = fvdat->filesize; + + if (vnode_isreg(vp) && (fvdat->flag & FN_SIZECHANGE) == 0) { + /* + * This is for those cases when the file size changed without us + * knowing, and we want to catch up. 
+ */ + off_t new_filesize = ((struct fuse_attr_out *) + fdi.answ)->attr.size; + + if (fvdat->filesize != new_filesize) { + fuse_vnode_setsize(vp, cred, new_filesize); + } + } + KASSERT(vnode_vtype(vp) == vap->va_type, ("stale vnode")); + debug_printf("fuse_getattr e: returning 0\n"); + +out: + fdisp_destroy(&fdi); + return err; + +fake: + bzero(vap, sizeof(*vap)); + vap->va_type = vnode_vtype(vp); + + return 0; +} + +/* + struct vnop_inactive_args { + struct vnode *a_vp; + struct thread *a_td; + }; +*/ +static int +fuse_vnop_inactive(struct vop_inactive_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct thread *td = ap->a_td; + + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_filehandle *fufh = NULL; + + int type, need_flush = 1; + + DEBUG("inode=%ju\n", (uintmax_t)VTOI(vp)); + + for (type = 0; type < FUFH_MAXTYPE; type++) { + fufh = &(fvdat->fufh[type]); + if (FUFH_IS_VALID(fufh)) { + if (need_flush && vp->v_type == VREG) { + if ((VTOFUD(vp)->flag & FN_SIZECHANGE) != 0) { + fuse_vnode_savesize(vp, NULL); + } + if (fuse_data_cache_invalidate || + (fvdat->flag & FN_REVOKED) != 0) + fuse_io_invalbuf(vp, td); + else + fuse_io_flushbuf(vp, MNT_WAIT, td); + need_flush = 0; + } + fuse_filehandle_close(vp, type, td, NULL); + } + } + + if ((fvdat->flag & FN_REVOKED) != 0 && fuse_reclaim_revoked) { + vrecycle(vp); + } + return 0; +} + +/* + struct vnop_link_args { + struct vnode *a_tdvp; + struct vnode *a_vp; + struct componentname *a_cnp; + }; +*/ +static int +fuse_vnop_link(struct vop_link_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct vnode *tdvp = ap->a_tdvp; + struct componentname *cnp = ap->a_cnp; + + struct vattr *vap = VTOVA(vp); + + struct fuse_dispatcher fdi; + struct fuse_entry_out *feo; + struct fuse_link_in fli; + + int err; + + fuse_trace_printf_vnop(); + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + if (vnode_mount(tdvp) != vnode_mount(vp)) { + return EXDEV; + } + if (vap->va_nlink >= FUSE_LINK_MAX) { + return EMLINK; + } + fli.oldnodeid 
= VTOI(vp); + + fdisp_init(&fdi, 0); + fuse_internal_newentry_makerequest(vnode_mount(tdvp), VTOI(tdvp), cnp, + FUSE_LINK, &fli, sizeof(fli), &fdi); + if ((err = fdisp_wait_answ(&fdi))) { + goto out; + } + feo = fdi.answ; + + err = fuse_internal_checkentry(feo, vnode_vtype(vp)); + fuse_invalidate_attr(tdvp); + fuse_invalidate_attr(vp); + +out: + fdisp_destroy(&fdi); + return err; +} + +/* + struct vnop_lookup_args { + struct vnodeop_desc *a_desc; + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + }; +*/ +int +fuse_vnop_lookup(struct vop_lookup_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct thread *td = cnp->cn_thread; + struct ucred *cred = cnp->cn_cred; + + int nameiop = cnp->cn_nameiop; + int flags = cnp->cn_flags; + int wantparent = flags & (LOCKPARENT | WANTPARENT); + int islastcn = flags & ISLASTCN; + struct mount *mp = vnode_mount(dvp); + + int err = 0; + int lookup_err = 0; + struct vnode *vp = NULL; + + struct fuse_dispatcher fdi; + enum fuse_opcode op; + + uint64_t nid; + struct fuse_access_param facp; + + DEBUG2G("parent_inode=%ju - %*s\n", + (uintmax_t)VTOI(dvp), (int)cnp->cn_namelen, cnp->cn_nameptr); + + if (fuse_isdeadfs(dvp)) { + *vpp = NULL; + return ENXIO; + } + if (!vnode_isdir(dvp)) { + return ENOTDIR; + } + if (islastcn && vfs_isrdonly(mp) && (nameiop != LOOKUP)) { + return EROFS; + } + /* + * We do access check prior to doing anything else only in the case + * when we are at fs root (we'd like to say, "we are at the first + * component", but that's not exactly the same... nevermind). + * See further comments at further access checks. 
+ */ + + bzero(&facp, sizeof(facp)); + if (vnode_isvroot(dvp)) { /* early permission check hack */ + if ((err = fuse_internal_access(dvp, VEXEC, &facp, td, cred))) { + return err; + } + } + if (flags & ISDOTDOT) { + nid = VTOFUD(dvp)->parent_nid; + if (nid == 0) { + return ENOENT; + } + fdisp_init(&fdi, 0); + op = FUSE_GETATTR; + goto calldaemon; + } else if (cnp->cn_namelen == 1 && *(cnp->cn_nameptr) == '.') { + nid = VTOI(dvp); + fdisp_init(&fdi, 0); + op = FUSE_GETATTR; + goto calldaemon; + } else if (fuse_lookup_cache_enable) { + err = cache_lookup(dvp, vpp, cnp, NULL, NULL); + switch (err) { + + case -1: /* positive match */ + atomic_add_acq_long(&fuse_lookup_cache_hits, 1); + return 0; + + case 0: /* no match in cache */ + atomic_add_acq_long(&fuse_lookup_cache_misses, 1); + break; + + case ENOENT: /* negative match */ + /* fall through */ + default: + return err; + } + } + nid = VTOI(dvp); + fdisp_init(&fdi, cnp->cn_namelen + 1); + op = FUSE_LOOKUP; + +calldaemon: + fdisp_make(&fdi, op, mp, nid, td, cred); + + if (op == FUSE_LOOKUP) { + memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); + ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; + } + lookup_err = fdisp_wait_answ(&fdi); + + if ((op == FUSE_LOOKUP) && !lookup_err) { /* lookup call succeeded */ + nid = ((struct fuse_entry_out *)fdi.answ)->nodeid; + if (!nid) { + /* + * zero nodeid is the same as "not found", + * but it's also cacheable (which we keep + * keep on doing not as of writing this) + */ + lookup_err = ENOENT; + } else if (nid == FUSE_ROOT_ID) { + lookup_err = EINVAL; + } + } + if (lookup_err && + (!fdi.answ_stat || lookup_err != ENOENT || op != FUSE_LOOKUP)) { + fdisp_destroy(&fdi); + return lookup_err; + } + /* lookup_err, if non-zero, must be ENOENT at this point */ + + if (lookup_err) { + + if ((nameiop == CREATE || nameiop == RENAME) && islastcn + /* && directory dvp has not been removed */ ) { + + if (vfs_isrdonly(mp)) { + err = EROFS; + goto out; + } +#if 0 /* THINK_ABOUT_THIS */ + if 
((err = fuse_internal_access(dvp, VWRITE, cred, td, &facp))) { + goto out; + } +#endif + + /* + * Possibly record the position of a slot in the + * directory large enough for the new component name. + * This can be recorded in the vnode private data for + * dvp. Set the SAVENAME flag to hold onto the + * pathname for use later in VOP_CREATE or VOP_RENAME. + */ + cnp->cn_flags |= SAVENAME; + + err = EJUSTRETURN; + goto out; + } + /* Consider inserting name into cache. */ + + /* + * No we can't use negative caching, as the fs + * changes are out of our control. + * False positives' falseness turns out just as things + * go by, but false negatives' falseness doesn't. + * (and aiding the caching mechanism with extra control + * mechanisms comes quite close to beating the whole purpose + * caching...) + */ +#if 0 + if ((cnp->cn_flags & MAKEENTRY) && nameiop != CREATE) { + DEBUG("inserting NULL into cache\n"); + cache_enter(dvp, NULL, cnp); + } +#endif + err = ENOENT; + goto out; + + } else { + + /* !lookup_err */ + + struct fuse_entry_out *feo = NULL; + struct fuse_attr *fattr = NULL; + + if (op == FUSE_GETATTR) { + fattr = &((struct fuse_attr_out *)fdi.answ)->attr; + } else { + feo = (struct fuse_entry_out *)fdi.answ; + fattr = &(feo->attr); + } + + /* + * If deleting, and at end of pathname, return parameters + * which can be used to remove file. If the wantparent flag + * isn't set, we return only the directory, otherwise we go on + * and lock the inode, being careful with ".". + */ + if (nameiop == DELETE && islastcn) { + /* + * Check for write access on directory. 
+ */ + facp.xuid = fattr->uid; + facp.facc_flags |= FACCESS_STICKY; + err = fuse_internal_access(dvp, VWRITE, &facp, td, cred); + facp.facc_flags &= ~FACCESS_XQUERIES; + + if (err) { + goto out; + } + if (nid == VTOI(dvp)) { + vref(dvp); + *vpp = dvp; + } else { + err = fuse_vnode_get(dvp->v_mount, nid, dvp, + &vp, cnp, IFTOVT(fattr->mode)); + if (err) + goto out; + *vpp = vp; + } + + /* + * Save the name for use in VOP_RMDIR and VOP_REMOVE + * later. + */ + cnp->cn_flags |= SAVENAME; + goto out; + + } + /* + * If rewriting (RENAME), return the inode and the + * information required to rewrite the present directory + * Must get inode of directory entry to verify it's a + * regular file, or empty directory. + */ + if (nameiop == RENAME && wantparent && islastcn) { + +#if 0 /* THINK_ABOUT_THIS */ + if ((err = fuse_internal_access(dvp, VWRITE, cred, td, &facp))) { + goto out; + } +#endif + + /* + * Check for "." + */ + if (nid == VTOI(dvp)) { + err = EISDIR; + goto out; + } + err = fuse_vnode_get(vnode_mount(dvp), + nid, + dvp, + &vp, + cnp, + IFTOVT(fattr->mode)); + if (err) { + goto out; + } + *vpp = vp; + /* + * Save the name for use in VOP_RENAME later. + */ + cnp->cn_flags |= SAVENAME; + + goto out; + } + if (flags & ISDOTDOT) { + struct mount *mp; + int ltype; + + /* + * Expanded copy of vn_vget_ino() so that + * fuse_vnode_get() can be used. 
+ */ + mp = dvp->v_mount; + ltype = VOP_ISLOCKED(dvp); + err = vfs_busy(mp, MBF_NOWAIT); + if (err != 0) { + vfs_ref(mp); + VOP_UNLOCK(dvp, 0); + err = vfs_busy(mp, 0); + vn_lock(dvp, ltype | LK_RETRY); + vfs_rel(mp); + if (err) + goto out; + if ((dvp->v_iflag & VI_DOOMED) != 0) { + err = ENOENT; + vfs_unbusy(mp); + goto out; + } + } + VOP_UNLOCK(dvp, 0); + err = fuse_vnode_get(vnode_mount(dvp), + nid, + NULL, + &vp, + cnp, + IFTOVT(fattr->mode)); + vfs_unbusy(mp); + vn_lock(dvp, ltype | LK_RETRY); + if ((dvp->v_iflag & VI_DOOMED) != 0) { + if (err == 0) + vput(vp); + err = ENOENT; + } + if (err) + goto out; + *vpp = vp; + } else if (nid == VTOI(dvp)) { + vref(dvp); + *vpp = dvp; + } else { + err = fuse_vnode_get(vnode_mount(dvp), + nid, + dvp, + &vp, + cnp, + IFTOVT(fattr->mode)); + if (err) { + goto out; + } + fuse_vnode_setparent(vp, dvp); + *vpp = vp; + } + + if (op == FUSE_GETATTR) { + cache_attrs(*vpp, (struct fuse_attr_out *)fdi.answ); + } else { + cache_attrs(*vpp, (struct fuse_entry_out *)fdi.answ); + } + + /* Insert name into cache if appropriate. */ + + /* + * Nooo, caching is evil. With caching, we can't avoid stale + * information taking over the playground (cached info is not + * just positive/negative, it does have qualitative aspects, + * too). And a (VOP/FUSE)_GETATTR is always thrown anyway, when + * walking down along cached path components, and that's not + * any cheaper than FUSE_LOOKUP. This might change with + * implementing kernel side attr caching, but... In Linux, + * lookup results are not cached, and the daemon is bombarded + * with FUSE_LOOKUPS on and on. This shows that by design, the + * daemon is expected to handle frequent lookup queries + * efficiently, do its caching in userspace, and so on. + * + * So just leave the name cache alone. + */ + + /* + * Well, now I know, Linux caches lookups, but with a + * timeout... So it's the same thing as attribute caching: + * we can deal with it when implement timeouts. 
+ */ +#if 0 + if (cnp->cn_flags & MAKEENTRY) { + cache_enter(dvp, *vpp, cnp); + } +#endif + } +out: + if (!lookup_err) { + + /* No lookup error; need to clean up. */ + + if (err) { /* Found inode; exit with no vnode. */ + if (op == FUSE_LOOKUP) { + fuse_internal_forget_send(vnode_mount(dvp), td, cred, + nid, 1); + } + fdisp_destroy(&fdi); + return err; + } else { +#ifndef NO_EARLY_PERM_CHECK_HACK + if (!islastcn) { + /* + * We have the attributes of the next item + * *now*, and it's a fact, and we do not + * have to do extra work for it (ie, beg the + * daemon), and it neither depends on such + * accidental things like attr caching. So + * the big idea: check credentials *now*, + * not at the beginning of the next call to + * lookup. + * + * The first item of the lookup chain (fs root) + * won't be checked then here, of course, as + * its never "the next". But go and see that + * the root is taken care about at the very + * beginning of this function. + * + * Now, given we want to do the access check + * this way, one might ask: so then why not + * do the access check just after fetching + * the inode and its attributes from the + * daemon? Why bother with producing the + * corresponding vnode at all if something + * is not OK? We know what's the deal as + * soon as we get those attrs... There is + * one bit of info though not given us by + * the daemon: whether his response is + * authorative or not... His response should + * be ignored if something is mounted over + * the dir in question. But that can be + * known only by having the vnode... 
+ */ + int tmpvtype = vnode_vtype(*vpp); + + bzero(&facp, sizeof(facp)); + /*the early perm check hack */ + facp.facc_flags |= FACCESS_VA_VALID; + + if ((tmpvtype != VDIR) && (tmpvtype != VLNK)) { + err = ENOTDIR; + } + if (!err && !vnode_mountedhere(*vpp)) { + err = fuse_internal_access(*vpp, VEXEC, &facp, td, cred); + } + if (err) { + if (tmpvtype == VLNK) + DEBUG("weird, permission error with a symlink?\n"); + vput(*vpp); + *vpp = NULL; + } + } +#endif + } + } + fdisp_destroy(&fdi); + + return err; +} + +/* + struct vnop_mkdir_args { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + }; +*/ +static int +fuse_vnop_mkdir(struct vop_mkdir_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + struct vattr *vap = ap->a_vap; + + int err = 0; + + struct fuse_mkdir_in fmdi; + + fuse_trace_printf_vnop(); + + if (fuse_isdeadfs(dvp)) { + return ENXIO; + } + fmdi.mode = MAKEIMODE(vap->va_type, vap->va_mode); + + err = fuse_internal_newentry(dvp, vpp, cnp, FUSE_MKDIR, &fmdi, + sizeof(fmdi), VDIR); + + if (err == 0) { + fuse_invalidate_attr(dvp); + } + return err; +} + +/* + struct vnop_mknod_args { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + }; +*/ +static int +fuse_vnop_mknod(struct vop_mknod_args *ap) +{ + + return (EINVAL); +} + + +/* + struct vnop_open_args { + struct vnode *a_vp; + int a_mode; + struct ucred *a_cred; + struct thread *a_td; + int a_fdidx; / struct file *a_fp; + }; +*/ +static int +fuse_vnop_open(struct vop_open_args *ap) +{ + struct vnode *vp = ap->a_vp; + int mode = ap->a_mode; + struct thread *td = ap->a_td; + struct ucred *cred = ap->a_cred; + + fufh_type_t fufh_type; + struct fuse_vnode_data *fvdat; + + int error, isdir = 0; + + DEBUG2G("inode=%ju mode=0x%x\n", (uintmax_t)VTOI(vp), mode); + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + fvdat = VTOFUD(vp); + + if 
(vnode_isdir(vp)) { + isdir = 1; + } + if (isdir) { + fufh_type = FUFH_RDONLY; + } else { + fufh_type = fuse_filehandle_xlate_from_fflags(mode); + } + + if (fuse_filehandle_valid(vp, fufh_type)) { + fuse_vnode_open(vp, 0, td); + return 0; + } + error = fuse_filehandle_open(vp, fufh_type, NULL, td, cred); + + return error; +} + +/* + struct vnop_read_args { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + }; +*/ +static int +fuse_vnop_read(struct vop_read_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + int ioflag = ap->a_ioflag; + struct ucred *cred = ap->a_cred; + + DEBUG2G("inode=%ju offset=%jd resid=%zd\n", + (uintmax_t)VTOI(vp), uio->uio_offset, uio->uio_resid); + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + return fuse_io_dispatch(vp, uio, ioflag, cred); +} + +/* + struct vnop_readdir_args { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + int *a_eofflag; + int *ncookies; + u_long **a_cookies; + }; +*/ +static int +fuse_vnop_readdir(struct vop_readdir_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + struct ucred *cred = ap->a_cred; + + struct fuse_filehandle *fufh = NULL; + struct fuse_vnode_data *fvdat; + struct fuse_iov cookediov; + + int err = 0; + int freefufh = 0; + + DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp)); + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + if ( /* XXXIP ((uio_iovcnt(uio) > 1)) || */ + (uio_resid(uio) < sizeof(struct dirent))) { + return EINVAL; + } + fvdat = VTOFUD(vp); + + if (!fuse_filehandle_valid(vp, FUFH_RDONLY)) { + DEBUG("calling readdir() before open()"); + err = fuse_filehandle_open(vp, FUFH_RDONLY, &fufh, NULL, cred); + freefufh = 1; + } else { + err = fuse_filehandle_get(vp, FUFH_RDONLY, &fufh); + } + if (err) { + return (err); + } +#define DIRCOOKEDSIZE FUSE_DIRENT_ALIGN(FUSE_NAME_OFFSET + MAXNAMLEN + 1) + fiov_init(&cookediov, DIRCOOKEDSIZE); + + err = fuse_internal_readdir(vp, uio, fufh, &cookediov); + + 
fiov_teardown(&cookediov); + if (freefufh) { + fuse_filehandle_close(vp, FUFH_RDONLY, NULL, cred); + } + return err; +} + +/* + struct vnop_readlink_args { + struct vnode *a_vp; + struct uio *a_uio; + struct ucred *a_cred; + }; +*/ +static int +fuse_vnop_readlink(struct vop_readlink_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + struct ucred *cred = ap->a_cred; + + struct fuse_dispatcher fdi; + int err; + + DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp)); + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + if (!vnode_islnk(vp)) { + return EINVAL; + } + fdisp_init(&fdi, 0); + err = fdisp_simple_putget_vp(&fdi, FUSE_READLINK, vp, curthread, cred); + if (err) { + goto out; + } + if (((char *)fdi.answ)[0] == '/' && + fuse_get_mpdata(vnode_mount(vp))->dataflags & FSESS_PUSH_SYMLINKS_IN) { + char *mpth = vnode_mount(vp)->mnt_stat.f_mntonname; + + err = uiomove(mpth, strlen(mpth), uio); + } + if (!err) { + err = uiomove(fdi.answ, fdi.iosize, uio); + } +out: + fdisp_destroy(&fdi); + return err; +} + +/* + struct vnop_reclaim_args { + struct vnode *a_vp; + struct thread *a_td; + }; +*/ +static int +fuse_vnop_reclaim(struct vop_reclaim_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct thread *td = ap->a_td; + + struct fuse_vnode_data *fvdat = VTOFUD(vp); + struct fuse_filehandle *fufh = NULL; + + int type; + + if (!fvdat) { + panic("FUSE: no vnode data during recycling"); + } + DEBUG("inode=%ju\n", (uintmax_t)VTOI(vp)); + + for (type = 0; type < FUFH_MAXTYPE; type++) { + fufh = &(fvdat->fufh[type]); + if (FUFH_IS_VALID(fufh)) { + printf("FUSE: vnode being reclaimed but fufh (type=%d) is valid", + type); + fuse_filehandle_close(vp, type, td, NULL); + } + } + + if ((!fuse_isdeadfs(vp)) && (fvdat->nlookup)) { + fuse_internal_forget_send(vnode_mount(vp), td, NULL, VTOI(vp), + fvdat->nlookup); + } + fuse_vnode_setparent(vp, NULL); + cache_purge(vp); + vfs_hash_remove(vp); + vnode_destroy_vobject(vp); + fuse_vnode_destroy(vp); + + return 0; +} + +/* + 
struct vnop_remove_args { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + }; +*/ +static int +fuse_vnop_remove(struct vop_remove_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; + struct componentname *cnp = ap->a_cnp; + + int err; + + DEBUG2G("inode=%ju name=%*s\n", + (uintmax_t)VTOI(vp), (int)cnp->cn_namelen, cnp->cn_nameptr); + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + if (vnode_isdir(vp)) { + return EPERM; + } + cache_purge(vp); + + err = fuse_internal_remove(dvp, vp, cnp, FUSE_UNLINK); + + if (err == 0) { + fuse_internal_vnode_disappear(vp); + fuse_invalidate_attr(dvp); + } + return err; +} + +/* + struct vnop_rename_args { + struct vnode *a_fdvp; + struct vnode *a_fvp; + struct componentname *a_fcnp; + struct vnode *a_tdvp; + struct vnode *a_tvp; + struct componentname *a_tcnp; + }; +*/ +static int +fuse_vnop_rename(struct vop_rename_args *ap) +{ + struct vnode *fdvp = ap->a_fdvp; + struct vnode *fvp = ap->a_fvp; + struct componentname *fcnp = ap->a_fcnp; + struct vnode *tdvp = ap->a_tdvp; + struct vnode *tvp = ap->a_tvp; + struct componentname *tcnp = ap->a_tcnp; + struct fuse_data *data; + + int err = 0; + + DEBUG2G("from: inode=%ju name=%*s -> to: inode=%ju name=%*s\n", + (uintmax_t)VTOI(fvp), (int)fcnp->cn_namelen, fcnp->cn_nameptr, + (uintmax_t)(tvp == NULL ? -1 : VTOI(tvp)), + (int)tcnp->cn_namelen, tcnp->cn_nameptr); + + if (fuse_isdeadfs(fdvp)) { + return ENXIO; + } + if (fvp->v_mount != tdvp->v_mount || + (tvp && fvp->v_mount != tvp->v_mount)) { + DEBUG("cross-device rename: %s -> %s\n", + fcnp->cn_nameptr, (tcnp != NULL ? tcnp->cn_nameptr : "(NULL)")); + err = EXDEV; + goto out; + } + cache_purge(fvp); + + /* + * FUSE library is expected to check if target directory is not + * under the source directory in the file system tree. + * Linux performs this check at VFS level. 
+ */ + data = fuse_get_mpdata(vnode_mount(tdvp)); + sx_xlock(&data->rename_lock); + err = fuse_internal_rename(fdvp, fcnp, tdvp, tcnp); + if (err == 0) { + fuse_invalidate_attr(fdvp); + if (tdvp != fdvp) { + fuse_vnode_setparent(fvp, tdvp); + fuse_invalidate_attr(tdvp); + } + if (tvp != NULL) + fuse_vnode_setparent(tvp, NULL); + } + sx_unlock(&data->rename_lock); + + if (tvp != NULL && tvp != fvp) { + cache_purge(tvp); + } + if (vnode_isdir(fvp)) { + if ((tvp != NULL) && vnode_isdir(tvp)) { + cache_purge(tdvp); + } + cache_purge(fdvp); + } +out: + if (tdvp == tvp) { + vrele(tdvp); + } else { + vput(tdvp); + } + if (tvp != NULL) { + vput(tvp); + } + vrele(fdvp); + vrele(fvp); + + return err; +} + +/* + struct vnop_rmdir_args { + struct vnode *a_dvp; + struct vnode *a_vp; + struct componentname *a_cnp; + } *ap; +*/ +static int +fuse_vnop_rmdir(struct vop_rmdir_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode *vp = ap->a_vp; + + int err; + + DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp)); + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + if (VTOFUD(vp) == VTOFUD(dvp)) { + return EINVAL; + } + err = fuse_internal_remove(dvp, vp, ap->a_cnp, FUSE_RMDIR); + + if (err == 0) { + fuse_internal_vnode_disappear(vp); + fuse_invalidate_attr(dvp); + } + return err; +} + +/* + struct vnop_setattr_args { + struct vnode *a_vp; + struct vattr *a_vap; + struct ucred *a_cred; + struct thread *a_td; + }; +*/ +static int +fuse_vnop_setattr(struct vop_setattr_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct vattr *vap = ap->a_vap; + struct ucred *cred = ap->a_cred; + struct thread *td = curthread; + + struct fuse_dispatcher fdi; + struct fuse_setattr_in *fsai; + struct fuse_access_param facp; + + int err = 0; + enum vtype vtyp; + int sizechanged = 0; + uint64_t newsize = 0; + + DEBUG2G("inode=%ju\n", (uintmax_t)VTOI(vp)); + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + fdisp_init(&fdi, sizeof(*fsai)); + fdisp_make_vp(&fdi, FUSE_SETATTR, vp, td, cred); + fsai = fdi.indata; 
+ fsai->valid = 0; + + bzero(&facp, sizeof(facp)); + + facp.xuid = vap->va_uid; + facp.xgid = vap->va_gid; + + if (vap->va_uid != (uid_t)VNOVAL) { + facp.facc_flags |= FACCESS_CHOWN; + fsai->uid = vap->va_uid; + fsai->valid |= FATTR_UID; + } + if (vap->va_gid != (gid_t)VNOVAL) { + facp.facc_flags |= FACCESS_CHOWN; + fsai->gid = vap->va_gid; + fsai->valid |= FATTR_GID; + } + if (vap->va_size != VNOVAL) { + + struct fuse_filehandle *fufh = NULL; + + /*Truncate to a new value. */ + fsai->size = vap->va_size; + sizechanged = 1; + newsize = vap->va_size; + fsai->valid |= FATTR_SIZE; + + fuse_filehandle_getrw(vp, FUFH_WRONLY, &fufh); + if (fufh) { + fsai->fh = fufh->fh_id; + fsai->valid |= FATTR_FH; + } + } + if (vap->va_atime.tv_sec != VNOVAL) { + fsai->atime = vap->va_atime.tv_sec; + fsai->atimensec = vap->va_atime.tv_nsec; + fsai->valid |= FATTR_ATIME; + } + if (vap->va_mtime.tv_sec != VNOVAL) { + fsai->mtime = vap->va_mtime.tv_sec; + fsai->mtimensec = vap->va_mtime.tv_nsec; + fsai->valid |= FATTR_MTIME; + } + if (vap->va_mode != (mode_t)VNOVAL) { + fsai->mode = vap->va_mode & ALLPERMS; + fsai->valid |= FATTR_MODE; + } + if (!fsai->valid) { + goto out; + } + vtyp = vnode_vtype(vp); + + if (fsai->valid & FATTR_SIZE && vtyp == VDIR) { + err = EISDIR; + goto out; + } + if (vfs_isrdonly(vnode_mount(vp)) && (fsai->valid & ~FATTR_SIZE || vtyp == VREG)) { + err = EROFS; + goto out; + } + if (fsai->valid & ~FATTR_SIZE) { + /*err = fuse_internal_access(vp, VADMIN, context, &facp); */ + /*XXX */ + err = 0; + } + facp.facc_flags &= ~FACCESS_XQUERIES; + + if (err && !(fsai->valid & ~(FATTR_ATIME | FATTR_MTIME)) && + vap->va_vaflags & VA_UTIMES_NULL) { + err = fuse_internal_access(vp, VWRITE, &facp, td, cred); + } + if (err) { + fuse_invalidate_attr(vp); + goto out; + } + if ((err = fdisp_wait_answ(&fdi))) { + fuse_invalidate_attr(vp); + goto out; + } + vtyp = IFTOVT(((struct fuse_attr_out *)fdi.answ)->attr.mode); + + if (vnode_vtype(vp) != vtyp) { + if (vnode_vtype(vp) == VNON && 
vtyp != VNON) { + debug_printf("FUSE: Dang! vnode_vtype is VNON and vtype isn't.\n"); + } else { + /* + * STALE vnode, ditch + * + * The vnode has changed its type "behind our back". There's + * nothing really we can do, so let us just force an internal + * revocation and tell the caller to try again, if interested. + */ + fuse_internal_vnode_disappear(vp); + err = EAGAIN; + } + } + if (!err && !sizechanged) { + cache_attrs(vp, (struct fuse_attr_out *)fdi.answ); + } +out: + fdisp_destroy(&fdi); + if (!err && sizechanged) { + fuse_invalidate_attr(vp); + fuse_vnode_setsize(vp, cred, newsize); + VTOFUD(vp)->flag &= ~FN_SIZECHANGE; + } + return err; +} + +/* + struct vnop_strategy_args { + struct vnode *a_vp; + struct buf *a_bp; + }; +*/ +static int +fuse_vnop_strategy(struct vop_strategy_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct buf *bp = ap->a_bp; + + fuse_trace_printf_vnop(); + + if (!vp || fuse_isdeadfs(vp)) { + bp->b_ioflags |= BIO_ERROR; + bp->b_error = ENXIO; + bufdone(bp); + return ENXIO; + } + if (bp->b_iocmd == BIO_WRITE) + fuse_vnode_refreshsize(vp, NOCRED); + + (void)fuse_io_strategy(vp, bp); + + /* + * This is a dangerous function. If returns error, that might mean a + * panic. We prefer pretty much anything over being forced to panic + * by a malicious daemon (a demon?). So we just return 0 anyway. You + * should never mind this: this function has its own error + * propagation mechanism via the argument buffer, so + * not-that-melodramatic residents of the call chain still will be + * able to know what to do. 
+ */ + return 0; +} + + +/* + struct vnop_symlink_args { + struct vnode *a_dvp; + struct vnode **a_vpp; + struct componentname *a_cnp; + struct vattr *a_vap; + char *a_target; + }; +*/ +static int +fuse_vnop_symlink(struct vop_symlink_args *ap) +{ + struct vnode *dvp = ap->a_dvp; + struct vnode **vpp = ap->a_vpp; + struct componentname *cnp = ap->a_cnp; + char *target = ap->a_target; + + struct fuse_dispatcher fdi; + + int err; + size_t len; + + DEBUG2G("inode=%ju name=%*s\n", + (uintmax_t)VTOI(dvp), (int)cnp->cn_namelen, cnp->cn_nameptr); + + if (fuse_isdeadfs(dvp)) { + return ENXIO; + } + /* + * Unlike the other creator type calls, here we have to create a message + * where the name of the new entry comes first, and the data describing + * the entry comes second. + * Hence we can't rely on our handy fuse_internal_newentry() routine, + * but put together the message manually and just call the core part. + */ + + len = strlen(target) + 1; + fdisp_init(&fdi, len + cnp->cn_namelen + 1); + fdisp_make_vp(&fdi, FUSE_SYMLINK, dvp, curthread, NULL); + + memcpy(fdi.indata, cnp->cn_nameptr, cnp->cn_namelen); + ((char *)fdi.indata)[cnp->cn_namelen] = '\0'; + memcpy((char *)fdi.indata + cnp->cn_namelen + 1, target, len); + + err = fuse_internal_newentry_core(dvp, vpp, cnp, VLNK, &fdi); + fdisp_destroy(&fdi); + + if (err == 0) { + fuse_invalidate_attr(dvp); + } + return err; +} + +/* + struct vnop_write_args { + struct vnode *a_vp; + struct uio *a_uio; + int a_ioflag; + struct ucred *a_cred; + }; +*/ +static int +fuse_vnop_write(struct vop_write_args *ap) +{ + struct vnode *vp = ap->a_vp; + struct uio *uio = ap->a_uio; + int ioflag = ap->a_ioflag; + struct ucred *cred = ap->a_cred; + + fuse_trace_printf_vnop(); + + if (fuse_isdeadfs(vp)) { + return ENXIO; + } + fuse_vnode_refreshsize(vp, cred); + + return fuse_io_dispatch(vp, uio, ioflag, cred); +} + +/* + struct vnop_getpages_args { + struct vnode *a_vp; + vm_page_t *a_m; + int a_count; + int a_reqpage; + vm_ooffset_t 
a_offset; + }; +*/ +static int +fuse_vnop_getpages(struct vop_getpages_args *ap) +{ + int i, error, nextoff, size, toff, count, npages; + struct uio uio; + struct iovec iov; + vm_offset_t kva; + struct buf *bp; + struct vnode *vp; + struct thread *td; + struct ucred *cred; + vm_page_t *pages; + + DEBUG2G("heh\n"); + + vp = ap->a_vp; + KASSERT(vp->v_object, ("objectless vp passed to getpages")); + td = curthread; /* XXX */ + cred = curthread->td_ucred; /* XXX */ + pages = ap->a_m; + count = ap->a_count; + + if (!fsess_opt_mmap(vnode_mount(vp))) { + DEBUG("called on non-cacheable vnode??\n"); + return (VM_PAGER_ERROR); + } + npages = btoc(count); + + /* + * If the requested page is partially valid, just return it and + * allow the pager to zero-out the blanks. Partially valid pages + * can only occur at the file EOF. + */ + + VM_OBJECT_LOCK(vp->v_object); + fuse_vm_page_lock_queues(); + if (pages[ap->a_reqpage]->valid != 0) { + for (i = 0; i < npages; ++i) { + if (i != ap->a_reqpage) { + fuse_vm_page_lock(pages[i]); + vm_page_free(pages[i]); + fuse_vm_page_unlock(pages[i]); + } + } + fuse_vm_page_unlock_queues(); + VM_OBJECT_UNLOCK(vp->v_object); + return 0; + } + fuse_vm_page_unlock_queues(); + VM_OBJECT_UNLOCK(vp->v_object); + + /* + * We use only the kva address for the buffer, but this is extremely + * convienient and fast. 
+ */ + bp = getpbuf(&fuse_pbuf_freecnt); + + kva = (vm_offset_t)bp->b_data; + pmap_qenter(kva, pages, npages); + PCPU_INC(cnt.v_vnodein); + PCPU_ADD(cnt.v_vnodepgsin, npages); + + iov.iov_base = (caddr_t)kva; + iov.iov_len = count; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_offset = IDX_TO_OFF(pages[0]->pindex); + uio.uio_resid = count; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_rw = UIO_READ; + uio.uio_td = td; + + error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred); + pmap_qremove(kva, npages); + + relpbuf(bp, &fuse_pbuf_freecnt); + + if (error && (uio.uio_resid == count)) { + DEBUG("error %d\n", error); + VM_OBJECT_LOCK(vp->v_object); + fuse_vm_page_lock_queues(); + for (i = 0; i < npages; ++i) { + if (i != ap->a_reqpage) { + fuse_vm_page_lock(pages[i]); + vm_page_free(pages[i]); + fuse_vm_page_unlock(pages[i]); + } + } + fuse_vm_page_unlock_queues(); + VM_OBJECT_UNLOCK(vp->v_object); + return VM_PAGER_ERROR; + } + /* + * Calculate the number of bytes read and validate only that number + * of bytes. Note that due to pending writes, size may be 0. This + * does not mean that the remaining data is invalid! + */ + + size = count - uio.uio_resid; + VM_OBJECT_LOCK(vp->v_object); + fuse_vm_page_lock_queues(); + for (i = 0, toff = 0; i < npages; i++, toff = nextoff) { + vm_page_t m; + + nextoff = toff + PAGE_SIZE; + m = pages[i]; + + if (nextoff <= size) { + /* + * Read operation filled an entire page + */ + m->valid = VM_PAGE_BITS_ALL; + KASSERT(m->dirty == 0, + ("fuse_getpages: page %p is dirty", m)); + } else if (size > toff) { + /* + * Read operation filled a partial page. + */ + m->valid = 0; + vm_page_set_valid_range(m, 0, size - toff); + KASSERT(m->dirty == 0, + ("fuse_getpages: page %p is dirty", m)); + } else { + /* + * Read operation was short. If no error occured + * we may have hit a zero-fill section. We simply + * leave valid set to 0. 
+ */ + ; + } + if (i != ap->a_reqpage) { + /* + * Whether or not to leave the page activated is up in + * the air, but we should put the page on a page queue + * somewhere (it already is in the object). Result: + * It appears that emperical results show that + * deactivating pages is best. + */ + + /* + * Just in case someone was asking for this page we + * now tell them that it is ok to use. + */ + if (!error) { + if (m->oflags & VPO_WANTED) { + fuse_vm_page_lock(m); + vm_page_activate(m); + fuse_vm_page_unlock(m); + } else { + fuse_vm_page_lock(m); + vm_page_deactivate(m); + fuse_vm_page_unlock(m); + } + vm_page_wakeup(m); + } else { + fuse_vm_page_lock(m); + vm_page_free(m); + fuse_vm_page_unlock(m); + } + } + } + fuse_vm_page_unlock_queues(); + VM_OBJECT_UNLOCK(vp->v_object); + return 0; +} + +/* + struct vnop_putpages_args { + struct vnode *a_vp; + vm_page_t *a_m; + int a_count; + int a_sync; + int *a_rtvals; + vm_ooffset_t a_offset; + }; +*/ +static int +fuse_vnop_putpages(struct vop_putpages_args *ap) +{ + struct uio uio; + struct iovec iov; + vm_offset_t kva; + struct buf *bp; + int i, error, npages, count; + off_t offset; + int *rtvals; + struct vnode *vp; + struct thread *td; + struct ucred *cred; + vm_page_t *pages; + vm_ooffset_t fsize; + + DEBUG2G("heh\n"); + + vp = ap->a_vp; + KASSERT(vp->v_object, ("objectless vp passed to putpages")); + fsize = vp->v_object->un_pager.vnp.vnp_size; + td = curthread; /* XXX */ + cred = curthread->td_ucred; /* XXX */ + pages = ap->a_m; + count = ap->a_count; + rtvals = ap->a_rtvals; + npages = btoc(count); + offset = IDX_TO_OFF(pages[0]->pindex); + + if (!fsess_opt_mmap(vnode_mount(vp))) { + DEBUG("called on non-cacheable vnode??\n"); + } + for (i = 0; i < npages; i++) + rtvals[i] = VM_PAGER_AGAIN; + + /* + * When putting pages, do not extend file past EOF. 
+ */ + + if (offset + count > fsize) { + count = fsize - offset; + if (count < 0) + count = 0; + } + /* + * We use only the kva address for the buffer, but this is extremely + * convienient and fast. + */ + bp = getpbuf(&fuse_pbuf_freecnt); + + kva = (vm_offset_t)bp->b_data; + pmap_qenter(kva, pages, npages); + PCPU_INC(cnt.v_vnodeout); + PCPU_ADD(cnt.v_vnodepgsout, count); + + iov.iov_base = (caddr_t)kva; + iov.iov_len = count; + uio.uio_iov = &iov; + uio.uio_iovcnt = 1; + uio.uio_offset = offset; + uio.uio_resid = count; + uio.uio_segflg = UIO_SYSSPACE; + uio.uio_rw = UIO_WRITE; + uio.uio_td = td; + + error = fuse_io_dispatch(vp, &uio, IO_DIRECT, cred); + + pmap_qremove(kva, npages); + relpbuf(bp, &fuse_pbuf_freecnt); + + if (!error) { + int nwritten = round_page(count - uio.uio_resid) / PAGE_SIZE; + + for (i = 0; i < nwritten; i++) { + rtvals[i] = VM_PAGER_OK; + VM_OBJECT_LOCK(pages[i]->object); + vm_page_undirty(pages[i]); + VM_OBJECT_UNLOCK(pages[i]->object); + } + } + return rtvals[0]; +} + +/* + struct vnop_print_args { + struct vnode *a_vp; + }; +*/ +static int +fuse_vnop_print(struct vop_print_args *ap) +{ + struct fuse_vnode_data *fvdat = VTOFUD(ap->a_vp); + + printf("nodeid: %ju, parent nodeid: %ju, nlookup: %ju, flag: %#x\n", + (uintmax_t)VTOILLU(ap->a_vp), (uintmax_t)fvdat->parent_nid, + (uintmax_t)fvdat->nlookup, + fvdat->flag); + + return 0; +} diff --git a/sys/modules/Makefile b/sys/modules/Makefile index d78f8de..78cd889 100644 --- a/sys/modules/Makefile +++ b/sys/modules/Makefile @@ -111,6 +111,7 @@ SUBDIR= \ ${_filemon} \ firewire \ firmware \ + fuse \ ${_fxp} \ gem \ geom \ diff --git a/sys/modules/fuse/Makefile b/sys/modules/fuse/Makefile new file mode 100644 index 0000000..4ccc1a0 --- /dev/null +++ b/sys/modules/fuse/Makefile @@ -0,0 +1,10 @@ +# $FreeBSD$ + +.PATH: ${.CURDIR}/../../fs/fuse + +KMOD= fuse +SRCS= vnode_if.h \ + fuse_node.c fuse_io.c fuse_device.c fuse_ipc.c fuse_file.c \ + fuse_vfsops.c fuse_vnops.c fuse_internal.c fuse_main.c + 
+.include <bsd.kmod.mk> |