diff options
Diffstat (limited to 'src/fsdev')
-rw-r--r-- | src/fsdev/Makefile.objs | 12 | ||||
-rw-r--r-- | src/fsdev/file-op-9p.h | 150 | ||||
-rw-r--r-- | src/fsdev/qemu-fsdev-dummy.c | 22 | ||||
-rw-r--r-- | src/fsdev/qemu-fsdev-opts.c | 85 | ||||
-rw-r--r-- | src/fsdev/qemu-fsdev.c | 101 | ||||
-rw-r--r-- | src/fsdev/qemu-fsdev.h | 48 | ||||
-rw-r--r-- | src/fsdev/virtfs-proxy-helper.c | 1166 | ||||
-rw-r--r-- | src/fsdev/virtfs-proxy-helper.texi | 63 | ||||
-rw-r--r-- | src/fsdev/virtio-9p-marshal.c | 322 | ||||
-rw-r--r-- | src/fsdev/virtio-9p-marshal.h | 90 |
10 files changed, 2059 insertions, 0 deletions
diff --git a/src/fsdev/Makefile.objs b/src/fsdev/Makefile.objs new file mode 100644 index 0000000..c27dad3 --- /dev/null +++ b/src/fsdev/Makefile.objs @@ -0,0 +1,12 @@ +ifeq ($(CONFIG_VIRTIO)$(CONFIG_VIRTFS)$(CONFIG_PCI),yyy) +# Lots of the fsdev/9pcode is pulled in by vl.c via qemu_fsdev_add. +# only pull in the actual virtio-9p device if we also enabled virtio. +common-obj-y = qemu-fsdev.o virtio-9p-marshal.o +else +common-obj-y = qemu-fsdev-dummy.o +endif +common-obj-y += qemu-fsdev-opts.o + +# Toplevel always builds this; targets without virtio will put it in +# common-obj-y +common-obj-$(CONFIG_ALL) += qemu-fsdev-dummy.o diff --git a/src/fsdev/file-op-9p.h b/src/fsdev/file-op-9p.h new file mode 100644 index 0000000..956fda0 --- /dev/null +++ b/src/fsdev/file-op-9p.h @@ -0,0 +1,150 @@ +/* + * Virtio 9p + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Aneesh Kumar K.V <aneesh.kumar@linux.vnet.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ +#ifndef _FILEOP_H +#define _FILEOP_H +#include <sys/types.h> +#include <dirent.h> +#include <sys/time.h> +#include <utime.h> +#include <sys/stat.h> +#include <sys/uio.h> +#include <sys/vfs.h> + +#define SM_LOCAL_MODE_BITS 0600 +#define SM_LOCAL_DIR_MODE_BITS 0700 + +typedef struct FsCred +{ + uid_t fc_uid; + gid_t fc_gid; + mode_t fc_mode; + dev_t fc_rdev; +} FsCred; + +struct xattr_operations; +struct FsContext; +struct V9fsPath; + +typedef struct extended_ops { + int (*get_st_gen)(struct FsContext *, struct V9fsPath *, + mode_t, uint64_t *); +} extended_ops; + +/* export flags */ +#define V9FS_IMMEDIATE_WRITEOUT 0x00000001 +#define V9FS_PATHNAME_FSCONTEXT 0x00000002 +/* + * uid/gid set on fileserver files + */ +#define V9FS_SM_PASSTHROUGH 0x00000004 +/* + * uid/gid part of xattr + */ +#define V9FS_SM_MAPPED 0x00000008 +/* + * Server will try to set uid/gid. + * On failure ignore the error. + */ +#define V9FS_SM_NONE 0x00000010 +/* + * uid/gid part of .virtfs_meatadata namespace + */ +#define V9FS_SM_MAPPED_FILE 0x00000020 +#define V9FS_RDONLY 0x00000040 +#define V9FS_PROXY_SOCK_FD 0x00000080 +#define V9FS_PROXY_SOCK_NAME 0x00000100 + +#define V9FS_SEC_MASK 0x0000003C + + +typedef struct FileOperations FileOperations; +/* + * Structure to store the various fsdev's passed through command line. + */ +typedef struct FsDriverEntry { + char *fsdev_id; + char *path; + int export_flags; + FileOperations *ops; +} FsDriverEntry; + +typedef struct FsContext +{ + uid_t uid; + char *fs_root; + int export_flags; + struct xattr_operations **xops; + struct extended_ops exops; + /* fs driver specific data */ + void *private; +} FsContext; + +typedef struct V9fsPath { + uint16_t size; + char *data; +} V9fsPath; + +typedef union V9fsFidOpenState V9fsFidOpenState; + +void cred_init(FsCred *); + +struct FileOperations +{ + int (*parse_opts)(QemuOpts *, struct FsDriverEntry *); + int (*init)(struct FsContext *); + int (*lstat)(FsContext *, V9fsPath *, struct stat *); + ssize_t (*readlink)(FsContext *, V9fsPath *, char *, size_t); + int (*chmod)(FsContext *, V9fsPath *, FsCred *); + int (*chown)(FsContext *, V9fsPath *, FsCred *); + int (*mknod)(FsContext *, V9fsPath *, const char *, FsCred *); + int (*utimensat)(FsContext *, V9fsPath *, const struct timespec *); + int (*remove)(FsContext *, const char *); + int (*symlink)(FsContext *, const char *, V9fsPath *, + const char *, FsCred *); + int (*link)(FsContext *, V9fsPath *, V9fsPath *, const char *); + int (*setuid)(FsContext *, uid_t); + int (*close)(FsContext *, V9fsFidOpenState *); + int (*closedir)(FsContext *, V9fsFidOpenState *); + int (*opendir)(FsContext *, V9fsPath *, V9fsFidOpenState *); + int (*open)(FsContext *, V9fsPath *, int, V9fsFidOpenState *); + int (*open2)(FsContext *, V9fsPath *, const char *, + int, FsCred *, V9fsFidOpenState *); + void (*rewinddir)(FsContext *, V9fsFidOpenState *); + off_t (*telldir)(FsContext *, V9fsFidOpenState *); + int (*readdir_r)(FsContext *, V9fsFidOpenState *, + struct dirent *, struct dirent **); + void (*seekdir)(FsContext *, V9fsFidOpenState *, off_t); + ssize_t (*preadv)(FsContext *, V9fsFidOpenState *, + const struct iovec *, int, off_t); + ssize_t (*pwritev)(FsContext *, V9fsFidOpenState *, + const struct iovec *, int, off_t); + int (*mkdir)(FsContext *, V9fsPath *, const char *, FsCred *); + int (*fstat)(FsContext *, int, V9fsFidOpenState *, struct stat *); + int (*rename)(FsContext *, const char *, const char *); + int (*truncate)(FsContext *, V9fsPath *, off_t); + int (*fsync)(FsContext *, int, V9fsFidOpenState *, int); + int (*statfs)(FsContext *s, V9fsPath *path, struct statfs *stbuf); + ssize_t (*lgetxattr)(FsContext *, V9fsPath *, + const char *, void *, size_t); + ssize_t (*llistxattr)(FsContext *, V9fsPath *, void *, size_t); + int (*lsetxattr)(FsContext *, V9fsPath *, + const char *, void *, size_t, int); + int (*lremovexattr)(FsContext *, V9fsPath *, const char *); + int (*name_to_path)(FsContext *, V9fsPath *, const char *, V9fsPath *); + int (*renameat)(FsContext *ctx, V9fsPath *olddir, const char *old_name, + V9fsPath *newdir, const char *new_name); + int (*unlinkat)(FsContext *ctx, V9fsPath *dir, const char *name, int flags); + void *opaque; +}; + +#endif diff --git a/src/fsdev/qemu-fsdev-dummy.c b/src/fsdev/qemu-fsdev-dummy.c new file mode 100644 index 0000000..7dc2630 --- /dev/null +++ b/src/fsdev/qemu-fsdev-dummy.c @@ -0,0 +1,22 @@ +/* + * Virtio 9p + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Gautham R Shenoy <ego@in.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ +#include <stdio.h> +#include <string.h> +#include "qemu-fsdev.h" +#include "qemu/config-file.h" +#include "qemu/module.h" + +int qemu_fsdev_add(QemuOpts *opts) +{ + return 0; +} diff --git a/src/fsdev/qemu-fsdev-opts.c b/src/fsdev/qemu-fsdev-opts.c new file mode 100644 index 0000000..6311c7a --- /dev/null +++ b/src/fsdev/qemu-fsdev-opts.c @@ -0,0 +1,85 @@ +/* + * Virtio 9p + * + * This work is licensed under the terms of the GNU GPL, version 2 or + * later. See the COPYING file in the top-level directory. + */ + +#include "qemu/config-file.h" +#include "qemu/option.h" +#include "qemu/module.h" + +static QemuOptsList qemu_fsdev_opts = { + .name = "fsdev", + .implied_opt_name = "fsdriver", + .head = QTAILQ_HEAD_INITIALIZER(qemu_fsdev_opts.head), + .desc = { + { + .name = "fsdriver", + .type = QEMU_OPT_STRING, + }, { + .name = "path", + .type = QEMU_OPT_STRING, + }, { + .name = "security_model", + .type = QEMU_OPT_STRING, + }, { + .name = "writeout", + .type = QEMU_OPT_STRING, + }, { + .name = "readonly", + .type = QEMU_OPT_BOOL, + + }, { + .name = "socket", + .type = QEMU_OPT_STRING, + }, { + .name = "sock_fd", + .type = QEMU_OPT_NUMBER, + }, + + { /*End of list */ } + }, +}; + +static QemuOptsList qemu_virtfs_opts = { + .name = "virtfs", + .implied_opt_name = "fsdriver", + .head = QTAILQ_HEAD_INITIALIZER(qemu_virtfs_opts.head), + .desc = { + { + .name = "fsdriver", + .type = QEMU_OPT_STRING, + }, { + .name = "path", + .type = QEMU_OPT_STRING, + }, { + .name = "mount_tag", + .type = QEMU_OPT_STRING, + }, { + .name = "security_model", + .type = QEMU_OPT_STRING, + }, { + .name = "writeout", + .type = QEMU_OPT_STRING, + }, { + .name = "readonly", + .type = QEMU_OPT_BOOL, + }, { + .name = "socket", + .type = QEMU_OPT_STRING, + }, { + .name = "sock_fd", + .type = QEMU_OPT_NUMBER, + }, + + { /*End of list */ } + }, +}; + +static void fsdev_register_config(void) +{ + qemu_add_opts(&qemu_fsdev_opts); + qemu_add_opts(&qemu_virtfs_opts); +} +machine_init(fsdev_register_config); diff --git a/src/fsdev/qemu-fsdev.c b/src/fsdev/qemu-fsdev.c new file mode 100644 index 0000000..ccfec13 --- /dev/null +++ b/src/fsdev/qemu-fsdev.c @@ -0,0 +1,101 @@ +/* + * Virtio 9p + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Gautham R Shenoy <ego@in.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ +#include <stdio.h> +#include <string.h> +#include "qemu-fsdev.h" +#include "qemu/queue.h" +#include "qemu/osdep.h" +#include "qemu-common.h" +#include "qemu/config-file.h" + +static QTAILQ_HEAD(FsDriverEntry_head, FsDriverListEntry) fsdriver_entries = + QTAILQ_HEAD_INITIALIZER(fsdriver_entries); + +static FsDriverTable FsDrivers[] = { + { .name = "local", .ops = &local_ops}, +#ifdef CONFIG_OPEN_BY_HANDLE + { .name = "handle", .ops = &handle_ops}, +#endif + { .name = "synth", .ops = &synth_ops}, + { .name = "proxy", .ops = &proxy_ops}, +}; + +int qemu_fsdev_add(QemuOpts *opts) +{ + int i; + struct FsDriverListEntry *fsle; + const char *fsdev_id = qemu_opts_id(opts); + const char *fsdriver = qemu_opt_get(opts, "fsdriver"); + const char *writeout = qemu_opt_get(opts, "writeout"); + bool ro = qemu_opt_get_bool(opts, "readonly", 0); + + if (!fsdev_id) { + fprintf(stderr, "fsdev: No id specified\n"); + return -1; + } + + if (fsdriver) { + for (i = 0; i < ARRAY_SIZE(FsDrivers); i++) { + if (strcmp(FsDrivers[i].name, fsdriver) == 0) { + break; + } + } + + if (i == ARRAY_SIZE(FsDrivers)) { + fprintf(stderr, "fsdev: fsdriver %s not found\n", fsdriver); + return -1; + } + } else { + fprintf(stderr, "fsdev: No fsdriver specified\n"); + return -1; + } + + fsle = g_malloc0(sizeof(*fsle)); + fsle->fse.fsdev_id = g_strdup(fsdev_id); + fsle->fse.ops = FsDrivers[i].ops; + if (writeout) { + if (!strcmp(writeout, "immediate")) { + fsle->fse.export_flags |= V9FS_IMMEDIATE_WRITEOUT; + } + } + if (ro) { + fsle->fse.export_flags |= V9FS_RDONLY; + } else { + fsle->fse.export_flags &= ~V9FS_RDONLY; + } + + if (fsle->fse.ops->parse_opts) { + if (fsle->fse.ops->parse_opts(opts, &fsle->fse)) { + g_free(fsle->fse.fsdev_id); + g_free(fsle); + return -1; + } + } + + QTAILQ_INSERT_TAIL(&fsdriver_entries, fsle, next); + return 0; +} + +FsDriverEntry *get_fsdev_fsentry(char *id) +{ + if (id) { + struct FsDriverListEntry *fsle; + + QTAILQ_FOREACH(fsle, &fsdriver_entries, next) { + if (strcmp(fsle->fse.fsdev_id, id) == 0) { + return &fsle->fse; + } + } + } + return NULL; +} diff --git a/src/fsdev/qemu-fsdev.h b/src/fsdev/qemu-fsdev.h new file mode 100644 index 0000000..9fa45bf --- /dev/null +++ b/src/fsdev/qemu-fsdev.h @@ -0,0 +1,48 @@ +/* + * Virtio 9p + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Gautham R Shenoy <ego@in.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ +#ifndef QEMU_FSDEV_H +#define QEMU_FSDEV_H +#include "qemu/option.h" +#include "file-op-9p.h" + + +/* + * A table to store the various file systems and their callback operations. + * ----------------- + * fstype | ops + * ----------------- + * local | local_ops + * . | + * . | + * . | + * . | + * ----------------- + * etc + */ +typedef struct FsDriverTable { + const char *name; + FileOperations *ops; +} FsDriverTable; + +typedef struct FsDriverListEntry { + FsDriverEntry fse; + QTAILQ_ENTRY(FsDriverListEntry) next; +} FsDriverListEntry; + +int qemu_fsdev_add(QemuOpts *opts); +FsDriverEntry *get_fsdev_fsentry(char *id); +extern FileOperations local_ops; +extern FileOperations handle_ops; +extern FileOperations synth_ops; +extern FileOperations proxy_ops; +#endif diff --git a/src/fsdev/virtfs-proxy-helper.c b/src/fsdev/virtfs-proxy-helper.c new file mode 100644 index 0000000..ad1da0d --- /dev/null +++ b/src/fsdev/virtfs-proxy-helper.c @@ -0,0 +1,1166 @@ +/* + * Helper for QEMU Proxy FS Driver + * Copyright IBM, Corp. 2011 + * + * Authors: + * M. Mohan Kumar <mohan@in.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + */ + +#include <sys/resource.h> +#include <getopt.h> +#include <syslog.h> +#include <sys/capability.h> +#include <sys/fsuid.h> +#include <sys/vfs.h> +#include <sys/ioctl.h> +#include <linux/fs.h> +#ifdef CONFIG_LINUX_MAGIC_H +#include <linux/magic.h> +#endif +#include "qemu-common.h" +#include "qemu/sockets.h" +#include "qemu/xattr.h" +#include "virtio-9p-marshal.h" +#include "hw/9pfs/virtio-9p-proxy.h" +#include "fsdev/virtio-9p-marshal.h" + +#define PROGNAME "virtfs-proxy-helper" + +#ifndef XFS_SUPER_MAGIC +#define XFS_SUPER_MAGIC 0x58465342 +#endif +#ifndef EXT2_SUPER_MAGIC +#define EXT2_SUPER_MAGIC 0xEF53 +#endif +#ifndef REISERFS_SUPER_MAGIC +#define REISERFS_SUPER_MAGIC 0x52654973 +#endif +#ifndef BTRFS_SUPER_MAGIC +#define BTRFS_SUPER_MAGIC 0x9123683E +#endif + +static struct option helper_opts[] = { + {"fd", required_argument, NULL, 'f'}, + {"path", required_argument, NULL, 'p'}, + {"nodaemon", no_argument, NULL, 'n'}, + {"socket", required_argument, NULL, 's'}, + {"uid", required_argument, NULL, 'u'}, + {"gid", required_argument, NULL, 'g'}, + {}, +}; + +static bool is_daemon; +static bool get_version; /* IOC getversion IOCTL supported */ + +static void GCC_FMT_ATTR(2, 3) do_log(int loglevel, const char *format, ...) +{ + va_list ap; + + va_start(ap, format); + if (is_daemon) { + vsyslog(LOG_CRIT, format, ap); + } else { + vfprintf(stderr, format, ap); + } + va_end(ap); +} + +static void do_perror(const char *string) +{ + if (is_daemon) { + syslog(LOG_CRIT, "%s:%s", string, strerror(errno)); + } else { + fprintf(stderr, "%s:%s\n", string, strerror(errno)); + } +} + +static int do_cap_set(cap_value_t *cap_value, int size, int reset) +{ + cap_t caps; + if (reset) { + /* + * Start with an empty set and set permitted and effective + */ + caps = cap_init(); + if (caps == NULL) { + do_perror("cap_init"); + return -1; + } + if (cap_set_flag(caps, CAP_PERMITTED, size, cap_value, CAP_SET) < 0) { + do_perror("cap_set_flag"); + goto error; + } + } else { + caps = cap_get_proc(); + if (!caps) { + do_perror("cap_get_proc"); + return -1; + } + } + if (cap_set_flag(caps, CAP_EFFECTIVE, size, cap_value, CAP_SET) < 0) { + do_perror("cap_set_flag"); + goto error; + } + if (cap_set_proc(caps) < 0) { + do_perror("cap_set_proc"); + goto error; + } + cap_free(caps); + return 0; + +error: + cap_free(caps); + return -1; +} + +static int init_capabilities(void) +{ + /* helper needs following capabilities only */ + cap_value_t cap_list[] = { + CAP_CHOWN, + CAP_DAC_OVERRIDE, + CAP_FOWNER, + CAP_FSETID, + CAP_SETGID, + CAP_MKNOD, + CAP_SETUID, + }; + return do_cap_set(cap_list, ARRAY_SIZE(cap_list), 1); +} + +static int socket_read(int sockfd, void *buff, ssize_t size) +{ + ssize_t retval, total = 0; + + while (size) { + retval = read(sockfd, buff, size); + if (retval == 0) { + return -EIO; + } + if (retval < 0) { + if (errno == EINTR) { + continue; + } + return -errno; + } + size -= retval; + buff += retval; + total += retval; + } + return total; +} + +static int socket_write(int sockfd, void *buff, ssize_t size) +{ + ssize_t retval, total = 0; + + while (size) { + retval = write(sockfd, buff, size); + if (retval < 0) { + if (errno == EINTR) { + continue; + } + return -errno; + } + size -= retval; + buff += retval; + total += retval; + } + return total; +} + +static int read_request(int sockfd, struct iovec *iovec, ProxyHeader *header) +{ + int retval; + + /* + * read the request header. + */ + iovec->iov_len = 0; + retval = socket_read(sockfd, iovec->iov_base, PROXY_HDR_SZ); + if (retval < 0) { + return retval; + } + iovec->iov_len = PROXY_HDR_SZ; + retval = proxy_unmarshal(iovec, 0, "dd", &header->type, &header->size); + if (retval < 0) { + return retval; + } + /* + * We can't process message.size > PROXY_MAX_IO_SZ. + * Treat it as fatal error + */ + if (header->size > PROXY_MAX_IO_SZ) { + return -ENOBUFS; + } + retval = socket_read(sockfd, iovec->iov_base + PROXY_HDR_SZ, header->size); + if (retval < 0) { + return retval; + } + iovec->iov_len += header->size; + return 0; +} + +static int send_fd(int sockfd, int fd) +{ + struct msghdr msg; + struct iovec iov; + int retval, data; + struct cmsghdr *cmsg; + union MsgControl msg_control; + + iov.iov_base = &data; + iov.iov_len = sizeof(data); + + memset(&msg, 0, sizeof(msg)); + msg.msg_iov = &iov; + msg.msg_iovlen = 1; + /* No ancillary data on error */ + if (fd < 0) { + /* fd is really negative errno if the request failed */ + data = fd; + } else { + data = V9FS_FD_VALID; + msg.msg_control = &msg_control; + msg.msg_controllen = sizeof(msg_control); + + cmsg = &msg_control.cmsg; + cmsg->cmsg_len = CMSG_LEN(sizeof(fd)); + cmsg->cmsg_level = SOL_SOCKET; + cmsg->cmsg_type = SCM_RIGHTS; + memcpy(CMSG_DATA(cmsg), &fd, sizeof(fd)); + } + + do { + retval = sendmsg(sockfd, &msg, 0); + } while (retval < 0 && errno == EINTR); + if (fd >= 0) { + close(fd); + } + if (retval < 0) { + return retval; + } + return 0; +} + +static int send_status(int sockfd, struct iovec *iovec, int status) +{ + ProxyHeader header; + int retval, msg_size; + + if (status < 0) { + header.type = T_ERROR; + } else { + header.type = T_SUCCESS; + } + header.size = sizeof(status); + /* + * marshal the return status. We don't check error. + * because we are sure we have enough space for the status + */ + msg_size = proxy_marshal(iovec, 0, "ddd", header.type, + header.size, status); + if (msg_size < 0) { + return msg_size; + } + retval = socket_write(sockfd, iovec->iov_base, msg_size); + if (retval < 0) { + return retval; + } + return 0; +} + +/* + * from man 7 capabilities, section + * Effect of User ID Changes on Capabilities: + * If the effective user ID is changed from nonzero to 0, then the permitted + * set is copied to the effective set. If the effective user ID is changed + * from 0 to nonzero, then all capabilities are are cleared from the effective + * set. + * + * The setfsuid/setfsgid man pages warn that changing the effective user ID may + * expose the program to unwanted signals, but this is not true anymore: for an + * unprivileged (without CAP_KILL) program to send a signal, the real or + * effective user ID of the sending process must equal the real or saved user + * ID of the target process. Even when dropping privileges, it is enough to + * keep the saved UID to a "privileged" value and virtfs-proxy-helper won't + * be exposed to signals. So just use setresuid/setresgid. + */ +static int setugid(int uid, int gid, int *suid, int *sgid) +{ + int retval; + + /* + * We still need DAC_OVERRIDE because we don't change + * supplementary group ids, and hence may be subjected DAC rules + */ + cap_value_t cap_list[] = { + CAP_DAC_OVERRIDE, + }; + + *suid = geteuid(); + *sgid = getegid(); + + if (setresgid(-1, gid, *sgid) == -1) { + retval = -errno; + goto err_out; + } + + if (setresuid(-1, uid, *suid) == -1) { + retval = -errno; + goto err_sgid; + } + + if (uid != 0 || gid != 0) { + if (do_cap_set(cap_list, ARRAY_SIZE(cap_list), 0) < 0) { + retval = -errno; + goto err_suid; + } + } + return 0; + +err_suid: + if (setresuid(-1, *suid, *suid) == -1) { + abort(); + } +err_sgid: + if (setresgid(-1, *sgid, *sgid) == -1) { + abort(); + } +err_out: + return retval; +} + +/* + * This is used to reset the ugid back with the saved values + * There is nothing much we can do checking error values here. + */ +static void resetugid(int suid, int sgid) +{ + if (setresgid(-1, sgid, sgid) == -1) { + abort(); + } + if (setresuid(-1, suid, suid) == -1) { + abort(); + } +} + +/* + * send response in two parts + * 1) ProxyHeader + * 2) Response or error status + * This function should be called with marshaled response + * send_response constructs header part and error part only. + * send response sends {ProxyHeader,Response} if the request was success + * otherwise sends {ProxyHeader,error status} + */ +static int send_response(int sock, struct iovec *iovec, int size) +{ + int retval; + ProxyHeader header; + + /* + * If response size exceeds available iovec->iov_len, + * we return ENOBUFS + */ + if (size > PROXY_MAX_IO_SZ) { + size = -ENOBUFS; + } + + if (size < 0) { + /* + * In case of error we would not have got the error encoded + * already so encode the error here. + */ + header.type = T_ERROR; + header.size = sizeof(size); + proxy_marshal(iovec, PROXY_HDR_SZ, "d", size); + } else { + header.type = T_SUCCESS; + header.size = size; + } + proxy_marshal(iovec, 0, "dd", header.type, header.size); + retval = socket_write(sock, iovec->iov_base, header.size + PROXY_HDR_SZ); + if (retval < 0) { + return retval; + } + return 0; +} + +/* + * gets generation number + * returns -errno on failure and sizeof(generation number) on success + */ +static int do_getversion(struct iovec *iovec, struct iovec *out_iovec) +{ + uint64_t version; + int retval = -ENOTTY; +#ifdef FS_IOC_GETVERSION + int fd; + V9fsString path; +#endif + + + /* no need to issue ioctl */ + if (!get_version) { + version = 0; + retval = proxy_marshal(out_iovec, PROXY_HDR_SZ, "q", version); + return retval; + } +#ifdef FS_IOC_GETVERSION + retval = proxy_unmarshal(iovec, PROXY_HDR_SZ, "s", &path); + if (retval < 0) { + return retval; + } + + fd = open(path.data, O_RDONLY); + if (fd < 0) { + retval = -errno; + goto err_out; + } + if (ioctl(fd, FS_IOC_GETVERSION, &version) < 0) { + retval = -errno; + } else { + retval = proxy_marshal(out_iovec, PROXY_HDR_SZ, "q", version); + } + close(fd); +err_out: + v9fs_string_free(&path); +#endif + return retval; +} + +static int do_getxattr(int type, struct iovec *iovec, struct iovec *out_iovec) +{ + int size = 0, offset, retval; + V9fsString path, name, xattr; + + v9fs_string_init(&xattr); + v9fs_string_init(&path); + retval = proxy_unmarshal(iovec, PROXY_HDR_SZ, "ds", &size, &path); + if (retval < 0) { + return retval; + } + offset = PROXY_HDR_SZ + retval; + + if (size) { + xattr.data = g_malloc(size); + xattr.size = size; + } + switch (type) { + case T_LGETXATTR: + v9fs_string_init(&name); + retval = proxy_unmarshal(iovec, offset, "s", &name); + if (retval > 0) { + retval = lgetxattr(path.data, name.data, xattr.data, size); + if (retval < 0) { + retval = -errno; + } else { + xattr.size = retval; + } + } + v9fs_string_free(&name); + break; + case T_LLISTXATTR: + retval = llistxattr(path.data, xattr.data, size); + if (retval < 0) { + retval = -errno; + } else { + xattr.size = retval; + } + break; + } + if (retval < 0) { + goto err_out; + } + + if (!size) { + proxy_marshal(out_iovec, PROXY_HDR_SZ, "d", retval); + retval = sizeof(retval); + } else { + retval = proxy_marshal(out_iovec, PROXY_HDR_SZ, "s", &xattr); + } +err_out: + v9fs_string_free(&xattr); + v9fs_string_free(&path); + return retval; +} + +static void stat_to_prstat(ProxyStat *pr_stat, struct stat *stat) +{ + memset(pr_stat, 0, sizeof(*pr_stat)); + pr_stat->st_dev = stat->st_dev; + pr_stat->st_ino = stat->st_ino; + pr_stat->st_nlink = stat->st_nlink; + pr_stat->st_mode = stat->st_mode; + pr_stat->st_uid = stat->st_uid; + pr_stat->st_gid = stat->st_gid; + pr_stat->st_rdev = stat->st_rdev; + pr_stat->st_size = stat->st_size; + pr_stat->st_blksize = stat->st_blksize; + pr_stat->st_blocks = stat->st_blocks; + pr_stat->st_atim_sec = stat->st_atim.tv_sec; + pr_stat->st_atim_nsec = stat->st_atim.tv_nsec; + pr_stat->st_mtim_sec = stat->st_mtim.tv_sec; + pr_stat->st_mtim_nsec = stat->st_mtim.tv_nsec; + pr_stat->st_ctim_sec = stat->st_ctim.tv_sec; + pr_stat->st_ctim_nsec = stat->st_ctim.tv_nsec; +} + +static void statfs_to_prstatfs(ProxyStatFS *pr_stfs, struct statfs *stfs) +{ + memset(pr_stfs, 0, sizeof(*pr_stfs)); + pr_stfs->f_type = stfs->f_type; + pr_stfs->f_bsize = stfs->f_bsize; + pr_stfs->f_blocks = stfs->f_blocks; + pr_stfs->f_bfree = stfs->f_bfree; + pr_stfs->f_bavail = stfs->f_bavail; + pr_stfs->f_files = stfs->f_files; + pr_stfs->f_ffree = stfs->f_ffree; + pr_stfs->f_fsid[0] = stfs->f_fsid.__val[0]; + pr_stfs->f_fsid[1] = stfs->f_fsid.__val[1]; + pr_stfs->f_namelen = stfs->f_namelen; + pr_stfs->f_frsize = stfs->f_frsize; +} + +/* + * Gets stat/statfs information and packs in out_iovec structure + * on success returns number of bytes packed in out_iovec struture + * otherwise returns -errno + */ +static int do_stat(int type, struct iovec *iovec, struct iovec *out_iovec) +{ + int retval; + V9fsString path; + ProxyStat pr_stat; + ProxyStatFS pr_stfs; + struct stat st_buf; + struct statfs stfs_buf; + + v9fs_string_init(&path); + retval = proxy_unmarshal(iovec, PROXY_HDR_SZ, "s", &path); + if (retval < 0) { + return retval; + } + + switch (type) { + case T_LSTAT: + retval = lstat(path.data, &st_buf); + if (retval < 0) { + retval = -errno; + } else { + stat_to_prstat(&pr_stat, &st_buf); + retval = proxy_marshal(out_iovec, PROXY_HDR_SZ, + "qqqdddqqqqqqqqqq", pr_stat.st_dev, + pr_stat.st_ino, pr_stat.st_nlink, + pr_stat.st_mode, pr_stat.st_uid, + pr_stat.st_gid, pr_stat.st_rdev, + pr_stat.st_size, pr_stat.st_blksize, + pr_stat.st_blocks, + pr_stat.st_atim_sec, pr_stat.st_atim_nsec, + pr_stat.st_mtim_sec, pr_stat.st_mtim_nsec, + pr_stat.st_ctim_sec, pr_stat.st_ctim_nsec); + } + break; + case T_STATFS: + retval = statfs(path.data, &stfs_buf); + if (retval < 0) { + retval = -errno; + } else { + statfs_to_prstatfs(&pr_stfs, &stfs_buf); + retval = proxy_marshal(out_iovec, PROXY_HDR_SZ, + "qqqqqqqqqqq", pr_stfs.f_type, + pr_stfs.f_bsize, pr_stfs.f_blocks, + pr_stfs.f_bfree, pr_stfs.f_bavail, + pr_stfs.f_files, pr_stfs.f_ffree, + pr_stfs.f_fsid[0], pr_stfs.f_fsid[1], + pr_stfs.f_namelen, pr_stfs.f_frsize); + } + break; + } + v9fs_string_free(&path); + return retval; +} + +static int do_readlink(struct iovec *iovec, struct iovec *out_iovec) +{ + char *buffer; + int size, retval; + V9fsString target, path; + + v9fs_string_init(&path); + retval = proxy_unmarshal(iovec, PROXY_HDR_SZ, "sd", &path, &size); + if (retval < 0) { + v9fs_string_free(&path); + return retval; + } + buffer = g_malloc(size); + v9fs_string_init(&target); + retval = readlink(path.data, buffer, size - 1); + if (retval > 0) { + buffer[retval] = '\0'; + v9fs_string_sprintf(&target, "%s", buffer); + retval = proxy_marshal(out_iovec, PROXY_HDR_SZ, "s", &target); + } else { + retval = -errno; + } + g_free(buffer); + v9fs_string_free(&target); + v9fs_string_free(&path); + return retval; +} + +/* + * create other filesystem objects and send 0 on success + * return -errno on error + */ +static int do_create_others(int type, struct iovec *iovec) +{ + dev_t rdev; + int retval = 0; + int offset = PROXY_HDR_SZ; + V9fsString oldpath, path; + int mode, uid, gid, cur_uid, cur_gid; + + v9fs_string_init(&path); + v9fs_string_init(&oldpath); + + retval = proxy_unmarshal(iovec, offset, "dd", &uid, &gid); + if (retval < 0) { + return retval; + } + offset += retval; + retval = setugid(uid, gid, &cur_uid, &cur_gid); + if (retval < 0) { + goto unmarshal_err_out; + } + switch (type) { + case T_MKNOD: + retval = proxy_unmarshal(iovec, offset, "sdq", &path, &mode, &rdev); + if (retval < 0) { + goto err_out; + } + retval = mknod(path.data, mode, rdev); + break; + case T_MKDIR: + retval = proxy_unmarshal(iovec, offset, "sd", &path, &mode); + if (retval < 0) { + goto err_out; + } + retval = mkdir(path.data, mode); + break; + case T_SYMLINK: + retval = proxy_unmarshal(iovec, offset, "ss", &oldpath, &path); + if (retval < 0) { + goto err_out; + } + retval = symlink(oldpath.data, path.data); + break; + } + if (retval < 0) { + retval = -errno; + } + +err_out: + resetugid(cur_uid, cur_gid); +unmarshal_err_out: + v9fs_string_free(&path); + v9fs_string_free(&oldpath); + return retval; +} + +/* + * create a file and send fd on success + * return -errno on error + */ +static int do_create(struct iovec *iovec) +{ + int ret; + V9fsString path; + int flags, mode, uid, gid, cur_uid, cur_gid; + + v9fs_string_init(&path); + ret = proxy_unmarshal(iovec, PROXY_HDR_SZ, "sdddd", + &path, &flags, &mode, &uid, &gid); + if (ret < 0) { + goto unmarshal_err_out; + } + ret = setugid(uid, gid, &cur_uid, &cur_gid); + if (ret < 0) { + goto unmarshal_err_out; + } + ret = open(path.data, flags, mode); + if (ret < 0) { + ret = -errno; + } + + resetugid(cur_uid, cur_gid); +unmarshal_err_out: + v9fs_string_free(&path); + return ret; +} + +/* + * open a file and send fd on success + * return -errno on error + */ +static int do_open(struct iovec *iovec) +{ + int flags, ret; + V9fsString path; + + v9fs_string_init(&path); + ret = proxy_unmarshal(iovec, PROXY_HDR_SZ, "sd", &path, &flags); + if (ret < 0) { + goto err_out; + } + ret = open(path.data, flags); + if (ret < 0) { + ret = -errno; + } +err_out: + v9fs_string_free(&path); + return ret; +} + +/* create unix domain socket and return the descriptor */ +static int proxy_socket(const char *path, uid_t uid, gid_t gid) +{ + int sock, client; + struct sockaddr_un proxy, qemu; + socklen_t size; + + /* requested socket already exists, refuse to start */ + if (!access(path, F_OK)) { + do_log(LOG_CRIT, "socket already exists\n"); + return -1; + } + + if (strlen(path) >= sizeof(proxy.sun_path)) { + do_log(LOG_CRIT, "UNIX domain socket path exceeds %zu characters\n", + sizeof(proxy.sun_path)); + return -1; + } + + sock = socket(AF_UNIX, SOCK_STREAM, 0); + if (sock < 0) { + do_perror("socket"); + return -1; + } + + /* mask other part of mode bits */ + umask(7); + + proxy.sun_family = AF_UNIX; + strcpy(proxy.sun_path, path); + if (bind(sock, (struct sockaddr *)&proxy, + sizeof(struct sockaddr_un)) < 0) { + do_perror("bind"); + goto error; + } + if (chown(proxy.sun_path, uid, gid) < 0) { + do_perror("chown"); + goto error; + } + if (listen(sock, 1) < 0) { + do_perror("listen"); + goto error; + } + + size = sizeof(qemu); + client = accept(sock, (struct sockaddr *)&qemu, &size); + if (client < 0) { + do_perror("accept"); + goto error; + } + close(sock); + return client; + +error: + close(sock); + return -1; +} + +static void usage(char *prog) +{ + fprintf(stderr, "usage: %s\n" + " -p|--path <path> 9p path to export\n" + " {-f|--fd <socket-descriptor>} socket file descriptor to be used\n" + " {-s|--socket <socketname> socket file used for communication\n" + " \t-u|--uid <uid> -g|--gid <gid>} - uid:gid combination to give " + " access to this socket\n" + " \tNote: -s & -f can not be used together\n" + " [-n|--nodaemon] Run as a normal program\n", + basename(prog)); +} + +static int process_reply(int sock, int type, + struct iovec *out_iovec, int retval) +{ + switch (type) { + case T_OPEN: + case T_CREATE: + if (send_fd(sock, retval) < 0) { + return -1; + } + break; + case T_MKNOD: + case T_MKDIR: + case T_SYMLINK: + case T_LINK: + case T_CHMOD: + case T_CHOWN: + case T_TRUNCATE: + case T_UTIME: + case T_RENAME: + case T_REMOVE: + case T_LSETXATTR: + case T_LREMOVEXATTR: + if (send_status(sock, out_iovec, retval) < 0) { + return -1; + } + break; + case T_LSTAT: + case T_STATFS: + case T_READLINK: + case T_LGETXATTR: + case T_LLISTXATTR: + case T_GETVERSION: + if (send_response(sock, out_iovec, retval) < 0) { + return -1; + } + break; + default: + return -1; + break; + } + return 0; +} + +static int process_requests(int sock) +{ + int flags; + int size = 0; + int retval = 0; + uint64_t offset; + ProxyHeader header; + int mode, uid, gid; + V9fsString name, value; + struct timespec spec[2]; + V9fsString oldpath, path; + struct iovec in_iovec, out_iovec; + + in_iovec.iov_base = g_malloc(PROXY_MAX_IO_SZ + PROXY_HDR_SZ); + in_iovec.iov_len = PROXY_MAX_IO_SZ + PROXY_HDR_SZ; + out_iovec.iov_base = g_malloc(PROXY_MAX_IO_SZ + PROXY_HDR_SZ); + out_iovec.iov_len = PROXY_MAX_IO_SZ + PROXY_HDR_SZ; + + while (1) { + /* + * initialize the header type, so that we send + * response to proper request type. + */ + header.type = 0; + retval = read_request(sock, &in_iovec, &header); + if (retval < 0) { + goto err_out; + } + + switch (header.type) { + case T_OPEN: + retval = do_open(&in_iovec); + break; + case T_CREATE: + retval = do_create(&in_iovec); + break; + case T_MKNOD: + case T_MKDIR: + case T_SYMLINK: + retval = do_create_others(header.type, &in_iovec); + break; + case T_LINK: + v9fs_string_init(&path); + v9fs_string_init(&oldpath); + retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, + "ss", &oldpath, &path); + if (retval > 0) { + retval = link(oldpath.data, path.data); + if (retval < 0) { + retval = -errno; + } + } + v9fs_string_free(&oldpath); + v9fs_string_free(&path); + break; + case T_LSTAT: + case T_STATFS: + retval = do_stat(header.type, &in_iovec, &out_iovec); + break; + case T_READLINK: + retval = do_readlink(&in_iovec, &out_iovec); + break; + case T_CHMOD: + v9fs_string_init(&path); + retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, + "sd", &path, &mode); + if (retval > 0) { + retval = chmod(path.data, mode); + if (retval < 0) { + retval = -errno; + } + } + v9fs_string_free(&path); + break; + case T_CHOWN: + v9fs_string_init(&path); + retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "sdd", &path, + &uid, &gid); + if (retval > 0) { + retval = lchown(path.data, uid, gid); + if (retval < 0) { + retval = -errno; + } + } + v9fs_string_free(&path); + break; + case T_TRUNCATE: + v9fs_string_init(&path); + retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "sq", + &path, &offset); + if (retval > 0) { + retval = truncate(path.data, offset); + if (retval < 0) { + retval = -errno; + } + } + v9fs_string_free(&path); + break; + case T_UTIME: + v9fs_string_init(&path); + retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "sqqqq", &path, + &spec[0].tv_sec, &spec[0].tv_nsec, + &spec[1].tv_sec, &spec[1].tv_nsec); + if (retval > 0) { + retval = qemu_utimens(path.data, spec); + if (retval < 0) { + retval = -errno; + } + } + v9fs_string_free(&path); + break; + case T_RENAME: + v9fs_string_init(&path); + v9fs_string_init(&oldpath); + retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, + "ss", &oldpath, &path); + if (retval > 0) { + retval = rename(oldpath.data, path.data); + if (retval < 0) { + retval = -errno; + } + } + v9fs_string_free(&oldpath); + v9fs_string_free(&path); + break; + case T_REMOVE: + v9fs_string_init(&path); + retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "s", &path); + if (retval > 0) { + retval = remove(path.data); + if (retval < 0) { + retval = -errno; + } + } + v9fs_string_free(&path); + break; + case T_LGETXATTR: + case T_LLISTXATTR: + retval = do_getxattr(header.type, &in_iovec, &out_iovec); + break; + case T_LSETXATTR: + v9fs_string_init(&path); + v9fs_string_init(&name); + v9fs_string_init(&value); + retval = proxy_unmarshal(&in_iovec, PROXY_HDR_SZ, "sssdd", &path, + &name, &value, &size, &flags); + if (retval > 0) { + retval = lsetxattr(path.data, + name.data, value.data, size, flags); + if (retval < 0) { + retval = -errno; + } + } + v9fs_string_free(&path); + v9fs_string_free(&name); + v9fs_string_free(&value); + break; + case T_LREMOVEXATTR: + v9fs_string_init(&path); + v9fs_string_init(&name); + retval = proxy_unmarshal(&in_iovec, + PROXY_HDR_SZ, "ss", &path, &name); + if (retval > 0) { + retval = lremovexattr(path.data, name.data); + if (retval < 0) { + retval = -errno; + } + } + v9fs_string_free(&path); + v9fs_string_free(&name); + break; + case T_GETVERSION: + retval = do_getversion(&in_iovec, &out_iovec); + break; + default: + goto err_out; + break; + } + + if (process_reply(sock, header.type, &out_iovec, retval) < 0) { + goto err_out; + } + } +err_out: + g_free(in_iovec.iov_base); + g_free(out_iovec.iov_base); + return -1; +} + +int main(int argc, char **argv) +{ + int sock; + uid_t own_u; + gid_t own_g; + char *rpath = NULL; + char *sock_name = NULL; + struct stat stbuf; + int c, option_index; +#ifdef FS_IOC_GETVERSION + int retval; + struct statfs st_fs; +#endif + + is_daemon = true; + sock = -1; + own_u = own_g = -1; + while (1) { + option_index = 0; + c = getopt_long(argc, argv, "p:nh?f:s:u:g:", helper_opts, + &option_index); + if (c == -1) { + break; + } + switch (c) { + case 'p': + rpath = g_strdup(optarg); + break; + case 'n': + is_daemon = false; + break; + case 'f': + sock = atoi(optarg); + break; + case 's': + sock_name = g_strdup(optarg); + break; + case 'u': + own_u = atoi(optarg); + break; + case 'g': + own_g = atoi(optarg); + break; + case '?': + case 'h': + default: + usage(argv[0]); + exit(EXIT_FAILURE); + } + } + + /* Parameter validation */ + if ((sock_name == NULL && sock == -1) || rpath == NULL) { + fprintf(stderr, "socket, socket descriptor or path not specified\n"); + usage(argv[0]); + return -1; + } + + if (sock_name && sock != -1) { + fprintf(stderr, "both named socket and socket descriptor specified\n"); + usage(argv[0]); + exit(EXIT_FAILURE); + } + + if (sock_name && (own_u == -1 || own_g == -1)) { + fprintf(stderr, "owner uid:gid not specified, "); + fprintf(stderr, + "owner uid:gid specifies who can access the socket file\n"); + usage(argv[0]); + exit(EXIT_FAILURE); + } + + if (lstat(rpath, &stbuf) < 0) { + fprintf(stderr, "invalid path \"%s\" specified, %s\n", + rpath, strerror(errno)); + exit(EXIT_FAILURE); + } + + if (!S_ISDIR(stbuf.st_mode)) { + fprintf(stderr, "specified path \"%s\" is not directory\n", rpath); + exit(EXIT_FAILURE); + } + + if (is_daemon) { + if (daemon(0, 0) < 0) { + fprintf(stderr, "daemon call failed\n"); + exit(EXIT_FAILURE); + } + openlog(PROGNAME, LOG_PID, LOG_DAEMON); + } + + do_log(LOG_INFO, "Started\n"); + if (sock_name) { + sock = proxy_socket(sock_name, own_u, own_g); + if (sock < 0) { + goto error; + } + } + + if (chdir("/") < 0) { + do_perror("chdir"); + goto error; + } + if (chroot(rpath) < 0) { + do_perror("chroot"); + goto error; + } + + get_version = false; +#ifdef FS_IOC_GETVERSION + /* check whether underlying FS support IOC_GETVERSION */ + retval = statfs("/", &st_fs); + if (!retval) { + switch (st_fs.f_type) { + case EXT2_SUPER_MAGIC: + case BTRFS_SUPER_MAGIC: + case REISERFS_SUPER_MAGIC: + case XFS_SUPER_MAGIC: + get_version = true; + break; + } + } +#endif + + umask(0); + if (init_capabilities() < 0) { + goto error; + } + + process_requests(sock); +error: + do_log(LOG_INFO, "Done\n"); + closelog(); + return 0; +} diff --git a/src/fsdev/virtfs-proxy-helper.texi b/src/fsdev/virtfs-proxy-helper.texi new file mode 100644 index 0000000..e60e3b9 --- /dev/null +++ b/src/fsdev/virtfs-proxy-helper.texi @@ -0,0 +1,63 @@ +@example +@c man begin SYNOPSIS +usage: virtfs-proxy-helper options +@c man end +@end example + +@c man begin DESCRIPTION +@table @description +Pass-through security model in QEMU 9p server needs root privilege to do +few file operations (like chown, chmod to any mode/uid:gid). There are two +issues in pass-through security model + +1) TOCTTOU vulnerability: Following symbolic links in the server could +provide access to files beyond 9p export path. + +2) Running QEMU with root privilege could be a security issue. + +To overcome above issues, following approach is used: A new filesytem +type 'proxy' is introduced. Proxy FS uses chroot + socket combination +for securing the vulnerability known with following symbolic links. +Intention of adding a new filesystem type is to allow qemu to run +in non-root mode, but doing privileged operations using socket IO. + +Proxy helper(a stand alone binary part of qemu) is invoked with +root privileges. Proxy helper chroots into 9p export path and creates +a socket pair or a named socket based on the command line parameter. +QEMU and proxy helper communicate using this socket. QEMU proxy fs +driver sends filesystem request to proxy helper and receives the +response from it. + +Proxy helper is designed so that it can drop the root privilege with +retaining capbilities needed for doing filesystem operations only. + +@end table +@c man end + +@c man begin OPTIONS +The following options are supported: +@table @option +@item -h +@findex -h +Display help and exit +@item -p|--path path +Path to export for proxy filesystem driver +@item -f|--fd socket-id +Use given file descriptor as socket descriptor for communicating with +qemu proxy fs drier. Usually a helper like libvirt will create +socketpair and pass one of the fds as parameter to -f|--fd +@item -s|--socket socket-file +Creates named socket file for communicating with qemu proxy fs driver +@item -u|--uid uid -g|--gid gid +uid:gid combination to give access to named socket file +@item -n|--nodaemon +Run as a normal program. By default program will run in daemon mode +@end table +@c man end + +@setfilename virtfs-proxy-helper +@settitle QEMU 9p virtfs proxy filesystem helper + +@c man begin AUTHOR +M. Mohan Kumar +@c man end diff --git a/src/fsdev/virtio-9p-marshal.c b/src/fsdev/virtio-9p-marshal.c new file mode 100644 index 0000000..7748d32 --- /dev/null +++ b/src/fsdev/virtio-9p-marshal.c @@ -0,0 +1,322 @@ +/* + * Virtio 9p backend + * + * Copyright IBM, Corp. 2010 + * + * Authors: + * Anthony Liguori <aliguori@us.ibm.com> + * + * This work is licensed under the terms of the GNU GPL, version 2. See + * the COPYING file in the top-level directory. + * + */ + +#include <glib.h> +#include <glib/gprintf.h> +#include <sys/types.h> +#include <sys/time.h> +#include <utime.h> +#include <sys/uio.h> +#include <string.h> +#include <stdint.h> +#include <errno.h> + +#include "qemu/compiler.h" +#include "virtio-9p-marshal.h" +#include "qemu/bswap.h" + +void v9fs_string_free(V9fsString *str) +{ + g_free(str->data); + str->data = NULL; + str->size = 0; +} + +void v9fs_string_null(V9fsString *str) +{ + v9fs_string_free(str); +} + +void GCC_FMT_ATTR(2, 3) +v9fs_string_sprintf(V9fsString *str, const char *fmt, ...) +{ + va_list ap; + + v9fs_string_free(str); + + va_start(ap, fmt); + str->size = g_vasprintf(&str->data, fmt, ap); + va_end(ap); +} + +void v9fs_string_copy(V9fsString *lhs, V9fsString *rhs) +{ + v9fs_string_free(lhs); + v9fs_string_sprintf(lhs, "%s", rhs->data); +} + + +static ssize_t v9fs_packunpack(void *addr, struct iovec *sg, int sg_count, + size_t offset, size_t size, int pack) +{ + int i = 0; + size_t copied = 0; + size_t req_size = size; + + + for (i = 0; size && i < sg_count; i++) { + size_t len; + if (offset >= sg[i].iov_len) { + /* skip this sg */ + offset -= sg[i].iov_len; + continue; + } else { + len = MIN(sg[i].iov_len - offset, size); + if (pack) { + memcpy(sg[i].iov_base + offset, addr, len); + } else { + memcpy(addr, sg[i].iov_base + offset, len); + } + size -= len; + copied += len; + addr += len; + if (size) { + offset = 0; + continue; + } + } + } + if (copied < req_size) { + /* + * We copied less that requested size. error out + */ + return -ENOBUFS; + } + return copied; +} + +static ssize_t v9fs_unpack(void *dst, struct iovec *out_sg, int out_num, + size_t offset, size_t size) +{ + return v9fs_packunpack(dst, out_sg, out_num, offset, size, 0); +} + +ssize_t v9fs_pack(struct iovec *in_sg, int in_num, size_t offset, + const void *src, size_t size) +{ + return v9fs_packunpack((void *)src, in_sg, in_num, offset, size, 1); +} + +ssize_t v9fs_unmarshal(struct iovec *out_sg, int out_num, size_t offset, + int bswap, const char *fmt, ...) +{ + int i; + va_list ap; + ssize_t copied = 0; + size_t old_offset = offset; + + va_start(ap, fmt); + for (i = 0; fmt[i]; i++) { + switch (fmt[i]) { + case 'b': { + uint8_t *valp = va_arg(ap, uint8_t *); + copied = v9fs_unpack(valp, out_sg, out_num, offset, sizeof(*valp)); + break; + } + case 'w': { + uint16_t val, *valp; + valp = va_arg(ap, uint16_t *); + copied = v9fs_unpack(&val, out_sg, out_num, offset, sizeof(val)); + if (bswap) { + *valp = le16_to_cpu(val); + } else { + *valp = val; + } + break; + } + case 'd': { + uint32_t val, *valp; + valp = va_arg(ap, uint32_t *); + copied = v9fs_unpack(&val, out_sg, out_num, offset, sizeof(val)); + if (bswap) { + *valp = le32_to_cpu(val); + } else { + *valp = val; + } + break; + } + case 'q': { + uint64_t val, *valp; + valp = va_arg(ap, uint64_t *); + copied = v9fs_unpack(&val, out_sg, out_num, offset, sizeof(val)); + if (bswap) { + *valp = le64_to_cpu(val); + } else { + *valp = val; + } + break; + } + case 's': { + V9fsString *str = va_arg(ap, V9fsString *); + copied = v9fs_unmarshal(out_sg, out_num, offset, bswap, + "w", &str->size); + if (copied > 0) { + offset += copied; + str->data = g_malloc(str->size + 1); + copied = v9fs_unpack(str->data, out_sg, out_num, offset, + str->size); + if (copied > 0) { + str->data[str->size] = 0; + } else { + v9fs_string_free(str); + } + } + break; + } + case 'Q': { + V9fsQID *qidp = va_arg(ap, V9fsQID *); + copied = v9fs_unmarshal(out_sg, out_num, offset, bswap, "bdq", + &qidp->type, &qidp->version, &qidp->path); + break; + } + case 'S': { + V9fsStat *statp = va_arg(ap, V9fsStat *); + copied = v9fs_unmarshal(out_sg, out_num, offset, bswap, + "wwdQdddqsssssddd", + &statp->size, &statp->type, &statp->dev, + &statp->qid, &statp->mode, &statp->atime, + &statp->mtime, &statp->length, + &statp->name, &statp->uid, &statp->gid, + &statp->muid, &statp->extension, + &statp->n_uid, &statp->n_gid, + &statp->n_muid); + break; + } + case 'I': { + V9fsIattr *iattr = va_arg(ap, V9fsIattr *); + copied = v9fs_unmarshal(out_sg, out_num, offset, bswap, + "ddddqqqqq", + &iattr->valid, &iattr->mode, + &iattr->uid, &iattr->gid, &iattr->size, + &iattr->atime_sec, &iattr->atime_nsec, + &iattr->mtime_sec, &iattr->mtime_nsec); + break; + } + default: + break; + } + if (copied < 0) { + va_end(ap); + return copied; + } + offset += copied; + } + va_end(ap); + + return offset - old_offset; +} + +ssize_t v9fs_marshal(struct iovec *in_sg, int in_num, size_t offset, + int bswap, const char *fmt, ...) +{ + int i; + va_list ap; + ssize_t copied = 0; + size_t old_offset = offset; + + va_start(ap, fmt); + for (i = 0; fmt[i]; i++) { + switch (fmt[i]) { + case 'b': { + uint8_t val = va_arg(ap, int); + copied = v9fs_pack(in_sg, in_num, offset, &val, sizeof(val)); + break; + } + case 'w': { + uint16_t val; + if (bswap) { + cpu_to_le16w(&val, va_arg(ap, int)); + } else { + val = va_arg(ap, int); + } + copied = v9fs_pack(in_sg, in_num, offset, &val, sizeof(val)); + break; + } + case 'd': { + uint32_t val; + if (bswap) { + cpu_to_le32w(&val, va_arg(ap, uint32_t)); + } else { + val = va_arg(ap, uint32_t); + } + copied = v9fs_pack(in_sg, in_num, offset, &val, sizeof(val)); + break; + } + case 'q': { + uint64_t val; + if (bswap) { + cpu_to_le64w(&val, va_arg(ap, uint64_t)); + } else { + val = va_arg(ap, uint64_t); + } + copied = v9fs_pack(in_sg, in_num, offset, &val, sizeof(val)); + break; + } + case 's': { + V9fsString *str = va_arg(ap, V9fsString *); + copied = v9fs_marshal(in_sg, in_num, offset, bswap, + "w", str->size); + if (copied > 0) { + offset += copied; + copied = v9fs_pack(in_sg, in_num, offset, str->data, str->size); + } + break; + } + case 'Q': { + V9fsQID *qidp = va_arg(ap, V9fsQID *); + copied = v9fs_marshal(in_sg, in_num, offset, bswap, "bdq", + qidp->type, qidp->version, qidp->path); + break; + } + case 'S': { + V9fsStat *statp = va_arg(ap, V9fsStat *); + copied = v9fs_marshal(in_sg, in_num, offset, bswap, + "wwdQdddqsssssddd", + statp->size, statp->type, statp->dev, + &statp->qid, statp->mode, statp->atime, + statp->mtime, statp->length, &statp->name, + &statp->uid, &statp->gid, &statp->muid, + &statp->extension, statp->n_uid, + statp->n_gid, statp->n_muid); + break; + } + case 'A': { + V9fsStatDotl *statp = va_arg(ap, V9fsStatDotl *); + copied = v9fs_marshal(in_sg, in_num, offset, bswap, + "qQdddqqqqqqqqqqqqqqq", + statp->st_result_mask, + &statp->qid, statp->st_mode, + statp->st_uid, statp->st_gid, + statp->st_nlink, statp->st_rdev, + statp->st_size, statp->st_blksize, + statp->st_blocks, statp->st_atime_sec, + statp->st_atime_nsec, statp->st_mtime_sec, + statp->st_mtime_nsec, statp->st_ctime_sec, + statp->st_ctime_nsec, statp->st_btime_sec, + statp->st_btime_nsec, statp->st_gen, + statp->st_data_version); + break; + } + default: + break; + } + if (copied < 0) { + va_end(ap); + return copied; + } + offset += copied; + } + va_end(ap); + + return offset - old_offset; +} diff --git a/src/fsdev/virtio-9p-marshal.h b/src/fsdev/virtio-9p-marshal.h new file mode 100644 index 0000000..5df65a8 --- /dev/null +++ b/src/fsdev/virtio-9p-marshal.h @@ -0,0 +1,90 @@ +#ifndef _QEMU_VIRTIO_9P_MARSHAL_H +#define _QEMU_VIRTIO_9P_MARSHAL_H + +typedef struct V9fsString +{ + uint16_t size; + char *data; +} V9fsString; + +typedef struct V9fsQID +{ + int8_t type; + int32_t version; + int64_t path; +} V9fsQID; + +typedef struct V9fsStat +{ + int16_t size; + int16_t type; + int32_t dev; + V9fsQID qid; + int32_t mode; + int32_t atime; + int32_t mtime; + int64_t length; + V9fsString name; + V9fsString uid; + V9fsString gid; + V9fsString muid; + /* 9p2000.u */ + V9fsString extension; + int32_t n_uid; + int32_t n_gid; + int32_t n_muid; +} V9fsStat; + +typedef struct V9fsIattr +{ + int32_t valid; + int32_t mode; + int32_t uid; + int32_t gid; + int64_t size; + int64_t atime_sec; + int64_t atime_nsec; + int64_t mtime_sec; + int64_t mtime_nsec; +} V9fsIattr; + +typedef struct V9fsStatDotl { + uint64_t st_result_mask; + V9fsQID qid; + uint32_t st_mode; + uint32_t st_uid; + uint32_t st_gid; + uint64_t st_nlink; + uint64_t st_rdev; + uint64_t st_size; + uint64_t st_blksize; + uint64_t st_blocks; + uint64_t st_atime_sec; + uint64_t st_atime_nsec; + uint64_t st_mtime_sec; + uint64_t st_mtime_nsec; + uint64_t st_ctime_sec; + uint64_t st_ctime_nsec; + uint64_t st_btime_sec; + uint64_t st_btime_nsec; + uint64_t st_gen; + uint64_t st_data_version; +} V9fsStatDotl; + +static inline void v9fs_string_init(V9fsString *str) +{ + str->data = NULL; + str->size = 0; +} +extern void v9fs_string_free(V9fsString *str); +extern void v9fs_string_null(V9fsString *str); +extern void v9fs_string_sprintf(V9fsString *str, const char *fmt, ...); +extern void v9fs_string_copy(V9fsString *lhs, V9fsString *rhs); + +ssize_t v9fs_pack(struct iovec *in_sg, int in_num, size_t offset, + const void *src, size_t size); +ssize_t v9fs_unmarshal(struct iovec *out_sg, int out_num, size_t offset, + int bswap, const char *fmt, ...); +ssize_t v9fs_marshal(struct iovec *in_sg, int in_num, size_t offset, + int bswap, const char *fmt, ...); +#endif |