diff options
author | sephe <sephe@FreeBSD.org> | 2017-01-05 03:20:00 +0000 |
---|---|---|
committer | sephe <sephe@FreeBSD.org> | 2017-01-05 03:20:00 +0000 |
commit | 3b0ec971272f0d0cff6456302098876f14e313ac (patch) | |
tree | 8303a1576c629bb2073017c0299360cd37861a53 /sys/dev/hyperv/utilities | |
parent | 940f4f6a0f99fc1b68f891f638297161f27de9f7 (diff) | |
download | FreeBSD-src-3b0ec971272f0d0cff6456302098876f14e313ac.zip FreeBSD-src-3b0ec971272f0d0cff6456302098876f14e313ac.tar.gz |
MFC 308664,308742,308743
308664
hyperv/vss: Add driver and tools for VSS
VSS stands for "Volume Shadow Copy Service". Unlike virtual machine
snapshot, it only takes snapshot for the virtual disks, so both
filesystem and applications have to be aware of it, and cooperate in the
whole VSS process.
This driver exposes two device files to the userland:
/dev/hv_fsvss_dev
Normally userland programs should _not_ mess with this device file.
It is currently used by the hv_vss_daemon(8), which freezes and
thaws the filesystem. NOTE: currently only UFS is supported, if
the system mounts _any_ other filesystems, the hv_vss_daemon(8)
will veto the VSS process.
If hv_vss_daemon(8) was disabled, then this device file must be
opened, and proper ioctls must be issued to keep the VSS working.
/dev/hv_appvss_dev
Userland applications can open this device file to receive the
VSS freeze notification, hold the VSS for a while (mainly to flush
application data to filesystem), release the VSS process, and
receive the VSS thaw notification, i.e. applications can run again.
The VSS will still work, even if this device file is not opened.
However, only filesystem consistency is promised, if this device
file is not opened or is not operated properly.
hv_vss_daemon(8) is started by devd(8) by default. It can be disabled
by editing /etc/devd/hyperv.conf.
Submitted by: Hongjiang Zhang <honzhan microsoft com>
Reviewed by: kib, mckusick
Sponsored by: Microsoft
Differential Revision: https://reviews.freebsd.org/D8224
308742
hyperv/vss: Nuke unused variables.
Submitted by: markj
Reported by: markj
Sponsored by: Microsoft
308743
hyperv/vss: Install the userland daemon to /usr/sbin instead of /
Submitted by: markj
Reported by: markj
Sponsored by: Microsoft
Diffstat (limited to 'sys/dev/hyperv/utilities')
-rw-r--r-- | sys/dev/hyperv/utilities/hv_snapshot.c | 1061 | ||||
-rw-r--r-- | sys/dev/hyperv/utilities/hv_snapshot.h | 56 |
2 files changed, 1117 insertions, 0 deletions
diff --git a/sys/dev/hyperv/utilities/hv_snapshot.c b/sys/dev/hyperv/utilities/hv_snapshot.c new file mode 100644 index 0000000..2316297 --- /dev/null +++ b/sys/dev/hyperv/utilities/hv_snapshot.c @@ -0,0 +1,1061 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ */ + +#include <sys/cdefs.h> +__FBSDID("$FreeBSD$"); + +#include <sys/param.h> +#include <sys/kernel.h> +#include <sys/conf.h> +#include <sys/uio.h> +#include <sys/bus.h> +#include <sys/malloc.h> +#include <sys/mbuf.h> +#include <sys/module.h> +#include <sys/lock.h> +#include <sys/taskqueue.h> +#include <sys/selinfo.h> +#include <sys/sysctl.h> +#include <sys/poll.h> +#include <sys/proc.h> +#include <sys/queue.h> +#include <sys/kthread.h> +#include <sys/syscallsubr.h> +#include <sys/sysproto.h> +#include <sys/un.h> +#include <sys/endian.h> +#include <sys/sema.h> +#include <sys/signal.h> +#include <sys/syslog.h> +#include <sys/systm.h> +#include <sys/mutex.h> +#include <sys/callout.h> + +#include <dev/hyperv/include/hyperv.h> +#include <dev/hyperv/utilities/hv_utilreg.h> +#include <dev/hyperv/utilities/vmbus_icreg.h> + +#include "hv_util.h" +#include "hv_snapshot.h" +#include "vmbus_if.h" + +#define VSS_MAJOR 5 +#define VSS_MINOR 0 +#define VSS_MSGVER VMBUS_IC_VERSION(VSS_MAJOR, VSS_MINOR) + +#define VSS_FWVER_MAJOR 3 +#define VSS_FWVER VMBUS_IC_VERSION(VSS_FWVER_MAJOR, 0) + +#define TIMEOUT_LIMIT (15) // seconds +enum hv_vss_op { + VSS_OP_CREATE = 0, + VSS_OP_DELETE, + VSS_OP_HOT_BACKUP, + VSS_OP_GET_DM_INFO, + VSS_OP_BU_COMPLETE, + /* + * Following operations are only supported with IC version >= 5.0 + */ + VSS_OP_FREEZE, /* Freeze the file systems in the VM */ + VSS_OP_THAW, /* Unfreeze the file systems */ + VSS_OP_AUTO_RECOVER, + VSS_OP_COUNT /* Number of operations, must be last */ +}; + +/* + * Header for all VSS messages. + */ +struct hv_vss_hdr { + struct vmbus_icmsg_hdr ic_hdr; + uint8_t operation; + uint8_t reserved[7]; +} __packed; + + +/* + * Flag values for the hv_vss_check_feature. Here supports only + * one value. 
+ */ +#define VSS_HBU_NO_AUTO_RECOVERY 0x00000005 + +struct hv_vss_check_feature { + uint32_t flags; +} __packed; + +struct hv_vss_check_dm_info { + uint32_t flags; +} __packed; + +struct hv_vss_msg { + union { + struct hv_vss_hdr vss_hdr; + } hdr; + union { + struct hv_vss_check_feature vss_cf; + struct hv_vss_check_dm_info dm_info; + } body; +} __packed; + +struct hv_vss_req { + struct hv_vss_opt_msg opt_msg; /* used to communicate with daemon */ + struct hv_vss_msg msg; /* used to communicate with host */ +} __packed; + +/* hv_vss debug control */ +static int hv_vss_log = 0; + +#define hv_vss_log_error(...) do { \ + if (hv_vss_log > 0) \ + log(LOG_ERR, "hv_vss: " __VA_ARGS__); \ +} while (0) + +#define hv_vss_log_info(...) do { \ + if (hv_vss_log > 1) \ + log(LOG_INFO, "hv_vss: " __VA_ARGS__); \ +} while (0) + +static const struct vmbus_ic_desc vmbus_vss_descs[] = { + { + .ic_guid = { .hv_guid = { + 0x29, 0x2e, 0xfa, 0x35, 0x23, 0xea, 0x36, 0x42, + 0x96, 0xae, 0x3a, 0x6e, 0xba, 0xcb, 0xa4, 0x40} }, + .ic_desc = "Hyper-V VSS" + }, + VMBUS_IC_DESC_END +}; + +static const char * vss_opt_name[] = {"None", "VSSCheck", "Freeze", "Thaw"}; + +/* character device prototypes */ +static d_open_t hv_vss_dev_open; +static d_close_t hv_vss_dev_close; +static d_poll_t hv_vss_dev_daemon_poll; +static d_ioctl_t hv_vss_dev_daemon_ioctl; + +static d_open_t hv_appvss_dev_open; +static d_close_t hv_appvss_dev_close; +static d_poll_t hv_appvss_dev_poll; +static d_ioctl_t hv_appvss_dev_ioctl; + +/* hv_vss character device structure */ +static struct cdevsw hv_vss_cdevsw = +{ + .d_version = D_VERSION, + .d_open = hv_vss_dev_open, + .d_close = hv_vss_dev_close, + .d_poll = hv_vss_dev_daemon_poll, + .d_ioctl = hv_vss_dev_daemon_ioctl, + .d_name = FS_VSS_DEV_NAME, +}; + +static struct cdevsw hv_appvss_cdevsw = +{ + .d_version = D_VERSION, + .d_open = hv_appvss_dev_open, + .d_close = hv_appvss_dev_close, + .d_poll = hv_appvss_dev_poll, + .d_ioctl = hv_appvss_dev_ioctl, + .d_name = 
APP_VSS_DEV_NAME, +}; + +struct hv_vss_sc; +/* + * Global state to track cdev + */ +struct hv_vss_dev_sc { + /* + * msg was transferred from host to notify queue, and + * ack queue. Finally, it was recyled to free list. + */ + STAILQ_HEAD(, hv_vss_req_internal) to_notify_queue; + STAILQ_HEAD(, hv_vss_req_internal) to_ack_queue; + struct hv_vss_sc *sc; + struct proc *proc_task; + struct selinfo hv_vss_selinfo; +}; +/* + * Global state to track and synchronize the transaction requests from the host. + * The VSS allows user to register their function to do freeze/thaw for application. + * VSS kernel will notify both vss daemon and user application if it is registered. + * The implementation state transition is illustrated by: + * https://clovertrail.github.io/assets/vssdot.png + */ +typedef struct hv_vss_sc { + struct hv_util_sc util_sc; + device_t dev; + + struct task task; + + /* + * mutex is used to protect access of list/queue, + * callout in request is also used this mutex. + */ + struct mtx pending_mutex; + /* + * req_free_list contains all free items + */ + LIST_HEAD(, hv_vss_req_internal) req_free_list; + + /* Indicates if daemon registered with driver */ + boolean_t register_done; + + boolean_t app_register_done; + + /* cdev for file system freeze/thaw */ + struct cdev *hv_vss_dev; + /* cdev for application freeze/thaw */ + struct cdev *hv_appvss_dev; + + /* sc for app */ + struct hv_vss_dev_sc app_sc; + /* sc for deamon */ + struct hv_vss_dev_sc daemon_sc; +} hv_vss_sc; + +typedef struct hv_vss_req_internal { + LIST_ENTRY(hv_vss_req_internal) link; + STAILQ_ENTRY(hv_vss_req_internal) slink; + struct hv_vss_req vss_req; + + /* Rcv buffer for communicating with the host*/ + uint8_t *rcv_buf; + /* Length of host message */ + uint32_t host_msg_len; + /* Host message id */ + uint64_t host_msg_id; + + hv_vss_sc *sc; + + struct callout callout; +} hv_vss_req_internal; + +#define SEARCH_REMOVE_REQ_LOCKED(reqp, queue, link, tmp, id) \ + do { \ + 
STAILQ_FOREACH_SAFE(reqp, queue, link, tmp) { \ + if (reqp->vss_req.opt_msg.msgid == id) { \ + STAILQ_REMOVE(queue, \ + reqp, hv_vss_req_internal, link); \ + break; \ + } \ + } \ + } while (0) + +static bool +hv_vss_is_daemon_killed_after_launch(hv_vss_sc *sc) +{ + return (!sc->register_done && sc->daemon_sc.proc_task); +} + +/* + * Callback routine that gets called whenever there is a message from host + */ +static void +hv_vss_callback(struct vmbus_channel *chan __unused, void *context) +{ + hv_vss_sc *sc = (hv_vss_sc*)context; + if (hv_vss_is_daemon_killed_after_launch(sc)) + hv_vss_log_info("%s: daemon was killed!\n", __func__); + if (sc->register_done || sc->daemon_sc.proc_task) { + hv_vss_log_info("%s: Queuing work item\n", __func__); + if (hv_vss_is_daemon_killed_after_launch(sc)) + hv_vss_log_info("%s: daemon was killed!\n", __func__); + taskqueue_enqueue(taskqueue_thread, &sc->task); + } else { + hv_vss_log_info("%s: daemon has never been registered\n", __func__); + } + hv_vss_log_info("%s: received msg from host\n", __func__); +} +/* + * Send the response back to the host. 
+ */ +static void +hv_vss_respond_host(uint8_t *rcv_buf, struct vmbus_channel *ch, + uint32_t recvlen, uint64_t requestid, uint32_t error) +{ + struct vmbus_icmsg_hdr *hv_icmsg_hdrp; + + hv_icmsg_hdrp = (struct vmbus_icmsg_hdr *)rcv_buf; + + hv_icmsg_hdrp->ic_status = error; + hv_icmsg_hdrp->ic_flags = HV_ICMSGHDRFLAG_TRANSACTION | HV_ICMSGHDRFLAG_RESPONSE; + + error = vmbus_chan_send(ch, VMBUS_CHANPKT_TYPE_INBAND, 0, + rcv_buf, recvlen, requestid); + if (error) + hv_vss_log_info("%s: hv_vss_respond_host: sendpacket error:%d\n", + __func__, error); +} + +static void +hv_vss_notify_host_result_locked(struct hv_vss_req_internal *reqp, uint32_t status) +{ + struct hv_vss_msg* msg = (struct hv_vss_msg *)reqp->rcv_buf; + hv_vss_sc *sc = reqp->sc; + if (reqp->vss_req.opt_msg.opt == HV_VSS_CHECK) { + msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY; + } + hv_vss_log_info("%s, %s response %s to host\n", __func__, + vss_opt_name[reqp->vss_req.opt_msg.opt], + status == HV_S_OK ? "Success" : "Fail"); + hv_vss_respond_host(reqp->rcv_buf, vmbus_get_channel(reqp->sc->dev), + reqp->host_msg_len, reqp->host_msg_id, status); + /* recycle the request */ + LIST_INSERT_HEAD(&sc->req_free_list, reqp, link); +} + +static void +hv_vss_notify_host_result(struct hv_vss_req_internal *reqp, uint32_t status) +{ + mtx_lock(&reqp->sc->pending_mutex); + hv_vss_notify_host_result_locked(reqp, status); + mtx_unlock(&reqp->sc->pending_mutex); +} + +static void +hv_vss_cp_vssreq_to_user(struct hv_vss_req_internal *reqp, + struct hv_vss_opt_msg *userdata) +{ + struct hv_vss_req *hv_vss_dev_buf; + hv_vss_dev_buf = &reqp->vss_req; + hv_vss_dev_buf->opt_msg.opt = HV_VSS_NONE; + switch (reqp->vss_req.msg.hdr.vss_hdr.operation) { + case VSS_OP_FREEZE: + hv_vss_dev_buf->opt_msg.opt = HV_VSS_FREEZE; + break; + case VSS_OP_THAW: + hv_vss_dev_buf->opt_msg.opt = HV_VSS_THAW; + break; + case VSS_OP_HOT_BACKUP: + hv_vss_dev_buf->opt_msg.opt = HV_VSS_CHECK; + break; + } + *userdata = hv_vss_dev_buf->opt_msg; + 
hv_vss_log_info("%s, read data from user for " + "%s (%ju) \n", __func__, vss_opt_name[userdata->opt], + (uintmax_t)userdata->msgid); +} + +/** + * Remove the request id from app notifiy or ack queue, + * and recyle the request by inserting it to free list. + * + * When app was notified but not yet sending ack, the request + * should locate in either notify queue or ack queue. + */ +static struct hv_vss_req_internal* +hv_vss_drain_req_queue_locked(hv_vss_sc *sc, uint64_t req_id) +{ + struct hv_vss_req_internal *reqp, *tmp; + SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_notify_queue, + slink, tmp, req_id); + if (reqp == NULL) + SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->daemon_sc.to_ack_queue, + slink, tmp, req_id); + if (reqp == NULL) + SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_notify_queue, + slink, tmp, req_id); + if (reqp == NULL) + SEARCH_REMOVE_REQ_LOCKED(reqp, &sc->app_sc.to_ack_queue, slink, + tmp, req_id); + return (reqp); +} +/** + * Actions for daemon who has been notified. + */ +static void +hv_vss_notified(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) +{ + struct hv_vss_req_internal *reqp; + mtx_lock(&dev_sc->sc->pending_mutex); + if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) { + reqp = STAILQ_FIRST(&dev_sc->to_notify_queue); + hv_vss_cp_vssreq_to_user(reqp, userdata); + STAILQ_REMOVE_HEAD(&dev_sc->to_notify_queue, slink); + /* insert the msg to queue for write */ + STAILQ_INSERT_TAIL(&dev_sc->to_ack_queue, reqp, slink); + userdata->status = VSS_SUCCESS; + } else { + /* Timeout occur, thus request was removed from queue. 
*/ + hv_vss_log_info("%s: notify queue is empty!\n", __func__); + userdata->status = VSS_FAIL; + } + mtx_unlock(&dev_sc->sc->pending_mutex); +} + +static void +hv_vss_notify(struct hv_vss_dev_sc *dev_sc, struct hv_vss_req_internal *reqp) +{ + uint32_t opt = reqp->vss_req.opt_msg.opt; + mtx_lock(&dev_sc->sc->pending_mutex); + STAILQ_INSERT_TAIL(&dev_sc->to_notify_queue, reqp, slink); + hv_vss_log_info("%s: issuing query %s (%ju) to %s\n", __func__, + vss_opt_name[opt], (uintmax_t)reqp->vss_req.opt_msg.msgid, + &dev_sc->sc->app_sc == dev_sc ? "app" : "daemon"); + mtx_unlock(&dev_sc->sc->pending_mutex); + selwakeup(&dev_sc->hv_vss_selinfo); +} + +/** + * Actions for daemon who has acknowledged. + */ +static void +hv_vss_daemon_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) +{ + struct hv_vss_req_internal *reqp, *tmp; + uint64_t req_id; + int opt; + uint32_t status; + + opt = userdata->opt; + req_id = userdata->msgid; + status = userdata->status; + /* make sure the reserved fields are all zeros. */ + memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) - + __offsetof(struct hv_vss_opt_msg, reserved)); + mtx_lock(&dev_sc->sc->pending_mutex); + SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id); + mtx_unlock(&dev_sc->sc->pending_mutex); + if (reqp == NULL) { + hv_vss_log_info("%s Timeout: fail to find daemon ack request\n", + __func__); + userdata->status = VSS_FAIL; + return; + } + KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!")); + hv_vss_log_info("%s, get response %d from daemon for %s (%ju) \n", __func__, + status, vss_opt_name[opt], (uintmax_t)req_id); + switch (opt) { + case HV_VSS_CHECK: + case HV_VSS_FREEZE: + callout_drain(&reqp->callout); + hv_vss_notify_host_result(reqp, + status == VSS_SUCCESS ? 
HV_S_OK : HV_E_FAIL); + break; + case HV_VSS_THAW: + if (dev_sc->sc->app_register_done) { + if (status == VSS_SUCCESS) { + hv_vss_notify(&dev_sc->sc->app_sc, reqp); + } else { + /* handle error */ + callout_drain(&reqp->callout); + hv_vss_notify_host_result(reqp, HV_E_FAIL); + } + } else { + callout_drain(&reqp->callout); + hv_vss_notify_host_result(reqp, + status == VSS_SUCCESS ? HV_S_OK : HV_E_FAIL); + } + break; + } +} + +/** + * Actions for app who has acknowledged. + */ +static void +hv_vss_app_acked(struct hv_vss_dev_sc *dev_sc, struct hv_vss_opt_msg *userdata) +{ + struct hv_vss_req_internal *reqp, *tmp; + uint64_t req_id; + int opt; + uint8_t status; + + opt = userdata->opt; + req_id = userdata->msgid; + status = userdata->status; + /* make sure the reserved fields are all zeros. */ + memset(&userdata->reserved, 0, sizeof(struct hv_vss_opt_msg) - + __offsetof(struct hv_vss_opt_msg, reserved)); + mtx_lock(&dev_sc->sc->pending_mutex); + SEARCH_REMOVE_REQ_LOCKED(reqp, &dev_sc->to_ack_queue, slink, tmp, req_id); + mtx_unlock(&dev_sc->sc->pending_mutex); + if (reqp == NULL) { + hv_vss_log_info("%s Timeout: fail to find app ack request\n", + __func__); + userdata->status = VSS_FAIL; + return; + } + KASSERT(opt == reqp->vss_req.opt_msg.opt, ("Mismatched VSS operation!")); + hv_vss_log_info("%s, get response %d from app for %s (%ju) \n", + __func__, status, vss_opt_name[opt], (uintmax_t)req_id); + if (dev_sc->sc->register_done) { + switch (opt) { + case HV_VSS_CHECK: + case HV_VSS_FREEZE: + if (status == VSS_SUCCESS) { + hv_vss_notify(&dev_sc->sc->daemon_sc, reqp); + } else { + /* handle error */ + callout_drain(&reqp->callout); + hv_vss_notify_host_result(reqp, HV_E_FAIL); + } + break; + case HV_VSS_THAW: + callout_drain(&reqp->callout); + hv_vss_notify_host_result(reqp, + status == VSS_SUCCESS ? 
HV_S_OK : HV_E_FAIL); + break; + } + } else { + hv_vss_log_info("%s, Fatal: vss daemon was killed\n", __func__); + } +} + +static int +hv_vss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +{ + struct proc *td_proc; + td_proc = td->td_proc; + + struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; + hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n", + __func__, td_proc->p_comm, FS_VSS_DEV_NAME); + + if (dev_sc->sc->register_done) + return (EBUSY); + + dev_sc->sc->register_done = true; + hv_vss_callback(vmbus_get_channel(dev_sc->sc->dev), dev_sc->sc); + + dev_sc->proc_task = curproc; + return (0); +} + +static int +hv_vss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused, + struct thread *td) +{ + struct proc *td_proc; + td_proc = td->td_proc; + + struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; + + hv_vss_log_info("%s: %s closes device \"%s\"\n", + __func__, td_proc->p_comm, FS_VSS_DEV_NAME); + dev_sc->sc->register_done = false; + return (0); +} + +static int +hv_vss_dev_daemon_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, + struct thread *td) +{ + struct proc *td_proc; + struct hv_vss_dev_sc *sc; + + td_proc = td->td_proc; + sc = (struct hv_vss_dev_sc*)dev->si_drv1; + + hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm); + + struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data; + switch(cmd) { + case IOCHVVSSREAD: + hv_vss_notified(sc, userdata); + break; + case IOCHVVSSWRITE: + hv_vss_daemon_acked(sc, userdata); + break; + } + return (0); +} + +/* + * hv_vss_daemon poll invokes this function to check if data is available + * for daemon to read. 
+ */ +static int +hv_vss_dev_daemon_poll(struct cdev *dev, int events, struct thread *td) +{ + int revent = 0; + struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; + + mtx_lock(&dev_sc->sc->pending_mutex); + /** + * if there is data ready, inform daemon's poll + */ + if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) + revent = POLLIN; + if (revent == 0) + selrecord(td, &dev_sc->hv_vss_selinfo); + hv_vss_log_info("%s return 0x%x\n", __func__, revent); + mtx_unlock(&dev_sc->sc->pending_mutex); + return (revent); +} + +static int +hv_appvss_dev_open(struct cdev *dev, int oflags, int devtype, struct thread *td) +{ + struct proc *td_proc; + td_proc = td->td_proc; + + struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; + hv_vss_log_info("%s: %s opens device \"%s\" successfully.\n", + __func__, td_proc->p_comm, APP_VSS_DEV_NAME); + + if (dev_sc->sc->app_register_done) + return (EBUSY); + + dev_sc->sc->app_register_done = true; + dev_sc->proc_task = curproc; + return (0); +} + +static int +hv_appvss_dev_close(struct cdev *dev, int fflag __unused, int devtype __unused, + struct thread *td) +{ + struct proc *td_proc; + td_proc = td->td_proc; + + struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; + + hv_vss_log_info("%s: %s closes device \"%s\".\n", + __func__, td_proc->p_comm, APP_VSS_DEV_NAME); + dev_sc->sc->app_register_done = false; + return (0); +} + +static int +hv_appvss_dev_ioctl(struct cdev *dev, u_long cmd, caddr_t data, int flag, + struct thread *td) +{ + struct proc *td_proc; + struct hv_vss_dev_sc *dev_sc; + + td_proc = td->td_proc; + dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; + + hv_vss_log_info("%s: %s invoked vss ioctl\n", __func__, td_proc->p_comm); + + struct hv_vss_opt_msg* userdata = (struct hv_vss_opt_msg*)data; + switch(cmd) { + case IOCHVVSSREAD: + hv_vss_notified(dev_sc, userdata); + break; + case IOCHVVSSWRITE: + hv_vss_app_acked(dev_sc, userdata); + break; + } + return (0); +} + +/* + * hv_vss_daemon 
poll invokes this function to check if data is available + * for daemon to read. + */ +static int +hv_appvss_dev_poll(struct cdev *dev, int events, struct thread *td) +{ + int revent = 0; + struct hv_vss_dev_sc *dev_sc = (struct hv_vss_dev_sc*)dev->si_drv1; + + mtx_lock(&dev_sc->sc->pending_mutex); + /** + * if there is data ready, inform daemon's poll + */ + if (!STAILQ_EMPTY(&dev_sc->to_notify_queue)) + revent = POLLIN; + if (revent == 0) + selrecord(td, &dev_sc->hv_vss_selinfo); + hv_vss_log_info("%s return 0x%x\n", __func__, revent); + mtx_unlock(&dev_sc->sc->pending_mutex); + return (revent); +} + +static void +hv_vss_timeout(void *arg) +{ + hv_vss_req_internal *reqp = arg; + hv_vss_req_internal *request; + hv_vss_sc* sc = reqp->sc; + uint64_t req_id = reqp->vss_req.opt_msg.msgid; + /* This thread is locked */ + KASSERT(mtx_owned(&sc->pending_mutex), ("mutex lock is not owned!")); + request = hv_vss_drain_req_queue_locked(sc, req_id); + KASSERT(request != NULL, ("timeout but fail to find request")); + hv_vss_notify_host_result_locked(reqp, HV_E_FAIL); +} + +/* + * This routine is called whenever a message is received from the host + */ +static void +hv_vss_init_req(hv_vss_req_internal *reqp, + uint32_t recvlen, uint64_t requestid, uint8_t *vss_buf, hv_vss_sc *sc) +{ + struct timespec vm_ts; + struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; + + memset(reqp, 0, __offsetof(hv_vss_req_internal, callout)); + reqp->host_msg_len = recvlen; + reqp->host_msg_id = requestid; + reqp->rcv_buf = vss_buf; + reqp->sc = sc; + memcpy(&reqp->vss_req.msg, + (struct hv_vss_msg *)vss_buf, sizeof(struct hv_vss_msg)); + /* set the opt for users */ + switch (msg->hdr.vss_hdr.operation) { + case VSS_OP_FREEZE: + reqp->vss_req.opt_msg.opt = HV_VSS_FREEZE; + break; + case VSS_OP_THAW: + reqp->vss_req.opt_msg.opt = HV_VSS_THAW; + break; + case VSS_OP_HOT_BACKUP: + reqp->vss_req.opt_msg.opt = HV_VSS_CHECK; + break; + } + /* Use a timestamp as msg request ID */ + nanotime(&vm_ts); + 
reqp->vss_req.opt_msg.msgid = (vm_ts.tv_sec * NANOSEC) + vm_ts.tv_nsec; +} + +static hv_vss_req_internal* +hv_vss_get_new_req_locked(hv_vss_sc *sc) +{ + hv_vss_req_internal *reqp; + if (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue) || + !STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue) || + !STAILQ_EMPTY(&sc->app_sc.to_notify_queue) || + !STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) { + /* + * There is request coming from host before + * finishing previous requests + */ + hv_vss_log_info("%s: Warning: there is new request " + "coming before finishing previous requests\n", __func__); + return (NULL); + } + if (LIST_EMPTY(&sc->req_free_list)) { + /* TODO Error: no buffer */ + hv_vss_log_info("Error: No buffer\n"); + return (NULL); + } + reqp = LIST_FIRST(&sc->req_free_list); + LIST_REMOVE(reqp, link); + return (reqp); +} + +static void +hv_vss_start_notify(hv_vss_req_internal *reqp, uint32_t opt) +{ + hv_vss_sc *sc = reqp->sc; + /* + * Freeze/Check notification sequence: kernel -> app -> daemon(fs) + * Thaw notification sequence: kernel -> daemon(fs) -> app + * + * We should wake up the daemon, in case it's doing poll(). + * The response should be received after 5s, otherwise, trigger timeout. 
+ */ + switch (opt) { + case VSS_OP_FREEZE: + case VSS_OP_HOT_BACKUP: + if (sc->app_register_done) + hv_vss_notify(&sc->app_sc, reqp); + else + hv_vss_notify(&sc->daemon_sc, reqp); + callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz, + hv_vss_timeout, reqp); + break; + case VSS_OP_THAW: + hv_vss_notify(&sc->daemon_sc, reqp); + callout_reset(&reqp->callout, TIMEOUT_LIMIT * hz, + hv_vss_timeout, reqp); + break; + } +} + +/* + * Function to read the vss request buffer from host + * and interact with daemon + */ +static void +hv_vss_process_request(void *context, int pending __unused) +{ + uint8_t *vss_buf; + struct vmbus_channel *channel; + uint32_t recvlen = 0; + uint64_t requestid; + struct vmbus_icmsg_hdr *icmsghdrp; + int ret = 0; + hv_vss_sc *sc; + hv_vss_req_internal *reqp; + + hv_vss_log_info("%s: entering hv_vss_process_request\n", __func__); + + sc = (hv_vss_sc*)context; + vss_buf = sc->util_sc.receive_buffer; + channel = vmbus_get_channel(sc->dev); + + recvlen = sc->util_sc.ic_buflen; + ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid); + KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough")); + /* XXX check recvlen to make sure that it contains enough data */ + + while ((ret == 0) && (recvlen > 0)) { + icmsghdrp = (struct vmbus_icmsg_hdr *)vss_buf; + + if (icmsghdrp->ic_type == HV_ICMSGTYPE_NEGOTIATE) { + ret = vmbus_ic_negomsg(&sc->util_sc, vss_buf, + &recvlen, VSS_FWVER, VSS_MSGVER); + hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), + recvlen, requestid, ret); + hv_vss_log_info("%s: version negotiated\n", __func__); + } else if (!hv_vss_is_daemon_killed_after_launch(sc)) { + struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; + switch(msg->hdr.vss_hdr.operation) { + case VSS_OP_FREEZE: + case VSS_OP_THAW: + case VSS_OP_HOT_BACKUP: + mtx_lock(&sc->pending_mutex); + reqp = hv_vss_get_new_req_locked(sc); + mtx_unlock(&sc->pending_mutex); + if (reqp == NULL) { + /* ignore this request from host */ + break; + } + 
hv_vss_init_req(reqp, recvlen, requestid, vss_buf, sc); + hv_vss_log_info("%s: receive %s (%ju) from host\n", + __func__, + vss_opt_name[reqp->vss_req.opt_msg.opt], + (uintmax_t)reqp->vss_req.opt_msg.msgid); + hv_vss_start_notify(reqp, msg->hdr.vss_hdr.operation); + break; + case VSS_OP_GET_DM_INFO: + hv_vss_log_info("%s: receive GET_DM_INFO from host\n", + __func__); + msg->body.dm_info.flags = 0; + hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), + recvlen, requestid, HV_S_OK); + break; + default: + device_printf(sc->dev, "Unknown opt from host: %d\n", + msg->hdr.vss_hdr.operation); + break; + } + } else { + /* daemon was killed for some reason after it was launched */ + struct hv_vss_msg* msg = (struct hv_vss_msg *)vss_buf; + switch(msg->hdr.vss_hdr.operation) { + case VSS_OP_FREEZE: + hv_vss_log_info("%s: response fail for FREEZE\n", + __func__); + break; + case VSS_OP_THAW: + hv_vss_log_info("%s: response fail for THAW\n", + __func__); + break; + case VSS_OP_HOT_BACKUP: + hv_vss_log_info("%s: response fail for HOT_BACKUP\n", + __func__); + msg->body.vss_cf.flags = VSS_HBU_NO_AUTO_RECOVERY; + break; + case VSS_OP_GET_DM_INFO: + hv_vss_log_info("%s: response fail for GET_DM_INFO\n", + __func__); + msg->body.dm_info.flags = 0; + break; + default: + device_printf(sc->dev, "Unknown opt from host: %d\n", + msg->hdr.vss_hdr.operation); + break; + } + hv_vss_respond_host(vss_buf, vmbus_get_channel(sc->dev), + recvlen, requestid, HV_E_FAIL); + } + /* + * Try reading next buffer + */ + recvlen = sc->util_sc.ic_buflen; + ret = vmbus_chan_recv(channel, vss_buf, &recvlen, &requestid); + KASSERT(ret != ENOBUFS, ("hvvss recvbuf is not large enough")); + /* XXX check recvlen to make sure that it contains enough data */ + + hv_vss_log_info("%s: read: context %p, ret =%d, recvlen=%d\n", + __func__, context, ret, recvlen); + } +} + +static int +hv_vss_probe(device_t dev) +{ + return (vmbus_ic_probe(dev, vmbus_vss_descs)); +} + +static int 
+hv_vss_init_send_receive_queue(device_t dev) +{ + hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); + int i; + const int max_list = 4; /* It is big enough for the list */ + struct hv_vss_req_internal* reqp; + + LIST_INIT(&sc->req_free_list); + STAILQ_INIT(&sc->daemon_sc.to_notify_queue); + STAILQ_INIT(&sc->daemon_sc.to_ack_queue); + STAILQ_INIT(&sc->app_sc.to_notify_queue); + STAILQ_INIT(&sc->app_sc.to_ack_queue); + + for (i = 0; i < max_list; i++) { + reqp = malloc(sizeof(struct hv_vss_req_internal), + M_DEVBUF, M_WAITOK|M_ZERO); + LIST_INSERT_HEAD(&sc->req_free_list, reqp, link); + callout_init_mtx(&reqp->callout, &sc->pending_mutex, 0); + } + return (0); +} + +static int +hv_vss_destroy_send_receive_queue(device_t dev) +{ + hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); + hv_vss_req_internal* reqp; + + while (!LIST_EMPTY(&sc->req_free_list)) { + reqp = LIST_FIRST(&sc->req_free_list); + LIST_REMOVE(reqp, link); + free(reqp, M_DEVBUF); + } + + while (!STAILQ_EMPTY(&sc->daemon_sc.to_notify_queue)) { + reqp = STAILQ_FIRST(&sc->daemon_sc.to_notify_queue); + STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_notify_queue, slink); + free(reqp, M_DEVBUF); + } + + while (!STAILQ_EMPTY(&sc->daemon_sc.to_ack_queue)) { + reqp = STAILQ_FIRST(&sc->daemon_sc.to_ack_queue); + STAILQ_REMOVE_HEAD(&sc->daemon_sc.to_ack_queue, slink); + free(reqp, M_DEVBUF); + } + + while (!STAILQ_EMPTY(&sc->app_sc.to_notify_queue)) { + reqp = STAILQ_FIRST(&sc->app_sc.to_notify_queue); + STAILQ_REMOVE_HEAD(&sc->app_sc.to_notify_queue, slink); + free(reqp, M_DEVBUF); + } + + while (!STAILQ_EMPTY(&sc->app_sc.to_ack_queue)) { + reqp = STAILQ_FIRST(&sc->app_sc.to_ack_queue); + STAILQ_REMOVE_HEAD(&sc->app_sc.to_ack_queue, slink); + free(reqp, M_DEVBUF); + } + return (0); +} + +static int +hv_vss_attach(device_t dev) +{ + int error; + struct sysctl_oid_list *child; + struct sysctl_ctx_list *ctx; + + hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); + + sc->dev = dev; + mtx_init(&sc->pending_mutex, "hv_vss 
pending mutex", NULL, MTX_DEF); + + ctx = device_get_sysctl_ctx(dev); + child = SYSCTL_CHILDREN(device_get_sysctl_tree(dev)); + + SYSCTL_ADD_INT(ctx, child, OID_AUTO, "hv_vss_log", + CTLFLAG_RWTUN, &hv_vss_log, 0, "Hyperv VSS service log level"); + + TASK_INIT(&sc->task, 0, hv_vss_process_request, sc); + hv_vss_init_send_receive_queue(dev); + /* create character device for file system freeze/thaw */ + error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, + &sc->hv_vss_dev, + &hv_vss_cdevsw, + 0, + UID_ROOT, + GID_WHEEL, + 0640, + FS_VSS_DEV_NAME); + + if (error != 0) { + hv_vss_log_info("Fail to create '%s': %d\n", FS_VSS_DEV_NAME, error); + return (error); + } + sc->hv_vss_dev->si_drv1 = &sc->daemon_sc; + sc->daemon_sc.sc = sc; + /* create character device for application freeze/thaw */ + error = make_dev_p(MAKEDEV_CHECKNAME | MAKEDEV_WAITOK, + &sc->hv_appvss_dev, + &hv_appvss_cdevsw, + 0, + UID_ROOT, + GID_WHEEL, + 0640, + APP_VSS_DEV_NAME); + + if (error != 0) { + hv_vss_log_info("Fail to create '%s': %d\n", APP_VSS_DEV_NAME, error); + return (error); + } + sc->hv_appvss_dev->si_drv1 = &sc->app_sc; + sc->app_sc.sc = sc; + + return hv_util_attach(dev, hv_vss_callback); +} + +static int +hv_vss_detach(device_t dev) +{ + hv_vss_sc *sc = (hv_vss_sc*)device_get_softc(dev); + mtx_destroy(&sc->pending_mutex); + if (sc->daemon_sc.proc_task != NULL) { + PROC_LOCK(sc->daemon_sc.proc_task); + kern_psignal(sc->daemon_sc.proc_task, SIGKILL); + PROC_UNLOCK(sc->daemon_sc.proc_task); + } + if (sc->app_sc.proc_task != NULL) { + PROC_LOCK(sc->app_sc.proc_task); + kern_psignal(sc->app_sc.proc_task, SIGKILL); + PROC_UNLOCK(sc->app_sc.proc_task); + } + hv_vss_destroy_send_receive_queue(dev); + destroy_dev(sc->hv_vss_dev); + destroy_dev(sc->hv_appvss_dev); + return hv_util_detach(dev); +} + +static device_method_t vss_methods[] = { + /* Device interface */ + DEVMETHOD(device_probe, hv_vss_probe), + DEVMETHOD(device_attach, hv_vss_attach), + DEVMETHOD(device_detach, hv_vss_detach), + 
{ 0, 0 } +}; + +static driver_t vss_driver = { "hvvss", vss_methods, sizeof(hv_vss_sc)}; + +static devclass_t vss_devclass; + +DRIVER_MODULE(hv_vss, vmbus, vss_driver, vss_devclass, NULL, NULL); +MODULE_VERSION(hv_vss, 1); +MODULE_DEPEND(hv_vss, vmbus, 1, 1, 1); diff --git a/sys/dev/hyperv/utilities/hv_snapshot.h b/sys/dev/hyperv/utilities/hv_snapshot.h new file mode 100644 index 0000000..e3c9e0c --- /dev/null +++ b/sys/dev/hyperv/utilities/hv_snapshot.h @@ -0,0 +1,56 @@ +/*- + * Copyright (c) 2016 Microsoft Corp. + * All rights reserved. + * + * Redistribution and use in source and binary forms, with or without + * modification, are permitted provided that the following conditions + * are met: + * 1. Redistributions of source code must retain the above copyright + * notice unmodified, this list of conditions, and the following + * disclaimer. + * 2. Redistributions in binary form must reproduce the above copyright + * notice, this list of conditions and the following disclaimer in the + * documentation and/or other materials provided with the distribution. + * + * THIS SOFTWARE IS PROVIDED BY THE AUTHOR ``AS IS'' AND ANY EXPRESS OR + * IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES + * OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE DISCLAIMED. + * IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR ANY DIRECT, INDIRECT, + * INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT + * NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, + * DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY + * THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT + * (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF + * THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ * + * $FreeBSD$ + */ + +#ifndef _VSS_H +#define _VSS_H +#include <sys/ioccom.h> +#define FS_VSS_DEV_NAME "hv_fsvss_dev" +#define APP_VSS_DEV_NAME "hv_appvss_dev" + +#define VSS_DEV(VSS) "/dev/"VSS + +#define VSS_SUCCESS 0x00000000 +#define VSS_FAIL 0x00000001 + +enum hv_vss_op_t { + HV_VSS_NONE = 0, + HV_VSS_CHECK, + HV_VSS_FREEZE, + HV_VSS_THAW, + HV_VSS_COUNT +}; + +struct hv_vss_opt_msg { + uint32_t opt; /* operation */ + uint32_t status; /* 0 for success, 1 for error */ + uint64_t msgid; /* an ID used to identify the transaction */ + uint8_t reserved[48]; /* reserved values are all zeroes */ +}; +#define IOCHVVSSREAD _IOR('v', 2, struct hv_vss_opt_msg) +#define IOCHVVSSWRITE _IOW('v', 3, struct hv_vss_opt_msg) +#endif |