summaryrefslogtreecommitdiffstats
path: root/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
diff options
context:
space:
mode:
Diffstat (limited to 'sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c')
-rw-r--r--sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c796
1 files changed, 796 insertions, 0 deletions
diff --git a/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
new file mode 100644
index 0000000..9aa8b7f
--- /dev/null
+++ b/sys/cddl/contrib/opensolaris/uts/common/fs/zfs/zvol.c
@@ -0,0 +1,796 @@
+/*
+ * CDDL HEADER START
+ *
+ * The contents of this file are subject to the terms of the
+ * Common Development and Distribution License (the "License").
+ * You may not use this file except in compliance with the License.
+ *
+ * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
+ * or http://www.opensolaris.org/os/licensing.
+ * See the License for the specific language governing permissions
+ * and limitations under the License.
+ *
+ * When distributing Covered Code, include this CDDL HEADER in each
+ * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
+ * If applicable, add the following below this CDDL HEADER, with the
+ * fields enclosed by brackets "[]" replaced with your own identifying
+ * information: Portions Copyright [yyyy] [name of copyright owner]
+ *
+ * CDDL HEADER END
+ */
+/*
+ * Copyright (c) 2006 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+ * All rights reserved.
+ */
+/*
+ * Copyright 2007 Sun Microsystems, Inc. All rights reserved.
+ * Use is subject to license terms.
+ */
+
+#pragma ident "%Z%%M% %I% %E% SMI"
+
+/*
+ * ZFS volume emulation driver.
+ *
+ * Makes a DMU object look like a volume of arbitrary size, up to 2^64 bytes.
+ * Volumes are accessed through the symbolic links named:
+ *
+ * /dev/zvol/dsk/<pool_name>/<dataset_name>
+ * /dev/zvol/rdsk/<pool_name>/<dataset_name>
+ *
+ * These links are created by the ZFS-specific devfsadm link generator.
+ * Volumes are persistent through reboot. No user command needs to be
+ * run before opening and using a device.
+ */
+
+#include <sys/param.h>
+#include <sys/kernel.h>
+#include <sys/errno.h>
+#include <sys/uio.h>
+#include <sys/bio.h>
+#include <sys/kmem.h>
+#include <sys/conf.h>
+#include <sys/cmn_err.h>
+#include <sys/stat.h>
+#include <sys/zap.h>
+#include <sys/spa.h>
+#include <sys/zio.h>
+#include <sys/dsl_prop.h>
+#include <sys/byteorder.h>
+#include <sys/dirent.h>
+#include <sys/fs/zfs.h>
+#include <sys/zfs_ioctl.h>
+#include <sys/zil.h>
+#include <sys/refcount.h>
+#include <sys/zfs_znode.h>
+#include <sys/zfs_rlock.h>
+#include <geom/geom.h>
+
+#include "zfs_namecheck.h"
+
+struct g_class zfs_zvol_class = {
+ .name = "ZFS::ZVOL",
+ .version = G_VERSION,
+};
+
+DECLARE_GEOM_CLASS(zfs_zvol_class, zfs_zvol);
+
+#define ZVOL_OBJ 1ULL
+#define ZVOL_ZAP_OBJ 2ULL
+
+static uint32_t zvol_minors;
+
+/*
+ * The in-core state of each volume.
+ */
+typedef struct zvol_state {
+ char zv_name[MAXPATHLEN]; /* pool/dd name */
+ uint64_t zv_volsize; /* amount of space we advertise */
+ uint64_t zv_volblocksize; /* volume block size */
+ struct g_provider *zv_provider; /* GEOM provider */
+ uint8_t zv_min_bs; /* minimum addressable block shift */
+ uint8_t zv_readonly; /* hard readonly; like write-protect */
+ objset_t *zv_objset; /* objset handle */
+ uint32_t zv_mode; /* DS_MODE_* flags at open time */
+ uint32_t zv_total_opens; /* total open count */
+ zilog_t *zv_zilog; /* ZIL handle */
+ uint64_t zv_txg_assign; /* txg to assign during ZIL replay */
+ znode_t zv_znode; /* for range locking */
+ int zv_state;
+ struct bio_queue_head zv_queue;
+ struct mtx zv_queue_mtx; /* zv_queue mutex */
+} zvol_state_t;
+
+/*
+ * zvol maximum transfer in one DMU tx.
+ */
+int zvol_maxphys = DMU_MAX_ACCESS/2;
+
+static int zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio);
+
+int
+zvol_check_volsize(uint64_t volsize, uint64_t blocksize)
+{
+ if (volsize == 0)
+ return (EINVAL);
+
+ if (volsize % blocksize != 0)
+ return (EINVAL);
+
+#ifdef _ILP32
+ if (volsize - 1 > SPEC_MAXOFFSET_T)
+ return (EOVERFLOW);
+#endif
+ return (0);
+}
+
+int
+zvol_check_volblocksize(uint64_t volblocksize)
+{
+ if (volblocksize < SPA_MINBLOCKSIZE ||
+ volblocksize > SPA_MAXBLOCKSIZE ||
+ !ISP2(volblocksize))
+ return (EDOM);
+
+ return (0);
+}
+
+static void
+zvol_readonly_changed_cb(void *arg, uint64_t newval)
+{
+ zvol_state_t *zv = arg;
+
+ zv->zv_readonly = (uint8_t)newval;
+}
+
+int
+zvol_get_stats(objset_t *os, nvlist_t *nv)
+{
+ int error;
+ dmu_object_info_t doi;
+ uint64_t val;
+
+
+ error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &val);
+ if (error)
+ return (error);
+
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLSIZE, val);
+
+ error = dmu_object_info(os, ZVOL_OBJ, &doi);
+
+ if (error == 0) {
+ dsl_prop_nvlist_add_uint64(nv, ZFS_PROP_VOLBLOCKSIZE,
+ doi.doi_data_block_size);
+ }
+
+ return (error);
+}
+
+static zvol_state_t *
+zvol_minor_lookup(const char *name)
+{
+ struct g_provider *pp;
+ struct g_geom *gp;
+
+ g_topology_assert();
+
+ LIST_FOREACH(gp, &zfs_zvol_class.geom, geom) {
+ LIST_FOREACH(pp, &gp->provider, provider) {
+ if (strcmp(pp->name + sizeof(ZVOL_DEV_DIR), name) == 0)
+ return (pp->private);
+ }
+ }
+
+ return (NULL);
+}
+
+static int
+zvol_access(struct g_provider *pp, int acr, int acw, int ace)
+{
+ zvol_state_t *zv;
+
+ g_topology_assert();
+
+ zv = pp->private;
+ if (zv == NULL) {
+ if (acr <= 0 && acw <= 0 && ace <= 0)
+ return (0);
+ return (pp->error);
+ }
+
+ ASSERT(zv->zv_objset != NULL);
+
+ if (acw > 0 && (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)))
+ return (EROFS);
+
+ zv->zv_total_opens += acr + acw + ace;
+
+ return (0);
+}
+
+/*
+ * zvol_log_write() handles synchronous writes using TX_WRITE ZIL transactions.
+ *
+ * We store data in the log buffers if it's small enough.
+ * Otherwise we will later flush the data out via dmu_sync().
+ */
+ssize_t zvol_immediate_write_sz = 32768;
+
+static void
+zvol_log_write(zvol_state_t *zv, dmu_tx_t *tx, offset_t off, ssize_t len)
+{
+ uint32_t blocksize = zv->zv_volblocksize;
+ lr_write_t *lr;
+
+ while (len) {
+ ssize_t nbytes = MIN(len, blocksize - P2PHASE(off, blocksize));
+ itx_t *itx = zil_itx_create(TX_WRITE, sizeof (*lr));
+
+ itx->itx_wr_state =
+ len > zvol_immediate_write_sz ? WR_INDIRECT : WR_NEED_COPY;
+ itx->itx_private = zv;
+ lr = (lr_write_t *)&itx->itx_lr;
+ lr->lr_foid = ZVOL_OBJ;
+ lr->lr_offset = off;
+ lr->lr_length = nbytes;
+ lr->lr_blkoff = off - P2ALIGN_TYPED(off, blocksize, uint64_t);
+ BP_ZERO(&lr->lr_blkptr);
+
+ (void) zil_itx_assign(zv->zv_zilog, itx, tx);
+ len -= nbytes;
+ off += nbytes;
+ }
+}
+
+static void
+zvol_start(struct bio *bp)
+{
+ zvol_state_t *zv;
+
+ switch (bp->bio_cmd) {
+ case BIO_READ:
+ case BIO_WRITE:
+ case BIO_FLUSH:
+ zv = bp->bio_to->private;
+ ASSERT(zv != NULL);
+ mtx_lock(&zv->zv_queue_mtx);
+ bioq_insert_tail(&zv->zv_queue, bp);
+ wakeup_one(&zv->zv_queue);
+ mtx_unlock(&zv->zv_queue_mtx);
+ break;
+ case BIO_DELETE:
+ case BIO_GETATTR:
+ default:
+ g_io_deliver(bp, EOPNOTSUPP);
+ break;
+ }
+}
+
+static void
+zvol_serve_one(zvol_state_t *zv, struct bio *bp)
+{
+ uint64_t off, volsize;
+ size_t size, resid;
+ char *addr;
+ objset_t *os;
+ rl_t *rl;
+ int error = 0;
+ boolean_t reading;
+
+ off = bp->bio_offset;
+ volsize = zv->zv_volsize;
+
+ os = zv->zv_objset;
+ ASSERT(os != NULL);
+
+ addr = bp->bio_data;
+ resid = bp->bio_length;
+
+ error = 0;
+
+ /*
+ * There must be no buffer changes when doing a dmu_sync() because
+ * we can't change the data whilst calculating the checksum.
+ * A better approach than a per zvol rwlock would be to lock ranges.
+ */
+ reading = (bp->bio_cmd == BIO_READ);
+ rl = zfs_range_lock(&zv->zv_znode, off, resid,
+ reading ? RL_READER : RL_WRITER);
+
+ while (resid != 0 && off < volsize) {
+
+ size = MIN(resid, zvol_maxphys); /* zvol_maxphys per tx */
+
+ if (size > volsize - off) /* don't write past the end */
+ size = volsize - off;
+
+ if (reading) {
+ error = dmu_read(os, ZVOL_OBJ, off, size, addr);
+ } else {
+ dmu_tx_t *tx = dmu_tx_create(os);
+ dmu_tx_hold_write(tx, ZVOL_OBJ, off, size);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ dmu_tx_abort(tx);
+ } else {
+ dmu_write(os, ZVOL_OBJ, off, size, addr, tx);
+ zvol_log_write(zv, tx, off, size);
+ dmu_tx_commit(tx);
+ }
+ }
+ if (error)
+ break;
+ off += size;
+ addr += size;
+ resid -= size;
+ }
+ zfs_range_unlock(rl);
+
+ bp->bio_completed = bp->bio_length - resid;
+ if (bp->bio_completed < bp->bio_length)
+ bp->bio_error = (off > volsize ? EINVAL : error);
+
+ /*
+ * XXX: We are devilering here?
+ * Looks like I don't understand something here, but I was sure it was
+ * an async request.
+ */
+ g_io_deliver(bp, bp->bio_error);
+}
+
+static void
+zvol_worker(void *arg)
+{
+ zvol_state_t *zv;
+ struct bio *bp;
+
+ zv = arg;
+ for (;;) {
+ mtx_lock(&zv->zv_queue_mtx);
+ bp = bioq_takefirst(&zv->zv_queue);
+ if (bp == NULL) {
+ if (zv->zv_state == 1) {
+ zv->zv_state = 2;
+ wakeup(&zv->zv_state);
+ mtx_unlock(&zv->zv_queue_mtx);
+ kthread_exit(0);
+ }
+ msleep(&zv->zv_queue, &zv->zv_queue_mtx, PRIBIO | PDROP,
+ "zvol:io", 0);
+ continue;
+ }
+ mtx_unlock(&zv->zv_queue_mtx);
+ if (bp->bio_cmd == BIO_FLUSH) {
+ zil_commit(zv->zv_zilog, UINT64_MAX, ZVOL_OBJ);
+ g_io_deliver(bp, 0);
+ } else {
+ zvol_serve_one(zv, bp);
+ }
+ }
+}
+
+void
+zvol_create_cb(objset_t *os, void *arg, dmu_tx_t *tx)
+{
+ zfs_create_data_t *zc = arg;
+ int error;
+ uint64_t volblocksize, volsize;
+
+ VERIFY(nvlist_lookup_uint64(zc->zc_props,
+ zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) == 0);
+ if (nvlist_lookup_uint64(zc->zc_props,
+ zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE), &volblocksize) != 0)
+ volblocksize = zfs_prop_default_numeric(ZFS_PROP_VOLBLOCKSIZE);
+
+ /*
+ * These properites must be removed from the list so the generic
+ * property setting step won't apply to them.
+ */
+ VERIFY(nvlist_remove_all(zc->zc_props,
+ zfs_prop_to_name(ZFS_PROP_VOLSIZE)) == 0);
+ (void) nvlist_remove_all(zc->zc_props,
+ zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE));
+
+ error = dmu_object_claim(os, ZVOL_OBJ, DMU_OT_ZVOL, volblocksize,
+ DMU_OT_NONE, 0, tx);
+ ASSERT(error == 0);
+
+ error = zap_create_claim(os, ZVOL_ZAP_OBJ, DMU_OT_ZVOL_PROP,
+ DMU_OT_NONE, 0, tx);
+ ASSERT(error == 0);
+
+ error = zap_update(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize, tx);
+ ASSERT(error == 0);
+}
+
+/*
+ * Replay a TX_WRITE ZIL transaction that didn't get committed
+ * after a system failure
+ */
+static int
+zvol_replay_write(zvol_state_t *zv, lr_write_t *lr, boolean_t byteswap)
+{
+ objset_t *os = zv->zv_objset;
+ char *data = (char *)(lr + 1); /* data follows lr_write_t */
+ uint64_t off = lr->lr_offset;
+ uint64_t len = lr->lr_length;
+ dmu_tx_t *tx;
+ int error;
+
+ if (byteswap)
+ byteswap_uint64_array(lr, sizeof (*lr));
+
+ tx = dmu_tx_create(os);
+ dmu_tx_hold_write(tx, ZVOL_OBJ, off, len);
+ error = dmu_tx_assign(tx, zv->zv_txg_assign);
+ if (error) {
+ dmu_tx_abort(tx);
+ } else {
+ dmu_write(os, ZVOL_OBJ, off, len, data, tx);
+ dmu_tx_commit(tx);
+ }
+
+ return (error);
+}
+
+/* ARGSUSED */
+static int
+zvol_replay_err(zvol_state_t *zv, lr_t *lr, boolean_t byteswap)
+{
+ return (ENOTSUP);
+}
+
+/*
+ * Callback vectors for replaying records.
+ * Only TX_WRITE is needed for zvol.
+ */
+zil_replay_func_t *zvol_replay_vector[TX_MAX_TYPE] = {
+ zvol_replay_err, /* 0 no such transaction type */
+ zvol_replay_err, /* TX_CREATE */
+ zvol_replay_err, /* TX_MKDIR */
+ zvol_replay_err, /* TX_MKXATTR */
+ zvol_replay_err, /* TX_SYMLINK */
+ zvol_replay_err, /* TX_REMOVE */
+ zvol_replay_err, /* TX_RMDIR */
+ zvol_replay_err, /* TX_LINK */
+ zvol_replay_err, /* TX_RENAME */
+ zvol_replay_write, /* TX_WRITE */
+ zvol_replay_err, /* TX_TRUNCATE */
+ zvol_replay_err, /* TX_SETATTR */
+ zvol_replay_err, /* TX_ACL */
+};
+
+/*
+ * Create a minor node for the specified volume.
+ */
+int
+zvol_create_minor(const char *name, dev_t dev)
+{
+ struct g_provider *pp;
+ struct g_geom *gp;
+ zvol_state_t *zv;
+ objset_t *os;
+ dmu_object_info_t doi;
+ uint64_t volsize;
+ int ds_mode = DS_MODE_PRIMARY;
+ int error;
+
+ DROP_GIANT();
+ g_topology_lock();
+
+ if ((zv = zvol_minor_lookup(name)) != NULL) {
+ error = EEXIST;
+ goto end;
+ }
+
+ if (strchr(name, '@') != 0)
+ ds_mode |= DS_MODE_READONLY;
+
+ error = dmu_objset_open(name, DMU_OST_ZVOL, ds_mode, &os);
+ if (error)
+ goto end;
+
+ g_topology_unlock();
+ PICKUP_GIANT();
+ error = zap_lookup(os, ZVOL_ZAP_OBJ, "size", 8, 1, &volsize);
+ DROP_GIANT();
+ g_topology_lock();
+ if (error) {
+ dmu_objset_close(os);
+ goto end;
+ }
+
+ gp = g_new_geomf(&zfs_zvol_class, "zfs::zvol::%s", name);
+ gp->start = zvol_start;
+ gp->access = zvol_access;
+ pp = g_new_providerf(gp, "%s/%s", ZVOL_DEV_DIR, name);
+ pp->mediasize = volsize;
+ pp->sectorsize = DEV_BSIZE;
+
+ zv = kmem_zalloc(sizeof(*zv), KM_SLEEP);
+ (void) strcpy(zv->zv_name, name);
+ zv->zv_min_bs = DEV_BSHIFT;
+ zv->zv_provider = pp;
+ zv->zv_volsize = pp->mediasize;
+ zv->zv_objset = os;
+ zv->zv_mode = ds_mode;
+ zv->zv_zilog = zil_open(os, zvol_get_data);
+ mutex_init(&zv->zv_znode.z_range_lock, NULL, MUTEX_DEFAULT, NULL);
+ avl_create(&zv->zv_znode.z_range_avl, zfs_range_compare,
+ sizeof (rl_t), offsetof(rl_t, r_node));
+
+
+ /* get and cache the blocksize */
+ error = dmu_object_info(os, ZVOL_OBJ, &doi);
+ ASSERT(error == 0);
+ zv->zv_volblocksize = doi.doi_data_block_size;
+
+ zil_replay(os, zv, &zv->zv_txg_assign, zvol_replay_vector);
+
+ /* XXX this should handle the possible i/o error */
+ VERIFY(dsl_prop_register(dmu_objset_ds(zv->zv_objset),
+ "readonly", zvol_readonly_changed_cb, zv) == 0);
+
+ pp->private = zv;
+ g_error_provider(pp, 0);
+
+ bioq_init(&zv->zv_queue);
+ mtx_init(&zv->zv_queue_mtx, "zvol", NULL, MTX_DEF);
+ zv->zv_state = 0;
+ kthread_create(zvol_worker, zv, NULL, 0, 0, "zvol:worker %s", pp->name);
+
+ zvol_minors++;
+end:
+ g_topology_unlock();
+ PICKUP_GIANT();
+
+ return (error);
+}
+
+/*
+ * Remove minor node for the specified volume.
+ */
+int
+zvol_remove_minor(const char *name)
+{
+ struct g_provider *pp;
+ zvol_state_t *zv;
+ int error = 0;
+
+ DROP_GIANT();
+ g_topology_lock();
+
+ if ((zv = zvol_minor_lookup(name)) == NULL) {
+ error = ENXIO;
+ goto end;
+ }
+
+ if (zv->zv_total_opens != 0) {
+ error = EBUSY;
+ goto end;
+ }
+
+ VERIFY(dsl_prop_unregister(dmu_objset_ds(zv->zv_objset),
+ "readonly", zvol_readonly_changed_cb, zv) == 0);
+
+ mtx_lock(&zv->zv_queue_mtx);
+ zv->zv_state = 1;
+ wakeup_one(&zv->zv_queue);
+ while (zv->zv_state != 2)
+ msleep(&zv->zv_state, &zv->zv_queue_mtx, 0, "zvol:w", 0);
+ mtx_unlock(&zv->zv_queue_mtx);
+ mtx_destroy(&zv->zv_queue_mtx);
+
+ pp = zv->zv_provider;
+ pp->private = NULL;
+ g_wither_geom(pp->geom, ENXIO);
+
+ zil_close(zv->zv_zilog);
+ zv->zv_zilog = NULL;
+ dmu_objset_close(zv->zv_objset);
+ zv->zv_objset = NULL;
+ avl_destroy(&zv->zv_znode.z_range_avl);
+ mutex_destroy(&zv->zv_znode.z_range_lock);
+
+ kmem_free(zv, sizeof(*zv));
+
+ zvol_minors--;
+end:
+ g_topology_unlock();
+ PICKUP_GIANT();
+
+ return (error);
+}
+
+int
+zvol_set_volsize(const char *name, dev_t dev, uint64_t volsize)
+{
+ zvol_state_t *zv;
+ dmu_tx_t *tx;
+ int error;
+ dmu_object_info_t doi;
+
+ DROP_GIANT();
+ g_topology_lock();
+
+ if ((zv = zvol_minor_lookup(name)) == NULL) {
+ error = ENXIO;
+ goto end;
+ }
+
+ if ((error = dmu_object_info(zv->zv_objset, ZVOL_OBJ, &doi)) != 0 ||
+ (error = zvol_check_volsize(volsize,
+ doi.doi_data_block_size)) != 0) {
+ goto end;
+ }
+
+ if (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)) {
+ error = EROFS;
+ goto end;
+ }
+
+ tx = dmu_tx_create(zv->zv_objset);
+ dmu_tx_hold_zap(tx, ZVOL_ZAP_OBJ, TRUE, NULL);
+ dmu_tx_hold_free(tx, ZVOL_OBJ, volsize, DMU_OBJECT_END);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ dmu_tx_abort(tx);
+ goto end;
+ }
+
+ error = zap_update(zv->zv_objset, ZVOL_ZAP_OBJ, "size", 8, 1,
+ &volsize, tx);
+ if (error == 0) {
+ error = dmu_free_range(zv->zv_objset, ZVOL_OBJ, volsize,
+ DMU_OBJECT_END, tx);
+ }
+
+ dmu_tx_commit(tx);
+
+ if (error == 0) {
+ zv->zv_volsize = volsize;
+ zv->zv_provider->mediasize = volsize; /* XXX: Not supported. */
+ }
+end:
+ g_topology_unlock();
+ PICKUP_GIANT();
+
+ return (error);
+}
+
+int
+zvol_set_volblocksize(const char *name, uint64_t volblocksize)
+{
+ zvol_state_t *zv;
+ dmu_tx_t *tx;
+ int error;
+
+ DROP_GIANT();
+ g_topology_lock();
+
+ if ((zv = zvol_minor_lookup(name)) == NULL) {
+ error = ENXIO;
+ goto end;
+ }
+
+ if (zv->zv_readonly || (zv->zv_mode & DS_MODE_READONLY)) {
+ error = EROFS;
+ goto end;
+ }
+
+ tx = dmu_tx_create(zv->zv_objset);
+ dmu_tx_hold_bonus(tx, ZVOL_OBJ);
+ error = dmu_tx_assign(tx, TXG_WAIT);
+ if (error) {
+ dmu_tx_abort(tx);
+ } else {
+ error = dmu_object_set_blocksize(zv->zv_objset, ZVOL_OBJ,
+ volblocksize, 0, tx);
+ if (error == ENOTSUP)
+ error = EBUSY;
+ dmu_tx_commit(tx);
+ /* XXX: Not supported. */
+#if 0
+ if (error == 0)
+ zv->zv_provider->sectorsize = zc->zc_volblocksize;
+#endif
+ }
+end:
+ g_topology_unlock();
+ PICKUP_GIANT();
+
+ return (error);
+}
+
+void
+zvol_get_done(dmu_buf_t *db, void *vzgd)
+{
+ zgd_t *zgd = (zgd_t *)vzgd;
+ rl_t *rl = zgd->zgd_rl;
+
+ dmu_buf_rele(db, vzgd);
+ zfs_range_unlock(rl);
+ zil_add_vdev(zgd->zgd_zilog, DVA_GET_VDEV(BP_IDENTITY(zgd->zgd_bp)));
+ kmem_free(zgd, sizeof (zgd_t));
+}
+
+/*
+ * Get data to generate a TX_WRITE intent log record.
+ */
+static int
+zvol_get_data(void *arg, lr_write_t *lr, char *buf, zio_t *zio)
+{
+ zvol_state_t *zv = arg;
+ objset_t *os = zv->zv_objset;
+ dmu_buf_t *db;
+ rl_t *rl;
+ zgd_t *zgd;
+ uint64_t boff; /* block starting offset */
+ int dlen = lr->lr_length; /* length of user data */
+ int error;
+
+ ASSERT(zio);
+ ASSERT(dlen != 0);
+
+ /*
+ * Write records come in two flavors: immediate and indirect.
+ * For small writes it's cheaper to store the data with the
+ * log record (immediate); for large writes it's cheaper to
+ * sync the data and get a pointer to it (indirect) so that
+ * we don't have to write the data twice.
+ */
+ if (buf != NULL) /* immediate write */
+ return (dmu_read(os, ZVOL_OBJ, lr->lr_offset, dlen, buf));
+
+ zgd = (zgd_t *)kmem_alloc(sizeof (zgd_t), KM_SLEEP);
+ zgd->zgd_zilog = zv->zv_zilog;
+ zgd->zgd_bp = &lr->lr_blkptr;
+
+ /*
+ * Lock the range of the block to ensure that when the data is
+ * written out and it's checksum is being calculated that no other
+ * thread can change the block.
+ */
+ boff = P2ALIGN_TYPED(lr->lr_offset, zv->zv_volblocksize, uint64_t);
+ rl = zfs_range_lock(&zv->zv_znode, boff, zv->zv_volblocksize,
+ RL_READER);
+ zgd->zgd_rl = rl;
+
+ VERIFY(0 == dmu_buf_hold(os, ZVOL_OBJ, lr->lr_offset, zgd, &db));
+ error = dmu_sync(zio, db, &lr->lr_blkptr,
+ lr->lr_common.lrc_txg, zvol_get_done, zgd);
+ if (error == 0)
+ zil_add_vdev(zv->zv_zilog,
+ DVA_GET_VDEV(BP_IDENTITY(&lr->lr_blkptr)));
+ /*
+ * If we get EINPROGRESS, then we need to wait for a
+ * write IO initiated by dmu_sync() to complete before
+ * we can release this dbuf. We will finish everything
+ * up in the zvol_get_done() callback.
+ */
+ if (error == EINPROGRESS)
+ return (0);
+ dmu_buf_rele(db, zgd);
+ zfs_range_unlock(rl);
+ kmem_free(zgd, sizeof (zgd_t));
+ return (error);
+}
+
+int
+zvol_busy(void)
+{
+ return (zvol_minors != 0);
+}
+
+void
+zvol_init(void)
+{
+ ZFS_LOG(1, "ZVOL Initialized.");
+}
+
+void
+zvol_fini(void)
+{
+ ZFS_LOG(1, "ZVOL Deinitialized.");
+}
OpenPOWER on IntegriCloud