summaryrefslogtreecommitdiffstats
path: root/sbin
diff options
context:
space:
mode:
authorpjd <pjd@FreeBSD.org>2007-09-23 07:34:23 +0000
committerpjd <pjd@FreeBSD.org>2007-09-23 07:34:23 +0000
commit27bd800e6102d0ef36527c31660508bdecdd1b1d (patch)
treeff9f3de5e3e137f37ef92cb9ca8feeeb51ec63ca /sbin
parenta4c30a206350a252d2eff78348d39a33eec2dcdd (diff)
downloadFreeBSD-src-27bd800e6102d0ef36527c31660508bdecdd1b1d.zip
FreeBSD-src-27bd800e6102d0ef36527c31660508bdecdd1b1d.tar.gz
Bring in the GEOM Virtualisation class, which allows to create huge GEOM
providers with limited physical storage and add physical storage as needed. Submitted by: Ivan Voras Sponsored by: Google Summer of Code 2006 Approved by: re (kensmith)
Diffstat (limited to 'sbin')
-rw-r--r--sbin/geom/class/Makefile1
-rw-r--r--sbin/geom/class/virstor/Makefile13
-rw-r--r--sbin/geom/class/virstor/geom_virstor.c578
-rw-r--r--sbin/geom/class/virstor/gvirstor.8227
-rw-r--r--sbin/geom/core/geom.85
5 files changed, 823 insertions, 1 deletions
diff --git a/sbin/geom/class/Makefile b/sbin/geom/class/Makefile
index 7b99726..7b556a8 100644
--- a/sbin/geom/class/Makefile
+++ b/sbin/geom/class/Makefile
@@ -16,5 +16,6 @@ SUBDIR+=part
SUBDIR+=raid3
SUBDIR+=shsec
SUBDIR+=stripe
+SUBDIR+=virstor
.include <bsd.subdir.mk>
diff --git a/sbin/geom/class/virstor/Makefile b/sbin/geom/class/virstor/Makefile
new file mode 100644
index 0000000..0b4d5cd
--- /dev/null
+++ b/sbin/geom/class/virstor/Makefile
@@ -0,0 +1,13 @@
+# $FreeBSD$
+
+# Extra source search directories: the shared geom "misc" directory and the
+# kernel-side virstor directory (presumably where binstream.c and
+# g_virstor_md.c live -- confirm against the tree).
+.PATH: ${.CURDIR}/../../misc ${.CURDIR}/../../../../sys/geom/virstor
+
+CLASS= virstor
+
+SRCS+= binstream.c
+SRCS+= g_virstor_md.c
+
+# Link against libmd.
+DPADD= ${LIBMD}
+LDADD= -lmd
+
+.include <bsd.lib.mk>
diff --git a/sbin/geom/class/virstor/geom_virstor.c b/sbin/geom/class/virstor/geom_virstor.c
new file mode 100644
index 0000000..6b29bbe
--- /dev/null
+++ b/sbin/geom/class/virstor/geom_virstor.c
@@ -0,0 +1,578 @@
+/*-
+ * Copyright (c) 2005 Ivan Voras <ivoras@freebsd.org>
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <errno.h>
+#include <paths.h>
+#include <stdio.h>
+#include <stdlib.h>
+#include <stdint.h>
+#include <string.h>
+#include <strings.h>
+#include <fcntl.h>
+#include <unistd.h>
+#include <libgeom.h>
+#include <err.h>
+#include <assert.h>
+
+#include <core/geom.h>
+#include <misc/subr.h>
+
+#include <geom/virstor/g_virstor_md.h>
+#include <geom/virstor/g_virstor.h>
+
+/* Non-static version identifiers (presumably read by geom(8) -- confirm). */
+uint32_t lib_version = G_LIB_VERSION;
+uint32_t version = G_VIRSTOR_VERSION;
+/*
+ * Defaults for the "label" options -m and -s.  virstor_label() treats both
+ * values as byte counts (it divides by 1024 * 1024 to report MB).
+ */
+static intmax_t chunk_size = 4 * 1024 * 1024; /* in bytes (default: 4 MB) */
+static intmax_t vir_size = 2ULL << 40; /* in bytes (default: 2 TB) */
+
+#if G_LIB_VERSION == 1
+/* Support RELENG_6 */
+#define G_TYPE_BOOL G_TYPE_NONE
+#endif
+
+/*
+ * virstor_main gets called by the geom(8) utility
+ */
+static void virstor_main(struct gctl_req *req, unsigned flags);
+
+/*
+ * Command table for the virstor class.  "clear", "dump" and "label" are
+ * routed to virstor_main(); the remaining verbs have a NULL handler
+ * (presumably passed on to the kernel class by geom(8) -- confirm).
+ * The table is terminated by G_CMD_SENTINEL.
+ */
+struct g_command class_commands[] = {
+ {"clear", G_FLAG_VERBOSE, virstor_main, G_NULL_OPTS, NULL,
+ "[-v] prov ..."
+ },
+ {"dump", 0, virstor_main, G_NULL_OPTS, NULL,
+ "prov ..."
+ },
+ /* "label" options default to the file-scope chunk_size / vir_size. */
+ {"label", G_FLAG_VERBOSE | G_FLAG_LOADKLD, virstor_main,
+ {
+ {'h', "hardcode", NULL, G_TYPE_BOOL},
+ {'m', "chunk_size", &chunk_size, G_TYPE_NUMBER},
+ {'s', "vir_size", &vir_size, G_TYPE_NUMBER},
+ G_OPT_SENTINEL
+ },
+ NULL, "[-h] [-v] [-m chunk_size] [-s vir_size] name provider0 [provider1 ...]"
+ },
+ {"destroy", G_FLAG_VERBOSE, NULL,
+ {
+ {'f', "force", NULL, G_TYPE_BOOL},
+ G_OPT_SENTINEL
+ },
+ NULL, "[-fv] name ..."
+ },
+ {"stop", G_FLAG_VERBOSE, NULL,
+ {
+ {'f', "force", NULL, G_TYPE_BOOL},
+ G_OPT_SENTINEL
+ },
+ NULL, "[-fv] name ... (alias for \"destroy\")"
+ },
+ {"add", G_FLAG_VERBOSE, NULL,
+ {
+ {'h', "hardcode", NULL, G_TYPE_BOOL},
+ G_OPT_SENTINEL
+ },
+ NULL, "[-vh] name prov [prov ...]"
+ },
+ {"remove", G_FLAG_VERBOSE, NULL, G_NULL_OPTS, NULL,
+ "[-v] name ..."
+ },
+ G_CMD_SENTINEL
+};
+
+/* Set to 1 by virstor_main() when G_FLAG_VERBOSE is present in its flags. */
+static int verbose = 0;
+
+/* Helper functions' declarations */
+static void virstor_clear(struct gctl_req *req);
+static void virstor_dump(struct gctl_req *req);
+static void virstor_label(struct gctl_req *req);
+
+/*
+ * Dispatcher for the verbs handled in userland.  Records the verbose flag,
+ * reads the "verb" request parameter and forwards the request to the
+ * matching helper; unknown verbs are reported through gctl_error().
+ */
+static void
+virstor_main(struct gctl_req *req, unsigned flags)
+{
+	const char *verb;
+
+	if (flags & G_FLAG_VERBOSE)
+		verbose = 1;
+
+	verb = gctl_get_ascii(req, "verb");
+	if (verb == NULL) {
+		gctl_error(req, "No '%s' argument.", "verb");
+		return;
+	}
+
+	if (strcmp(verb, "label") == 0) {
+		virstor_label(req);
+		return;
+	}
+	if (strcmp(verb, "clear") == 0) {
+		virstor_clear(req);
+		return;
+	}
+	if (strcmp(verb, "dump") == 0) {
+		virstor_dump(req);
+		return;
+	}
+	gctl_error(req, "%s: Unknown command: %s.", __func__, verb);
+
+	/* No CTASSERT in userland
+	CTASSERT(VIRSTOR_MAP_BLOCK_ENTRIES*VIRSTOR_MAP_ENTRY_SIZE == MAXPHYS);
+	*/
+}
+
+/*
+ * Produce a device path for `name' in `path' (buffer of `size' bytes).
+ * A name that already starts with _PATH_DEV is copied unchanged; any other
+ * name gets _PATH_DEV prepended.  The result is truncated to fit.
+ */
+static void
+pathgen(const char *name, char *path, size_t size)
+{
+	const size_t devlen = strlen(_PATH_DEV);
+
+	if (strncmp(name, _PATH_DEV, devlen) == 0) {
+		strlcpy(path, name, size);
+		return;
+	}
+	snprintf(path, size, "%s%s", _PATH_DEV, name);
+}
+
+/*
+ * Write `size' bytes of encoded metadata from `md' into the last sector of
+ * provider `name'.
+ *
+ * The metadata is copied to the start of a zero-filled, sector-sized scratch
+ * buffer which is then written at offset (mediasize - sectorsize).  `size'
+ * must not exceed the provider's sector size (asserted).
+ *
+ * Returns 0 on success, or an errno value on failure.
+ */
+static int
+my_g_metadata_store(const char *name, u_char *md, size_t size)
+{
+	char path[MAXPATHLEN];
+	unsigned sectorsize;
+	off_t mediasize;
+	u_char *sector;
+	ssize_t written;
+	int error, fd;
+
+	pathgen(name, path, sizeof(path));
+	sector = NULL;
+	error = 0;
+
+	fd = open(path, O_RDWR);
+	if (fd == -1)
+		return (errno);
+	mediasize = g_get_mediasize(name);
+	if (mediasize == 0) {
+		error = errno;
+		goto out;
+	}
+	sectorsize = g_get_sectorsize(name);
+	if (sectorsize == 0) {
+		error = errno;
+		goto out;
+	}
+	assert(sectorsize >= size);
+	/* Zero-filled so no uninitialized heap bytes reach the disk. */
+	sector = calloc(1, sectorsize);
+	if (sector == NULL) {
+		error = ENOMEM;
+		goto out;
+	}
+	bcopy(md, sector, size);
+	written = pwrite(fd, sector, sectorsize, mediasize - sectorsize);
+	if (written != (ssize_t)sectorsize) {
+		/* A short write does not set errno; report it as EIO. */
+		error = (written == -1) ? errno : EIO;
+	}
+out:
+	free(sector);
+	close(fd);
+	return (error);
+}
+
+/*
+ * Labels a new geom.  Meaning: parses and checks the parameters, calculates &
+ * writes metadata to the relevant providers so when the next round of
+ * "tasting" comes (which will be just after the provider(s) are closed) geom
+ * can be instantiated with the tasted metadata.
+ *
+ * Request parameters used: "nargs", "arg0" (name of the new device),
+ * "arg1".."argN" (component providers), "vir_size", "chunk_size" and
+ * "hardcode".  The allocation map is written at offset 0 of the first
+ * provider; metadata goes to the last sector of every provider.
+ *
+ * Errors are reported through gctl_error() and abort the operation at the
+ * point of failure.
+ */
+static void
+virstor_label(struct gctl_req *req)
+{
+	struct g_virstor_metadata md;
+	off_t msize;
+	unsigned char *sect;
+	unsigned int i;
+	size_t ssize, secsize;
+	const char *name;
+	char param[32];
+	char path[MAXPATHLEN];
+	int hardcode, nargs, error;
+	struct virstor_map_entry *map;
+	size_t total_chunks;	/* We'll run out of memory if
+				   this needs to be bigger. */
+	unsigned int map_chunks; /* Chunks needed by the map (map size). */
+	size_t map_size;	/* In bytes. */
+	ssize_t written;
+	int fd;
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 2) {
+		gctl_error(req, "Too few arguments (%d): expecting: name "
+		    "provider0 [provider1 ...]", nargs);
+		return;
+	}
+
+	hardcode = gctl_get_int(req, "hardcode");
+
+	/*
+	 * Initialize constant parts of metadata: magic signature, version,
+	 * name.
+	 */
+	bzero(&md, sizeof(md));
+	strlcpy(md.md_magic, G_VIRSTOR_MAGIC, sizeof(md.md_magic));
+	md.md_version = G_VIRSTOR_VERSION;
+	name = gctl_get_ascii(req, "arg0");
+	if (name == NULL) {
+		gctl_error(req, "No 'arg%u' argument.", 0);
+		return;
+	}
+	strlcpy(md.md_name, name, sizeof(md.md_name));
+
+	md.md_virsize = (off_t)gctl_get_intmax(req, "vir_size");
+	md.md_chunk_size = gctl_get_intmax(req, "chunk_size");
+	md.md_count = nargs - 1;
+
+	if (md.md_virsize == 0 || md.md_chunk_size == 0) {
+		gctl_error(req, "Virtual size and chunk size must be non-zero");
+		return;
+	}
+
+	if (md.md_chunk_size % MAXPHYS != 0) {
+		/* XXX: This is not strictly needed, but it's convenient to
+		 * impose some limitations on it, so why not MAXPHYS. */
+		size_t new_size = (md.md_chunk_size / MAXPHYS) * MAXPHYS;
+		if (new_size < md.md_chunk_size)
+			new_size += MAXPHYS;
+		fprintf(stderr, "Resizing chunk size to be a multiple of "
+		    "MAXPHYS (%d kB).\n", MAXPHYS / 1024);
+		fprintf(stderr, "New chunk size: %zu kB\n", new_size / 1024);
+		md.md_chunk_size = new_size;
+	}
+
+	if (md.md_virsize % md.md_chunk_size != 0) {
+		off_t chunk_count = md.md_virsize / md.md_chunk_size;
+		md.md_virsize = chunk_count * md.md_chunk_size;
+		fprintf(stderr, "Resizing virtual size to be a multiple of "
+		    "chunk size.\n");
+		fprintf(stderr, "New virtual size: %zu MB\n",
+		    (size_t)(md.md_virsize/(1024 * 1024)));
+	}
+
+	/* Sum up physical space and make sure all sector sizes agree. */
+	msize = secsize = ssize = 0;
+	for (i = 1; i < (unsigned)nargs; i++) {
+		snprintf(param, sizeof(param), "arg%u", i);
+		name = gctl_get_ascii(req, param);
+		if (name == NULL) {
+			gctl_error(req, "No 'arg%u' argument.", i);
+			return;
+		}
+		ssize = g_get_sectorsize(name);
+		if (ssize == 0) {
+			/*
+			 * Bail out: continuing could leave secsize at 0 and
+			 * divide by zero in the checks below.
+			 */
+			gctl_error(req, "Can't retrieve information about "
+			    "%s: %s.", name, strerror(errno));
+			return;
+		}
+		msize += g_get_mediasize(name);
+		if (secsize == 0)
+			secsize = ssize;
+		else if (secsize != ssize) {
+			gctl_error(req, "Devices need to have same sector size "
+			    "(%u on %s needs to be %u).",
+			    (u_int)ssize, name, (u_int)secsize);
+			return;
+		}
+	}
+
+	if (md.md_chunk_size % secsize != 0) {
+		fprintf(stderr, "Error: chunk size is not a multiple of sector "
+		    "size.\n");
+		gctl_error(req, "Chunk size (in bytes) must be multiple of %u.",
+		    (unsigned int)secsize);
+		return;
+	}
+
+	total_chunks = md.md_virsize / md.md_chunk_size;
+	map_size = total_chunks * sizeof(*map);
+	assert(md.md_virsize % md.md_chunk_size == 0);
+
+	/* Grow the virtual size so the map fills a whole number of sectors. */
+	ssize = map_size % secsize;
+	if (ssize != 0) {
+		size_t add_chunks = (secsize - ssize) / sizeof(*map);
+		total_chunks += add_chunks;
+		md.md_virsize = (off_t)total_chunks * (off_t)md.md_chunk_size;
+		map_size = total_chunks * sizeof(*map);
+		fprintf(stderr, "Resizing virtual size to fit virstor "
+		    "structures.\n");
+		fprintf(stderr, "New virtual size: %ju MB (%zu new chunks)\n",
+		    (uintmax_t)(md.md_virsize / (1024 * 1024)), add_chunks);
+	}
+
+	if (verbose)
+		printf("Total virtual chunks: %zu (%zu MB each), %ju MB total "
+		    "virtual size.\n",
+		    total_chunks, (size_t)(md.md_chunk_size / (1024 * 1024)),
+		    (uintmax_t)(md.md_virsize / (1024 * 1024)));
+
+	if ((off_t)md.md_virsize < msize)
+		fprintf(stderr, "WARNING: Virtual storage size < Physical "
+		    "available storage (%ju < %ju)\n",
+		    (uintmax_t)md.md_virsize, (uintmax_t)msize);
+
+	/* Clear last sector first to spoil all components if device exists. */
+	if (verbose)
+		printf("Clearing metadata on");
+
+	for (i = 1; i < (unsigned)nargs; i++) {
+		snprintf(param, sizeof(param), "arg%u", i);
+		name = gctl_get_ascii(req, param);
+
+		if (verbose)
+			printf(" %s", name);
+
+		msize = g_get_mediasize(name);
+		ssize = g_get_sectorsize(name);
+		if (msize == 0 || ssize == 0) {
+			gctl_error(req, "Can't retrieve information about "
+			    "%s: %s.", name, strerror(errno));
+			return;
+		}
+		if (msize < MAX(md.md_chunk_size*4, map_size)) {
+			/* Stop before anything is written to this device. */
+			gctl_error(req, "Device %s is too small", name);
+			return;
+		}
+		error = g_metadata_clear(name, NULL);
+		if (error != 0) {
+			gctl_error(req, "Can't clear metadata on %s: %s.", name,
+			    strerror(error));
+			return;
+		}
+	}
+
+
+	/* Write allocation table to the first provider - this needs to be done
+	 * before metadata is written because when kernel tastes it it's too
+	 * late */
+	name = gctl_get_ascii(req, "arg1"); /* device with metadata */
+	if (verbose)
+		printf(".\nWriting allocation table to %s...", name);
+
+	/* How many chunks does the map occupy? */
+	map_chunks = map_size/md.md_chunk_size;
+	if (map_size % md.md_chunk_size != 0)
+		map_chunks++;
+	if (verbose) {
+		printf(" (%zu MB, %u chunks) ", map_size/(1024*1024), map_chunks);
+		fflush(stdout);
+	}
+
+	/* Use a MAXPATHLEN buffer; the 32-byte `param' is too small for paths. */
+	pathgen(name, path, sizeof(path));
+	fd = open(path, O_RDWR);
+	if (fd < 0) {
+		gctl_error(req, "Cannot open provider %s to write map", name);
+		return;
+	}
+
+	/* Do it with calloc because there might be a need to set up chunk flags
+	 * in the future */
+	map = calloc(total_chunks, sizeof(*map));
+	if (map == NULL) {
+		gctl_error(req,
+		    "Out of memory (need %zu bytes for allocation map)",
+		    map_size);
+		close(fd);
+		return;
+	}
+
+	written = pwrite(fd, map, map_size, 0);
+	free(map);
+	if (written < 0 || (size_t)written != map_size) {
+		if (verbose) {
+			fprintf(stderr, "\nTried to write %zu, written %zd (%s)\n",
+			    map_size, written, strerror(errno));
+		}
+		gctl_error(req, "Error writing out allocation map!");
+		close(fd);
+		return;
+	}
+	close(fd);
+
+	if (verbose)
+		printf("\nStoring metadata on ");
+
+	/*
+	 * ID is randomly generated, unique for a geom. This is used to
+	 * recognize all providers belonging to one geom.
+	 */
+	md.md_id = arc4random();
+
+	/* Ok, store metadata. */
+	for (i = 1; i < (unsigned)nargs; i++) {
+		snprintf(param, sizeof(param), "arg%u", i);
+		name = gctl_get_ascii(req, param);
+
+		msize = g_get_mediasize(name);
+		ssize = g_get_sectorsize(name);
+
+		if (verbose)
+			printf("%s ", name);
+
+		/* this provider's position/type in geom */
+		md.no = i - 1;
+		/* this provider's size */
+		md.provsize = msize;
+		/* chunk allocation info */
+		md.chunk_count = md.provsize / md.md_chunk_size;
+		if (verbose)
+			printf("(%u chunks) ", md.chunk_count);
+		/* Check to make sure last sector is unused */
+		if ((off_t)(md.chunk_count) * md.md_chunk_size > msize-ssize)
+			md.chunk_count--;
+		md.chunk_next = 0;
+		if (i != 1) {
+			md.chunk_reserved = 0;
+			md.flags = 0;
+		} else {
+			/* The first provider also carries the allocation map. */
+			md.chunk_reserved = map_chunks * 2;
+			md.flags = VIRSTOR_PROVIDER_ALLOCATED |
+			    VIRSTOR_PROVIDER_CURRENT;
+			md.chunk_next = md.chunk_reserved;
+			if (verbose)
+				printf("(%u reserved) ", md.chunk_reserved);
+		}
+
+		if (!hardcode)
+			bzero(md.provider, sizeof(md.provider));
+		else {
+			/* convert "/dev/something" to "something" */
+			if (strncmp(name, _PATH_DEV, strlen(_PATH_DEV)) == 0) {
+				strlcpy(md.provider, name + strlen(_PATH_DEV),
+				    sizeof(md.provider));
+			} else
+				strlcpy(md.provider, name, sizeof(md.provider));
+		}
+		/* Zeroed sector buffer; check the allocation before using it. */
+		sect = calloc(1, ssize);
+		if (sect == NULL)
+			err(1, "Cannot allocate sector of %zu bytes", ssize);
+		virstor_metadata_encode(&md, sect);
+		error = my_g_metadata_store(name, sect, ssize);
+		free(sect);
+		if (error != 0) {
+			if (verbose)
+				printf("\n");
+			fprintf(stderr, "Can't store metadata on %s: %s.\n",
+			    name, strerror(error));
+			gctl_error(req,
+			    "Not fully done (error storing metadata).");
+			return;
+		}
+	}
+#if 0
+	if (verbose)
+		printf("\n");
+#endif
+}
+
+/*
+ * Clears metadata on given provider(s) IF it's owned by us (i.e. it carries
+ * the G_VIRSTOR_MAGIC signature).
+ *
+ * Processes "arg0".."argN" from the request.  A failure on one provider is
+ * reported through gctl_error() and the remaining providers are still
+ * processed.
+ */
+static void
+virstor_clear(struct gctl_req *req)
+{
+	const char *name;
+	char param[32];
+	char path[MAXPATHLEN];
+	unsigned i;
+	int nargs, error;
+	int fd;
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+	for (i = 0; i < (unsigned)nargs; i++) {
+		snprintf(param, sizeof(param), "arg%u", i);
+		name = gctl_get_ascii(req, param);
+		if (name == NULL) {
+			gctl_error(req, "No 'arg%u' argument.", i);
+			continue;
+		}
+
+		error = g_metadata_clear(name, G_VIRSTOR_MAGIC);
+		if (error != 0) {
+			fprintf(stderr, "Can't clear metadata on %s: %s "
+			    "(do I own it?)\n", name, strerror(error));
+			gctl_error(req,
+			    "Not fully done (can't clear metadata).");
+			continue;
+		}
+		/*
+		 * Check that the provider can be opened read-write.  The path
+		 * is built in a MAXPATHLEN buffer; the 32-byte `param' buffer
+		 * is too small for device paths.
+		 */
+		pathgen(name, path, sizeof(path));
+		fd = open(path, O_RDWR);
+		if (fd < 0) {
+			gctl_error(req, "Cannot clear header sector for %s",
+			    name);
+			continue;
+		}
+		if (verbose)
+			printf("Metadata cleared on %s.\n", name);
+		/* Do not leak one descriptor per provider. */
+		close(fd);
+	}
+}
+
+/*
+ * Print the fields of a decoded virstor metadata record to stdout, one per
+ * line.  Every argument is cast to the exact type its conversion expects,
+ * and the virtual size is printed through uintmax_t so very large devices
+ * are not truncated.
+ */
+static void
+virstor_metadata_dump(const struct g_virstor_metadata *md)
+{
+	printf(" Magic string: %s\n", md->md_magic);
+	printf(" Metadata version: %u\n", (u_int) md->md_version);
+	printf(" Device name: %s\n", md->md_name);
+	printf(" Device ID: %u\n", (u_int) md->md_id);
+	printf(" Provider index: %u\n", (u_int) md->no);
+	printf(" Active providers: %u\n", (u_int) md->md_count);
+	printf(" Hardcoded provider: %s\n",
+	    md->provider[0] != '\0' ? md->provider : "(not hardcoded)");
+	printf(" Virtual size: %ju MB\n",
+	    (uintmax_t)(md->md_virsize / (1024 * 1024)));
+	printf(" Chunk size: %u kB\n", (u_int)(md->md_chunk_size / 1024));
+	printf(" Chunks on provider: %u\n", (u_int) md->chunk_count);
+	/* Chunks from chunk_next up to chunk_count are counted as free. */
+	printf(" Chunks free: %u\n",
+	    (u_int)(md->chunk_count - md->chunk_next));
+	printf(" Reserved chunks: %u\n", (u_int) md->chunk_reserved);
+}
+
+/*
+ * Called via virstor_main() to dump metadata information.
+ *
+ * For each provider named in "arg0".."argN", reads the metadata carrying the
+ * G_VIRSTOR_MAGIC signature, decodes it and prints it.  A failure on one
+ * provider is reported and the remaining providers are still processed.
+ */
+static void
+virstor_dump(struct gctl_req *req)
+{
+	struct g_virstor_metadata md;
+	u_char tmpmd[512];	/* temporary buffer */
+	const char *name;
+	char param[16];
+	unsigned int i;		/* unsigned to match the "%u" conversion */
+	int nargs, error;
+
+	assert(sizeof(tmpmd) >= sizeof(md));
+
+	nargs = gctl_get_int(req, "nargs");
+	if (nargs < 1) {
+		gctl_error(req, "Too few arguments.");
+		return;
+	}
+	for (i = 0; i < (unsigned)nargs; i++) {
+		snprintf(param, sizeof(param), "arg%u", i);
+		name = gctl_get_ascii(req, param);
+		if (name == NULL) {
+			gctl_error(req, "No 'arg%u' argument.", i);
+			continue;
+		}
+
+		error = g_metadata_read(name, tmpmd, sizeof(tmpmd),
+		    G_VIRSTOR_MAGIC);
+		if (error != 0) {
+			fprintf(stderr, "Can't read metadata from %s: %s.\n",
+			    name, strerror(error));
+			gctl_error(req,
+			    "Not fully done (error reading metadata).");
+			continue;
+		}
+		virstor_metadata_decode(tmpmd, &md);
+		printf("Metadata on %s:\n", name);
+		virstor_metadata_dump(&md);
+		printf("\n");
+	}
+}
diff --git a/sbin/geom/class/virstor/gvirstor.8 b/sbin/geom/class/virstor/gvirstor.8
new file mode 100644
index 0000000..5807af9
--- /dev/null
+++ b/sbin/geom/class/virstor/gvirstor.8
@@ -0,0 +1,227 @@
+.\" Copyright (c) 2005 Pawel Jakub Dawidek <pjd@FreeBSD.org>
+.\" Copyright (c) 2005 Ivan Voras <ivoras@FreeBSD.org>
+.\" All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" THIS SOFTWARE IS PROVIDED BY THE AUTHORS AND CONTRIBUTORS ``AS IS'' AND
+.\" ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+.\" IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+.\" ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHORS OR CONTRIBUTORS BE LIABLE
+.\" FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+.\" DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+.\" OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+.\" HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+.\" LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+.\" OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+.\" SUCH DAMAGE.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd July 8, 2006
+.Dt GVIRSTOR 8
+.Os
+.Sh NAME
+.Nm gvirstor
+.Nd "provides virtual data storage geom"
+.Sh SYNOPSIS
+.Nm
+.Cm label
+.Op Fl hv
+.Op Fl s Ar virsize
+.Op Fl m Ar chunksize
+.Ar name
+.Ar prov ...
+.Nm
+.Cm stop
+.Op Fl fv
+.Ar name ...
+.Nm
+.Cm add
+.Op Fl vh
+.Ar name prov ...
+.Nm
+.Cm remove
+.Op Fl v
+.Ar name prov ...
+.Nm
+.Cm clear
+.Op Fl v
+.Ar prov ...
+.Nm
+.Cm dump
+.Ar prov ...
+.Nm
+.Cm list
+.Nm
+.Cm status
+.Nm
+.Cm load
+.Nm
+.Cm unload
+.Sh DESCRIPTION
+The
+.Nm
+utility is used for setting up a storage device of arbitrarily large size (for example,
+several TB), consisting of an arbitrary number of physical storage devices with
+a total size less than or equal to the virtual size. Data for the virtual device is allocated from
+the physical devices on demand. In short, this is the virtual storage functionality.
+The first argument to
+.Nm
+indicates an action to be performed:
+.Bl -tag -width ".Cm destroy"
+.It Cm label
+Set up a virtual device from the given components with the specified
+.Ar name .
+Metadata are stored in the last sector of every component.
+Argument
+.Ar virsize
+is the size of new virtual device, with default being 2 TiB (2097152 MiB).
+Argument
+.Ar chunksize
+is the chunk size, with default being 4 MiB (4096 KiB).
+The default is thus "-s 2097152 -m 4096".
+.It Cm stop
+Turn off an existing virtual device by its
+.Ar name .
+This command does not touch on-disk metadata.
+As with other GEOM classes, stopped geoms cannot be started manually.
+.It Cm add
+Adds new components to existing virtual device by its
+.Ar name .
+The specified virstor device must exist and be active (i.e.
+module loaded, device present in /dev).
+.It Cm remove
+Removes components from existing virtual device by its
+.Ar name .
+Only unallocated providers can be removed.
+.It Cm clear
+Clear metadata on the given providers.
+.It Cm dump
+Dump metadata stored on the given providers.
+.It Cm list
+See
+.Xr geom 8 .
+.It Cm status
+See
+.Xr geom 8 .
+.It Cm load
+See
+.Xr geom 8 .
+.It Cm unload
+See
+.Xr geom 8 .
+.El
+.Pp
+Additional options:
+.Bl -tag -width ".Fl f"
+.It Fl f
+Force the removal of the specified virtual device.
+.It Fl h
+Hardcode providers' names in metadata.
+.It Fl v
+Be more verbose.
+.El
+.Sh EXIT STATUS
+Exit status is 0 on success, and 1 if the command fails.
+.Sh EXAMPLES
+The following example shows how to create a virtual device of default size
+(2 TiB), of default chunk (extent) size (4 MiB), with two physical devices for
+backing storage.
+.Bd -literal -offset indent
+gvirstor label -v mydata /dev/ad4 /dev/ad6
+newfs /dev/virstor/mydata
+.Ed
+.Pp
+From now on, the virtual device will be available via the
+.Pa /dev/virstor/mydata
+device entry.
+To add a new physical device / provider to an active virstor device:
+.Bd -literal -offset indent
+gvirstor add mydata ad8
+.Ed
+.Pp
+This will add physical storage (from ad8) to
+.Pa /dev/virstor/mydata
+device.
+To see device status information (including how much physical storage
+is still available for the virtual device), use:
+.Bd -literal -offset indent
+gvirstor list
+.Ed
+.Pp
+All standard
+.Xr geom 8
+subcommands (e.g. "status", "help") are also supported.
+.Sh SYSCTLs
+.Nm
+has several
+.Xr sysctl 8
+tunable variables.
+.Bd -literal -offset indent
+.Pa int kern.geom.virstor.debug
+.Ed
+.Pp
+This sysctl controls verbosity of the kernel module, in the range
+1 to 15. Messages that are marked with higher verbosity levels than
+this are suppressed. Default value is 5 and it's not
+recommended to set this tunable to less than 2, because level 1 messages
+are error events, and level 2 messages are system warnings.
+.Bd -literal -offset indent
+.Pa int kern.geom.virstor.chunk_watermark
+.Ed
+.Pp
+Value in this sysctl sets warning watermark level for physical chunk usage
+on a single component. The warning is issued when a virstor component
+has less than this many free chunks (default 100).
+.Bd -literal -offset indent
+.Pa int kern.geom.virstor.component_watermark
+.Ed
+.Pp
+Value in this sysctl sets warning watermark level for component usage.
+The warning is issued when there are fewer than this many unallocated
+components (default is 1).
+.Pp
+All these sysctls are also available as
+.Xr loader 8
+tunables.
+.Sh LOG MESSAGES
+.Nm
+kernel module issues log messages with prefixes in standardised format,
+which is useful for log message filtering and dispatching. Each message
+line begins with
+.Bd -literal -offset indent
+.Pa GEOM_VIRSTOR[%d]:
+.Ed
+.Pp
+The number (%d) is message verbosity / importance level, in the range
+1 to 15. If a message filtering, dispatching or operator alert system is
+used, it is recommended that messages with levels 1 and 2 be taken
+seriously (for example, to catch out-of-space conditions as set by
+watermark sysctls).
+.Sh SEE ALSO
+.Xr geom 4 ,
+.Xr geom 8 ,
+.Xr newfs 8 ,
+.Xr fstab 5 ,
+.Xr glabel 8
+.Sh HISTORY
+The
+.Nm
+utility appeared in
+.Fx 7.0 .
+.Sh BUGS
+Commands "add" and "remove" contain unavoidable critical sections
+which may make the virstor device unusable if a power failure (or
+other disruptive event) happens during their execution.
+It's recommended to run them when the system is quiescent.
+.Sh AUTHOR
+.An Ivan Voras Aq ivoras@FreeBSD.org
+Sponsored by Google Summer of Code 2006
diff --git a/sbin/geom/core/geom.8 b/sbin/geom/core/geom.8
index 7ab57ef..bb79f78 100644
--- a/sbin/geom/core/geom.8
+++ b/sbin/geom/core/geom.8
@@ -123,6 +123,8 @@ RAID3
SHSEC
.It
STRIPE
+.It
+VIRSTOR
.El
.Sh ENVIRONMENT
The following environment variables affect the execution of
@@ -161,7 +163,8 @@ geom md unload
.Xr gnop 8 ,
.Xr graid3 8 ,
.Xr gshsec 8 ,
-.Xr gstripe 8
+.Xr gstripe 8 ,
+.Xr gvirstor 8
.Sh HISTORY
The
.Nm
OpenPOWER on IntegriCloud