summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorlulf <lulf@FreeBSD.org>2009-03-28 17:20:08 +0000
committerlulf <lulf@FreeBSD.org>2009-03-28 17:20:08 +0000
commit14e3eb7296a4d1fcdf386bd96a7a1d8b96001d13 (patch)
tree5e7f9ce749d55fb9cc8ca02210ab0d369c6900cd
parent1552563928ae5927fc77974bb0a2101970c938bb (diff)
downloadFreeBSD-src-14e3eb7296a4d1fcdf386bd96a7a1d8b96001d13.zip
FreeBSD-src-14e3eb7296a4d1fcdf386bd96a7a1d8b96001d13.tar.gz
Import the gvinum work that has been done during and after Summer of Code 2007.
The work has been under testing and fixing since then, and it is mature enough to be put into HEAD for further testing. A lot has changed in this time, and here are the most important changes: - Gvinum now uses one single worker thread instead of one thread for each volume and each plex. The reason for this is that the previous scheme was very complex, and was the cause of many of the bugs discovered in gvinum. Instead, gvinum now uses one worker thread with an event queue, quite similar to what is used in gmirror. - The rebuild/grow/initialize/parity check routines no longer run in separate threads, but are run as regular I/O requests with special flags. This made it easier to support mounted growing and parity rebuild. - Support for growing striped and raid5-plexes, meaning that one can extend the volumes for these plex types in addition to the concat type. Also works while the volume is mounted. - Implementation of many of the missing commands from the old vinum: attach/detach, start (was partially implemented), stop (was partially implemented), concat, mirror, stripe, raid5 (shortcuts for creating volumes with one plex of these organizations). - The parity check and rebuild no longer go between userland/kernel, meaning that the gvinum command will not stay and wait forever for the rebuild to finish. You can instead watch the status with the list command. - Many problems with gvinum have been reported since 5.x, and some have been hard to fix due to the complicated architecture. Hopefully, it should be more stable and better handle edge cases that previously made gvinum crash. - Failed drives no longer disappear entirely, but now leave behind a dummy drive that makes sure the original state is not forgotten in case the system is rebooted between drive failures/swaps. - Update manpage to reflect new commands and extend it with some examples. Sponsored by: Google Summer of Code 2007 Mentored by: le Tested by: Rick C. Petty <rick-freebsd2008 -at- kiwi-computer.com>
-rw-r--r--sbin/gvinum/gvinum.8177
-rw-r--r--sbin/gvinum/gvinum.c527
-rw-r--r--sys/geom/vinum/geom_vinum.c1010
-rw-r--r--sys/geom/vinum/geom_vinum.h120
-rw-r--r--sys/geom/vinum/geom_vinum_drive.c659
-rw-r--r--sys/geom/vinum/geom_vinum_init.c701
-rw-r--r--sys/geom/vinum/geom_vinum_list.c35
-rw-r--r--sys/geom/vinum/geom_vinum_move.c106
-rw-r--r--sys/geom/vinum/geom_vinum_plex.c1425
-rw-r--r--sys/geom/vinum/geom_vinum_raid5.c380
-rw-r--r--sys/geom/vinum/geom_vinum_raid5.h28
-rw-r--r--sys/geom/vinum/geom_vinum_rename.c245
-rw-r--r--sys/geom/vinum/geom_vinum_rm.c351
-rw-r--r--sys/geom/vinum/geom_vinum_share.c51
-rw-r--r--sys/geom/vinum/geom_vinum_share.h2
-rw-r--r--sys/geom/vinum/geom_vinum_state.c206
-rw-r--r--sys/geom/vinum/geom_vinum_subr.c889
-rw-r--r--sys/geom/vinum/geom_vinum_var.h143
-rw-r--r--sys/geom/vinum/geom_vinum_volume.c422
-rw-r--r--sys/modules/geom/geom_vinum/Makefile4
20 files changed, 4009 insertions, 3472 deletions
diff --git a/sbin/gvinum/gvinum.8 b/sbin/gvinum/gvinum.8
index 83b6876..d2b9616 100644
--- a/sbin/gvinum/gvinum.8
+++ b/sbin/gvinum/gvinum.8
@@ -40,6 +40,13 @@
.Op Fl options
.Sh COMMANDS
.Bl -tag -width indent
+.It Ic attach Ar plex volume Op Cm rename
+.It Ic attach Ar subdisk plex Oo Ar offset Oc Op Cm rename
+Attach a plex to a volume, or a subdisk to a plex.
+If offset is specified, the subdisk will be attached to the given offset within
+the plex.
+If rename is specified, the subdisk or plex will change name according to the
+object it attaches to.
.It Ic checkparity Oo Fl f Oc Ar plex
Check the parity blocks of a RAID-5 plex.
The parity check will start at the
@@ -49,7 +56,10 @@ flag is specified, or otherwise at the location of the parity check pointer,
the first location at which plex's parity is incorrect.
All subdisks in the
plex must be up for a parity check.
-.It Ic create Op Ar description-file
+.It Ic concat Oo Fl fv Oc Oo Fl n Ar name Oc Ar drives
+Create a concatenated volume from the specified drives.
+If no name is specified, a unique name will be set by gvinum.
+.It Ic create Oo Fl f Oc Op Ar description-file
Create a volume as described in
.Ar description-file .
If no
@@ -57,6 +67,18 @@ If no
provided, opens an editor and provides the current
.Nm
configuration for editing.
+The
+.Fl f
+flag will make gvinum ignore any errors regarding creating objects that already
+exist.
+However, in contrast to vinum, objects that are not properly named in the
+.Ar description-file
+will not be created when the
+.Fl f
+flag is given.
+.It Ic detach Oo Fl f Oc Op Ar plex | subdisk
+Detach a plex or subdisk from the volume or plex to which it is
+attached.
.It Ic help
Provides a synopsis of
.Nm
@@ -76,6 +98,14 @@ The
and
.Fl V
flags provide progressively more detailed output.
+.It Ic mirror Oo Fl fsv Oc Oo Fl n Ar name Oc Ar drives
+Create a mirrored volume from the specified drives.
+It requires an even number of drives, at least two.
+If no name is specified, a unique name will be set by gvinum.
+If the
+.Fl s
+flag is specified, a striped mirror will be created, and thus requires a
+multiple of 4 drives.
.It Ic move | mv Fl f Ar drive subdisk Op Ar ...
Move the subdisk(s) to the specified drive.
The
@@ -85,12 +115,19 @@ part of the move.
This can currently only be done when the subdisk is
not being accessed.
.Pp
-If the subdisk(s) form part of a RAID-5 plex, the disk(s) will need to be set
-to the
+If a single subdisk is moved, and it forms a part of a RAID-5 plex, the moved
+subdisk will need to be set to the
+.Dq stale
+state, and the plex will require a
+.Ic start
+command.
+If multiple subdisks are moved, and they form part of a RAID-5 plex, the
+moved subdisks will need to be set to the
.Dq up
state and the plex will require a
.Ic rebuildparity
-command; if the subdisk(s) form part of a plex that is mirrored with other
+command.
+If the subdisk(s) form part of a plex that is mirrored with other
plexes, the plex will require restarting and will sync once restarted.
Moving
more than one subdisk in a RAID-5 plex or subdisks from both sides of a
@@ -105,6 +142,11 @@ Exit
when running in interactive mode.
Normally this would be done by entering the
EOF character.
+.It Ic raid5 Oo Fl fv Oc Oo Fl s Ar stripesize Oc Oo Fl n Ar name Oc Ar drives
+Create a RAID-5 volume from the specified drives.
+If no name is specified, a unique name will be set by
+.Nm .
+This organization requires at least three drives.
.It Ic rename Oo Fl r Oc Ar drive | subdisk | plex | volume newname
Change the name of the specified object.
The
@@ -143,9 +185,21 @@ flag forces state changes regardless of whether they are legal.
Read configuration from all vinum drives.
.It Ic start Oo Fl S Ar size Oc Ar volume | plex | subdisk
Allow the system to access the objects.
+If necessary, plexes will be synced and rebuilt.
+If a subdisk was added to a running RAID-5 or striped plex, gvinum will
+expand into this subdisk and grow the whole RAID-5 array.
+This can be done without unmounting your filesystem.
The
.Fl S
flag is currently ignored.
+.It Ic stop Oo Fl f Oc Op Ar volume | plex | subdisk
+Terminate access to the objects, or stop
+.Nm
+if no parameters are specified.
+.It Ic stripe Oo Fl fv Oc Oo Fl n Ar name Oc Ar drives
+Create a striped volume from the specified drives.
+If no name is specified, a unique name will be set by
+.Nm .
+This organization requires at least two drives.
.El
.Sh DESCRIPTION
The
@@ -217,15 +271,90 @@ is invoked.
directory with device nodes for
.Nm
objects
-.It Pa /dev/gvinum/plex
-directory containing device nodes for
-.Nm
-plexes
-.It Pa /dev/gvinum/sd
-directory containing device nodes for
-.Nm
-subdisks
.El
+.Sh EXAMPLES
+To create a mirror on disks /dev/ad1 and /dev/ad2, create a filesystem, mount,
+unmount and then stop
+.Nm :
+.Pp
+.Dl "gvinum mirror /dev/ad1 /dev/ad2"
+.Dl "newfs /dev/gvinum/gvinumvolume0"
+.Dl "mount /dev/gvinum/gvinumvolume0 /mnt"
+.Dl "..."
+.Dl "umount /mnt"
+.Dl "gvinum stop"
+.Pp
+To create a striped mirror on disks /dev/ad1 /dev/ad2 /dev/ad3 and /dev/ad4
+named "data" and create a filesystem:
+.Pp
+.Dl "gvinum mirror -s -n data /dev/ad1 /dev/ad2 /dev/ad3 /dev/ad4"
+.Dl "newfs /dev/gvinum/data"
+.Pp
+To create a raid5 array named "myraid5vol" on disks /dev/ad1 /dev/ad2 and
+/dev/ad3, with stripesize 493k you can use the raid5 command:
+.Pp
+.Dl "gvinum raid5 -n myraid5vol -s 493k /dev/ad1 /dev/ad2 /dev/ad3"
+.Pp
+Then the volume will be created automatically.
+Afterwards, you have to initialize the volume:
+.Pp
+.Dl "gvinum start myraid5vol"
+.Pp
+The initialization will start, and the states will be updated when it's
+finished.
+The list command will give you information about its progress.
+.Pp
+Imagine that one of the drives fails, and the output of 'printconfig' looks
+something like this:
+.Pp
+.Dl "drive gvinumdrive1 device /dev/ad2"
+.Dl "drive gvinumdrive2 device /dev/???"
+.Dl "drive gvinumdrive0 device /dev/ad1"
+.Dl "volume myraid5vol"
+.Dl "plex name myraid5vol.p0 org raid5 986s vol myraid5vol"
+.Dl "sd name myraid5vol.p0.s2 drive gvinumdrive2 len 32538s driveoffset 265s"
+.Dl "plex myraid5vol.p0 plexoffset 1972s"
+.Dl "sd name myraid5vol.p0.s1 drive gvinumdrive1 len 32538s driveoffset 265s"
+.Dl "plex myraid5vol.p0 plexoffset 986s"
+.Dl "sd name myraid5vol.p0.s0 drive gvinumdrive0 len 32538s driveoffset 265s"
+.Dl "plex myraid5vol.p0 plexoffset 0s"
+.Pp
+Create a new drive with this configuration:
+.Pp
+.Dl "drive gdrive4 device /dev/ad4"
+.Pp
+Then move the stale subdisk to the new drive:
+.Pp
+.Dl "gvinum move gdrive4 myraid5vol.p0.s2"
+.Pp
+Then, initiate the rebuild:
+.Pp
+.Dl "gvinum start myraid5vol.p0"
+.Pp
+The plex will go up from degraded mode after the rebuild is finished.
+The plex can still be used while the rebuild is in progress, although requests
+might be delayed.
+For a more advanced usage and detailed explanation of gvinum, the
+handbook is recommended.
+.Pp
+Given the configuration as in the previous example, growing a RAID-5 or STRIPED
+array is accomplished by adding a new subdisk to the plex with a
+.Ar description-file
+similar to this:
+.Pp
+.Dl "drive newdrive device /dev/ad4"
+.Dl "sd drive newdrive plex myraid5vol.p0"
+.Pp
+If everything went ok, the plex state should now be set to growable.
+You can then start the growing with the
+.Ic start
+command:
+.Pp
+.Dl "gvinum start myraid5vol.p0"
+.Pp
+As with rebuilding, you can watch the progress using the
+.Ic list
+command.
+.Pp
.Sh SEE ALSO
.Xr geom 4 ,
.Xr geom 8
@@ -255,9 +384,13 @@ documentation were added by
.An "Chris Jones"
through the 2005 Google Summer
of Code program.
+A partial rewrite of
+.Nm
+was done by
+.An "Lukas Ertl"
+and
+.An "Ulf Lilleengen"
+through the 2007 Google Summer of Code program.
+The documentation has been updated to reflect the new functionality.
.Sh AUTHORS
.An Lukas Ertl Aq le@FreeBSD.org
.An Chris Jones Aq soc-cjones@FreeBSD.org
+.An Ulf Lilleengen Aq lulf@FreeBSD.org
.Sh BUGS
Currently,
.Nm
@@ -271,10 +404,6 @@ initsize flag to
.Ic start
is ignored.
.Pp
-The
-.Ic stop
-command does not work.
-.Pp
Moving subdisks that are not part of a mirrored or RAID-5 volume will
destroy data.
It is perhaps a bug to permit this.
@@ -291,18 +420,10 @@ Specifically, the following commands from
.Xr vinum 4
are not supported:
.Bl -tag -width indent
-.It Ic attach Ar plex volume Op Cm rename
-.It Ic attach Ar subdisk plex Oo Ar offset Oc Op Cm rename
-Attach a plex to a volume, or a subdisk to a plex.
-.It Ic concat Oo Fl fv Oc Oo Fl n Ar name Oc Ar drives
-Create a concatenated volume from the specified drives.
.It Ic debug
Cause the volume manager to enter the kernel debugger.
.It Ic debug Ar flags
Set debugging flags.
-.It Ic detach Oo Fl f Oc Op Ar plex | subdisk
-Detach a plex or subdisk from the volume or plex to which it is
-attached.
.It Ic dumpconfig Op Ar drive ...
List the configuration information stored on the specified drives, or all
drives in the system if no drive names are specified.
@@ -310,17 +431,9 @@ drives in the system if no drive names are specified.
List information about volume manager state.
.It Ic label Ar volume
Create a volume label.
-.It Ic mirror Oo Fl fsv Oc Oo Fl n Ar name Oc Ar drives
-Create a mirrored volume from the specified drives.
.It Ic resetstats Oo Fl r Oc Op Ar volume | plex | subdisk
Reset statistics counters for the specified objects, or for all objects if none
are specified.
.It Ic setdaemon Op Ar value
Set daemon configuration.
-.It Ic stop Oo Fl f Oc Op Ar volume | plex | subdisk
-Terminate access to the objects, or stop
-.Nm
-if no parameters are specified.
-.It Ic stripe Oo Fl fv Oc Oo Fl n Ar name Oc Ar drives
-Create a striped volume from the specified drives.
.El
diff --git a/sbin/gvinum/gvinum.c b/sbin/gvinum/gvinum.c
index ea33b75..bea4f7c 100644
--- a/sbin/gvinum/gvinum.c
+++ b/sbin/gvinum/gvinum.c
@@ -1,5 +1,7 @@
/*
- * Copyright (c) 2004 Lukas Ertl, 2005 Chris Jones
+ * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2005 Chris Jones
+ * Copyright (c) 2007 Ulf Lilleengen
* All rights reserved.
*
* Portions of this software were developed for the FreeBSD Project
@@ -43,6 +45,7 @@
#include <ctype.h>
#include <err.h>
+#include <errno.h>
#include <libgeom.h>
#include <stdint.h>
#include <stdio.h>
@@ -54,12 +57,17 @@
#include "gvinum.h"
+void gvinum_attach(int, char **);
+void gvinum_concat(int, char **);
void gvinum_create(int, char **);
+void gvinum_detach(int, char **);
void gvinum_help(void);
void gvinum_list(int, char **);
void gvinum_move(int, char **);
+void gvinum_mirror(int, char **);
void gvinum_parityop(int, char **, int);
void gvinum_printconfig(int, char **);
+void gvinum_raid5(int, char **);
void gvinum_rename(int, char **);
void gvinum_resetconfig(void);
void gvinum_rm(int, char **);
@@ -67,9 +75,15 @@ void gvinum_saveconfig(void);
void gvinum_setstate(int, char **);
void gvinum_start(int, char **);
void gvinum_stop(int, char **);
+void gvinum_stripe(int, char **);
void parseline(int, char **);
void printconfig(FILE *, char *);
+char *create_drive(char *);
+void create_volume(int, char **, char *);
+char *find_name(const char *, int, int);
+char *find_pattern(char *, char *);
+
int
main(int argc, char **argv)
{
@@ -111,6 +125,44 @@ main(int argc, char **argv)
exit(0);
}
+/*
+ * Attach a plex to a volume or a subdisk to a plex.
+ *
+ * argv[1] names the child object and argv[2] the parent.  Every further
+ * argument is either the keyword "rename" or a plex offset; the two are
+ * accepted in either order so that both the usage text below and the
+ * "[offset] [rename]" form still set the rename flag.  When no offset is
+ * given, -1 is sent.
+ */
+void
+gvinum_attach(int argc, char **argv)
+{
+	struct gctl_req *req;
+	const char *errstr;
+	char *end;
+	int i, rename;
+	off_t offset;
+
+	rename = 0;
+	offset = -1;
+	if (argc < 3) {
+		warnx("usage:\tattach <subdisk> <plex> [rename] "
+		    "[<plexoffset>]\n"
+		    "\tattach <plex> <volume> [rename]");
+		return;
+	}
+	for (i = 3; i < argc; i++) {
+		if (!strcmp(argv[i], "rename")) {
+			rename = 1;
+			continue;
+		}
+		/* strtoll() so that a 64-bit off_t is not cut down to long. */
+		offset = strtoll(argv[i], &end, 0);
+		/* Trailing characters are tolerated, a missing number is not. */
+		if (end == argv[i]) {
+			warnx("invalid plex offset '%s'", argv[i]);
+			return;
+		}
+	}
+	req = gctl_get_handle();
+	gctl_ro_param(req, "class", -1, "VINUM");
+	gctl_ro_param(req, "verb", -1, "attach");
+	gctl_ro_param(req, "child", -1, argv[1]);
+	gctl_ro_param(req, "parent", -1, argv[2]);
+	gctl_ro_param(req, "offset", sizeof(off_t), &offset);
+	gctl_ro_param(req, "rename", sizeof(int), &rename);
+	errstr = gctl_issue(req);
+	if (errstr != NULL)
+		warnx("attach failed: %s", errstr);
+	gctl_free(req);
+}
+
void
gvinum_create(int argc, char **argv)
{
@@ -120,19 +172,30 @@ gvinum_create(int argc, char **argv)
struct gv_sd *s;
struct gv_volume *v;
FILE *tmp;
- int drives, errors, fd, line, plexes, plex_in_volume;
- int sd_in_plex, status, subdisks, tokens, volumes;
+ int drives, errors, fd, flags, i, line, plexes, plex_in_volume;
+ int sd_in_plex, status, subdisks, tokens, undeffd, volumes;
const char *errstr;
- char buf[BUFSIZ], buf1[BUFSIZ], commandline[BUFSIZ], *ed;
+ char buf[BUFSIZ], buf1[BUFSIZ], commandline[BUFSIZ], *ed, *sdname;
char original[BUFSIZ], tmpfile[20], *token[GV_MAXARGS];
char plex[GV_MAXPLEXNAME], volume[GV_MAXVOLNAME];
- if (argc == 2) {
- if ((tmp = fopen(argv[1], "r")) == NULL) {
- warn("can't open '%s' for reading", argv[1]);
- return;
- }
- } else {
+ tmp = NULL;
+ flags = 0;
+ for (i = 1; i < argc; i++) {
+ /* Force flag used to ignore already created drives. */
+ if (!strcmp(argv[i], "-f")) {
+ flags |= GV_FLAG_F;
+ /* Else it must be a file. */
+ } else {
+ if ((tmp = fopen(argv[1], "r")) == NULL) {
+ warn("can't open '%s' for reading", argv[1]);
+ return;
+ }
+ }
+ }
+
+ /* We didn't get a file. */
+ if (tmp == NULL) {
snprintf(tmpfile, sizeof(tmpfile), "/tmp/gvinum.XXXXXX");
if ((fd = mkstemp(tmpfile)) == -1) {
@@ -167,9 +230,11 @@ gvinum_create(int argc, char **argv)
req = gctl_get_handle();
gctl_ro_param(req, "class", -1, "VINUM");
gctl_ro_param(req, "verb", -1, "create");
+ gctl_ro_param(req, "flags", sizeof(int), &flags);
drives = volumes = plexes = subdisks = 0;
- plex_in_volume = sd_in_plex = 0;
+ plex_in_volume = sd_in_plex = undeffd = 0;
+ plex[0] = '\0';
errors = 0;
line = 1;
while ((fgets(buf, BUFSIZ, tmp)) != NULL) {
@@ -187,7 +252,7 @@ gvinum_create(int argc, char **argv)
* Copy the original input line in case we need it for error
* output.
*/
- strncpy(original, buf, sizeof(buf));
+ strlcpy(original, buf, sizeof(original));
tokens = gv_tokenize(buf, token, GV_MAXARGS);
if (tokens <= 0) {
@@ -214,7 +279,7 @@ gvinum_create(int argc, char **argv)
* Set default volume name for following plex
* definitions.
*/
- strncpy(volume, v->name, sizeof(volume));
+ strlcpy(volume, v->name, sizeof(volume));
snprintf(buf1, sizeof(buf1), "volume%d", volumes);
gctl_ro_param(req, buf1, sizeof(*v), v);
@@ -236,13 +301,13 @@ gvinum_create(int argc, char **argv)
/* Default name. */
if (strlen(p->name) == 0) {
- snprintf(p->name, GV_MAXPLEXNAME, "%s.p%d",
+ snprintf(p->name, sizeof(p->name), "%s.p%d",
volume, plex_in_volume++);
}
/* Default volume. */
if (strlen(p->volume) == 0) {
- snprintf(p->volume, GV_MAXVOLNAME, "%s",
+ snprintf(p->volume, sizeof(p->volume), "%s",
volume);
}
@@ -250,7 +315,7 @@ gvinum_create(int argc, char **argv)
* Set default plex name for following subdisk
* definitions.
*/
- strncpy(plex, p->name, GV_MAXPLEXNAME);
+ strlcpy(plex, p->name, sizeof(plex));
snprintf(buf1, sizeof(buf1), "plex%d", plexes);
gctl_ro_param(req, buf1, sizeof(*p), p);
@@ -270,13 +335,21 @@ gvinum_create(int argc, char **argv)
/* Default name. */
if (strlen(s->name) == 0) {
- snprintf(s->name, GV_MAXSDNAME, "%s.s%d",
- plex, sd_in_plex++);
+ if (strlen(plex) == 0) {
+ sdname = find_name("gvinumsubdisk.p",
+ GV_TYPE_SD, GV_MAXSDNAME);
+ snprintf(s->name, sizeof(s->name),
+ "%s.s%d", sdname, undeffd++);
+ free(sdname);
+ } else {
+ snprintf(s->name, sizeof(s->name),
+ "%s.s%d",plex, sd_in_plex++);
+ }
}
/* Default plex. */
if (strlen(s->plex) == 0)
- snprintf(s->plex, GV_MAXPLEXNAME, "%s", plex);
+ snprintf(s->plex, sizeof(s->plex), "%s", plex);
snprintf(buf1, sizeof(buf1), "sd%d", subdisks);
gctl_ro_param(req, buf1, sizeof(*s), s);
@@ -320,7 +393,279 @@ gvinum_create(int argc, char **argv)
warnx("create failed: %s", errstr);
}
gctl_free(req);
- gvinum_list(0, NULL);
+}
+
+/*
+ * Create a concatenated volume.
+ *
+ * Only checks that at least one argument follows the command name; the
+ * options and drive list are handed unchanged to create_volume().
+ */
+void
+gvinum_concat(int argc, char **argv)
+{
+
+	if (argc < 2) {
+		/* No trailing "\n": warnx() terminates the line itself. */
+		warnx("usage:\tconcat [-fv] [-n name] drives");
+		return;
+	}
+	create_volume(argc, argv, "concat");
+}
+
+
+/*
+ * Create a drive quick and dirty.
+ *
+ * Picks a free "gvinumdriveN" name with find_name(), sends a "create"
+ * request for a single drive on <device> (a leading "/dev/" is stripped),
+ * and then polls find_name() until it no longer hands back the same name,
+ * i.e. until the new drive shows up in the configuration.
+ *
+ * Returns the malloc'ed drive name (owned by the caller), or NULL if no name
+ * could be found or the request failed.
+ */
+char *
+create_drive(char *device)
+{
+	struct gv_drive *d;
+	struct gctl_req *req;
+	const char *errstr;
+	char *drivename, *dname;
+	int drives, failed, i, flags, taken, volumes, subdisks, plexes;
+
+	flags = plexes = subdisks = volumes = 0;
+	drives = 1;
+
+	/* Strip away eventual /dev/ in front. */
+	if (strncmp(device, "/dev/", 5) == 0)
+		device += 5;
+
+	drivename = find_name("gvinumdrive", GV_TYPE_DRIVE, GV_MAXDRIVENAME);
+	if (drivename == NULL)
+		return (NULL);
+
+	d = calloc(1, sizeof(*d));
+	if (d == NULL)
+		err(1, "unable to allocate for gv_drive object");
+	strlcpy(d->name, drivename, sizeof(d->name));
+	strlcpy(d->device, device, sizeof(d->device));
+
+	req = gctl_get_handle();
+	gctl_ro_param(req, "class", -1, "VINUM");
+	gctl_ro_param(req, "verb", -1, "create");
+	gctl_ro_param(req, "drive0", sizeof(*d), d);
+	gctl_ro_param(req, "flags", sizeof(int), &flags);
+	gctl_ro_param(req, "drives", sizeof(int), &drives);
+	gctl_ro_param(req, "volumes", sizeof(int), &volumes);
+	gctl_ro_param(req, "plexes", sizeof(int), &plexes);
+	gctl_ro_param(req, "subdisks", sizeof(int), &subdisks);
+	errstr = gctl_issue(req);
+	failed = (errstr != NULL);
+	/* Report before the request, which may own errstr, is released. */
+	if (failed)
+		warnx("error creating drive: %s", errstr);
+	/* The request is released exactly once, on every path. */
+	gctl_free(req);
+	free(d);
+	if (failed) {
+		free(drivename);
+		return (NULL);
+	}
+
+	/*
+	 * XXX: This is needed because we have to make sure the drives
+	 * are created before we return.
+	 * Loop until it's in the config.
+	 */
+	for (i = 0; i < 100000; i++) {
+		dname = find_name("gvinumdrive", GV_TYPE_DRIVE,
+		    GV_MAXDRIVENAME);
+		if (dname != NULL) {
+			/* A different free name means ours is now taken. */
+			taken = (strcmp(dname, drivename) != 0);
+			free(dname);
+			if (taken)
+				break;
+		}
+		/* Sleep even when the lookup failed, so we never spin. */
+		usleep(100000);	/* Sleep for 0.1s */
+	}
+	return (drivename);
+}
+
+/*
+ * General routine for creating a volume. Mainly for use by concat, mirror,
+ * raid5 and stripe commands.
+ *
+ * Parses argv[1..]: -f, -v and -s set flags, "-n <name>" gives the volume
+ * name, and for the "raid5" verb "-s" is followed by a stripe size.  Every
+ * other argument is taken as a device, for which a drive is created with
+ * create_drive().  If no name was given, one is picked with find_name().
+ * The request is then issued with <verb>; failures are reported with warnx().
+ */
+void
+create_volume(int argc, char **argv, char *verb)
+{
+	struct gctl_req *req;
+	const char *errstr;
+	char buf[BUFSIZ], *autoname, **drivenames, *volname;
+	int drives, flags, i;
+	off_t stripesize;
+
+	flags = 0;
+	drives = 0;
+	autoname = NULL;
+	volname = NULL;
+	stripesize = 262144;
+
+	/*
+	 * Remember the names returned by create_drive() so they can be freed
+	 * once the request is done.  Each argument yields at most one drive,
+	 * so argc slots are enough.
+	 */
+	drivenames = calloc(argc > 0 ? (size_t)argc : 1, sizeof(*drivenames));
+	if (drivenames == NULL)
+		err(1, "unable to allocate drive name list");
+
+	req = gctl_get_handle();
+	gctl_ro_param(req, "class", -1, "VINUM");
+
+	for (i = 1; i < argc; i++) {
+		if (!strcmp(argv[i], "-f")) {
+			flags |= GV_FLAG_F;
+		} else if (!strcmp(argv[i], "-n")) {
+			/* Do not read past the end of argv. */
+			if (++i >= argc) {
+				warnx("option -n requires a volume name");
+				goto bad;
+			}
+			volname = argv[i];
+		} else if (!strcmp(argv[i], "-v")) {
+			flags |= GV_FLAG_V;
+		} else if (!strcmp(argv[i], "-s")) {
+			flags |= GV_FLAG_S;
+			/* Only raid5 takes an operand after -s. */
+			if (!strcmp(verb, "raid5")) {
+				if (++i >= argc) {
+					warnx("option -s requires a stripe "
+					    "size");
+					goto bad;
+				}
+				stripesize = gv_sizespec(argv[i]);
+			}
+		} else {
+			/* Assume it's a drive.  First we create the drive. */
+			drivenames[drives] = create_drive(argv[i]);
+			if (drivenames[drives] == NULL)
+				goto bad;
+			/* Then we add it to the request. */
+			snprintf(buf, sizeof(buf), "drive%d", drives);
+			gctl_ro_param(req, buf, -1, drivenames[drives]);
+			drives++;
+		}
+	}
+
+	gctl_ro_param(req, "stripesize", sizeof(off_t), &stripesize);
+
+	/* Find a free volume name. */
+	if (volname == NULL) {
+		autoname = find_name("gvinumvolume", GV_TYPE_VOL,
+		    GV_MAXVOLNAME);
+		if (autoname == NULL) {
+			warnx("unable to find a free volume name");
+			goto bad;
+		}
+		volname = autoname;
+	}
+
+	/* Then we send a request to actually create the volumes. */
+	gctl_ro_param(req, "verb", -1, verb);
+	gctl_ro_param(req, "flags", sizeof(int), &flags);
+	gctl_ro_param(req, "drives", sizeof(int), &drives);
+	gctl_ro_param(req, "name", -1, volname);
+	errstr = gctl_issue(req);
+	if (errstr != NULL)
+		warnx("creating %s volume failed: %s", verb, errstr);
+bad:
+	gctl_free(req);
+	/* Only release the names after the request no longer needs them. */
+	for (i = 0; i < drives; i++)
+		free(drivenames[i]);
+	free(drivenames);
+	free(autoname);
+}
+
+/*
+ * Parse a line of the config, return the word after <pattern>.
+ *
+ * The line is split in place on single spaces with strsep().  Returns NULL
+ * when <pattern> does not occur as a word or is the last word on the line.
+ */
+char *
+find_pattern(char *line, char *pattern)
+{
+	char *word;
+
+	for (word = strsep(&line, " "); word != NULL;
+	    word = strsep(&line, " ")) {
+		/* On a match the very next word is the answer. */
+		if (strcmp(word, pattern) == 0)
+			return (strsep(&line, " "));
+	}
+	return (NULL);
+}
+
+/*
+ * Find a free name for an object given a prefix.
+ *
+ * Fetches the current configuration with a "getconfig" request, then tries
+ * <prefix>0, <prefix>1, ... and returns the first candidate that does not
+ * match any name found in the configuration.  For drives the word after
+ * "drive" on each line is compared, for volumes the word after "volume",
+ * and for plexes and subdisks the word after "name".
+ *
+ * Returns a malloc'ed string (owned by the caller), or NULL if the
+ * configuration cannot be fetched, the type is unknown, memory is exhausted
+ * or all 10000 candidates are taken.
+ */
+char *
+find_name(const char *prefix, int type, int namelen)
+{
+	struct gctl_req *req;
+	char comment[1], buf[GV_CFG_LEN - 1], *keyword, *name, *sname;
+	const char *errstr;
+	int i, n, begin, len, conflict;
+	size_t linelen;
+	char line[1024];
+
+	/* The keyword depends on the type only, so resolve it once. */
+	switch (type) {
+	case GV_TYPE_DRIVE:
+		keyword = "drive";
+		break;
+	case GV_TYPE_VOL:
+		keyword = "volume";
+		break;
+	case GV_TYPE_PLEX:
+	case GV_TYPE_SD:
+		keyword = "name";
+		break;
+	default:
+		warnx("invalid object type %d given", type);
+		return (NULL);
+	}
+
+	comment[0] = '\0';
+	buf[0] = '\0';
+
+	/* Find a name. Fetch out configuration first. */
+	req = gctl_get_handle();
+	gctl_ro_param(req, "class", -1, "VINUM");
+	gctl_ro_param(req, "verb", -1, "getconfig");
+	gctl_ro_param(req, "comment", -1, comment);
+	gctl_rw_param(req, "config", sizeof(buf), buf);
+	errstr = gctl_issue(req);
+	if (errstr != NULL) {
+		warnx("can't get configuration: %s", errstr);
+		/* Release the request on the error path as well. */
+		gctl_free(req);
+		return (NULL);
+	}
+	gctl_free(req);
+
+	/* Make sure strlen() below cannot run off the end of buf. */
+	buf[sizeof(buf) - 1] = '\0';
+	len = strlen(buf);
+
+	sname = malloc(namelen + 1);
+	if (sname == NULL) {
+		warn("unable to allocate name buffer");
+		return (NULL);
+	}
+
+	/* XXX: Max object setting? */
+	for (n = 0; n < 10000; n++) {
+		snprintf(sname, namelen, "%s%d", prefix, n);
+		conflict = 0;
+		begin = 0;
+		/*
+		 * Loop through the configuration line by line.  i runs up to
+		 * and including len so that the terminating NUL closes a last
+		 * line that has no trailing newline.
+		 */
+		for (i = 0; i <= len && !conflict; i++) {
+			if (buf[i] != '\n' && buf[i] != '\0')
+				continue;
+			/* Copy at most what fits into line[]. */
+			linelen = (size_t)(i - begin) + 1;
+			if (linelen > sizeof(line))
+				linelen = sizeof(line);
+			strlcpy(line, buf + begin, linelen);
+			begin = i + 1;
+			name = find_pattern(line, keyword);
+			if (name != NULL && !strcmp(sname, name))
+				conflict = 1;
+		}
+		if (!conflict)
+			return (sname);
+	}
+	free(sname);
+	return (NULL);
+}
+
+/*
+ * Detach a plex or subdisk from its parent.
+ *
+ * Accepts an optional -f flag followed by exactly one object name, and sends
+ * a "detach" request carrying that name and the flags.
+ */
+void
+gvinum_detach(int argc, char **argv)
+{
+	const char *errstr;
+	struct gctl_req *req;
+	int flags, i;
+
+	/* Must start at zero: the option loop only ORs bits into it. */
+	flags = 0;
+	optreset = 1;
+	optind = 1;
+	while ((i = getopt(argc, argv, "f")) != -1) {
+		switch (i) {
+		case 'f':
+			flags |= GV_FLAG_F;
+			break;
+		default:
+			/*
+			 * getopt() returns '?' here; the offending character
+			 * is in optopt.  warnx() because errno is unrelated.
+			 */
+			warnx("invalid flag: %c", optopt);
+			return;
+		}
+	}
+	argc -= optind;
+	argv += optind;
+	if (argc != 1) {
+		warnx("usage: detach [-f] <subdisk> | <plex>");
+		return;
+	}
+
+	req = gctl_get_handle();
+	gctl_ro_param(req, "class", -1, "VINUM");
+	gctl_ro_param(req, "verb", -1, "detach");
+	gctl_ro_param(req, "object", -1, argv[0]);
+	gctl_ro_param(req, "flags", sizeof(int), &flags);
+
+	errstr = gctl_issue(req);
+	if (errstr != NULL)
+		warnx("detach failed: %s", errstr);
+	gctl_free(req);
}
void
@@ -329,8 +674,16 @@ gvinum_help(void)
printf("COMMANDS\n"
"checkparity [-f] plex\n"
" Check the parity blocks of a RAID-5 plex.\n"
- "create description-file\n"
+ "create [-f] description-file\n"
" Create as per description-file or open editor.\n"
+ "attach plex volume [rename]\n"
+ "attach subdisk plex [offset] [rename]\n"
+ " Attach a plex to a volume, or a subdisk to a plex\n"
+ "concat [-fv] [-n name] drives\n"
+ " Create a concatenated volume from the specified drives.\n"
+ "detach [-f] [plex | subdisk]\n"
+ " Detach a plex or a subdisk from the volume or plex to\n"
+ " which it is attached.\n"
"l | list [-r] [-v] [-V] [volume | plex | subdisk]\n"
" List information about specified objects.\n"
"ld [-r] [-v] [-V] [volume]\n"
@@ -341,18 +694,22 @@ gvinum_help(void)
" List information about plexes.\n"
"lv [-r] [-v] [-V] [volume]\n"
" List information about volumes.\n"
+ "mirror [-fsv] [-n name] drives\n"
+ " Create a mirrored volume from the specified drives.\n"
"move | mv -f drive object ...\n"
" Move the object(s) to the specified drive.\n"
"quit Exit the vinum program when running in interactive mode."
" Nor-\n"
" mally this would be done by entering the EOF character.\n"
+ "raid5 [-fv] [-s stripesize] [-n name] drives\n"
+ " Create a RAID-5 volume from the specified drives.\n"
"rename [-r] [drive | subdisk | plex | volume] newname\n"
" Change the name of the specified object.\n"
"rebuildparity plex [-f]\n"
" Rebuild the parity blocks of a RAID-5 plex.\n"
"resetconfig\n"
" Reset the complete gvinum configuration\n"
- "rm [-r] volume | plex | subdisk | drive\n"
+ "rm [-r] [-f] volume | plex | subdisk | drive\n"
" Remove an object.\n"
"saveconfig\n"
" Save vinum configuration to disk after configuration"
@@ -363,6 +720,8 @@ gvinum_help(void)
" poses only.\n"
"start [-S size] volume | plex | subdisk\n"
" Allow the system to access the objects.\n"
+ "stripe [-fv] [-n name] drives\n"
+ " Create a striped volume from the specified drives.\n"
);
return;
@@ -488,6 +847,18 @@ gvinum_list(int argc, char **argv)
return;
}
+/*
+ * Create a mirrored volume.
+ *
+ * Only checks that at least one argument follows the command name; the
+ * options and drive list are handed unchanged to create_volume().
+ */
+void
+gvinum_mirror(int argc, char **argv)
+{
+
+	if (argc < 2) {
+		/* "usage:" as in the sibling commands; warnx() adds "\n". */
+		warnx("usage:\tmirror [-fsv] [-n name] drives");
+		return;
+	}
+	create_volume(argc, argv, "mirror");
+}
+
/* Note that move is currently of form '[-r] target object [...]' */
void
gvinum_move(int argc, char **argv)
@@ -553,8 +924,7 @@ void
gvinum_parityop(int argc, char **argv, int rebuild)
{
struct gctl_req *req;
- int flags, i, rv;
- off_t offset;
+ int flags, i;
const char *errstr;
char *op, *msg;
@@ -591,47 +961,32 @@ gvinum_parityop(int argc, char **argv, int rebuild)
return;
}
- do {
- rv = 0;
- req = gctl_get_handle();
- gctl_ro_param(req, "class", -1, "VINUM");
- gctl_ro_param(req, "verb", -1, "parityop");
- gctl_ro_param(req, "flags", sizeof(int), &flags);
- gctl_ro_param(req, "rebuild", sizeof(int), &rebuild);
- gctl_rw_param(req, "rv", sizeof(int), &rv);
- gctl_rw_param(req, "offset", sizeof(off_t), &offset);
- gctl_ro_param(req, "plex", -1, argv[0]);
- errstr = gctl_issue(req);
- if (errstr) {
- warnx("%s\n", errstr);
- gctl_free(req);
- break;
- }
- gctl_free(req);
- if (flags & GV_FLAG_V) {
- printf("\r%s at %s ... ", msg,
- gv_roughlength(offset, 1));
- }
- if (rv == 1) {
- printf("Parity incorrect at offset 0x%jx\n",
- (intmax_t)offset);
- if (!rebuild)
- break;
- }
- fflush(stdout);
+ req = gctl_get_handle();
+ gctl_ro_param(req, "class", -1, "VINUM");
+ gctl_ro_param(req, "verb", -1, op);
+ gctl_ro_param(req, "rebuild", sizeof(int), &rebuild);
+ gctl_ro_param(req, "flags", sizeof(int), &flags);
+ gctl_ro_param(req, "plex", -1, argv[0]);
+
+ errstr = gctl_issue(req);
+ if (errstr)
+ warnx("%s\n", errstr);
+ gctl_free(req);
+}
- /* Clear the -f flag. */
- flags &= ~GV_FLAG_F;
- } while (rv >= 0);
+/*
+ * Create a RAID-5 volume.
+ *
+ * Only checks that at least one argument follows the command name; the
+ * options and drive list are handed unchanged to create_volume().
+ */
+void
+gvinum_raid5(int argc, char **argv)
+{
- if ((rv == 2) && (flags & GV_FLAG_V)) {
- if (rebuild)
- printf("Rebuilt parity on %s\n", argv[0]);
- else
- printf("%s has correct parity\n", argv[0]);
+	if (argc < 2) {
+		/* No trailing "\n": warnx() terminates the line itself. */
+		warnx("usage:\traid5 [-fv] [-s stripesize] [-n name] drives");
+		return;
}
+	create_volume(argc, argv, "raid5");
}
+
void
gvinum_rename(int argc, char **argv)
{
@@ -697,8 +1052,11 @@ gvinum_rm(int argc, char **argv)
flags = 0;
optreset = 1;
optind = 1;
- while ((j = getopt(argc, argv, "r")) != -1) {
+ while ((j = getopt(argc, argv, "rf")) != -1) {
switch (j) {
+ case 'f':
+ flags |= GV_FLAG_F;
+ break;
case 'r':
flags |= GV_FLAG_R;
break;
@@ -728,7 +1086,6 @@ gvinum_rm(int argc, char **argv)
return;
}
gctl_free(req);
- gvinum_list(0, NULL);
}
void
@@ -763,7 +1120,6 @@ gvinum_resetconfig(void)
return;
}
gctl_free(req);
- gvinum_list(0, NULL);
printf("gvinum configuration obliterated\n");
}
@@ -833,28 +1189,53 @@ gvinum_start(int argc, char **argv)
}
gctl_free(req);
- gvinum_list(0, NULL);
}
void
gvinum_stop(int argc, char **argv)
{
- int fileid;
+ int err, fileid;
fileid = kldfind(GVINUMMOD);
if (fileid == -1) {
warn("cannot find " GVINUMMOD);
return;
}
- if (kldunload(fileid) != 0) {
+
+ /*
+ * Unload the GVINUMMOD kernel module: print a progress message, call
+ * kldunload(), and retry once after a one-second sleep if the first
+ * attempt fails with errno set to EAGAIN. On success the program
+ * exits with status 0; on failure a warning is printed and the
+ * function returns. argc and argv are not used.
+ *
+ * This little hack prevents that we end up in an infinite loop in
+ * g_unload_class(). gv_unload() will return EAGAIN so that the GEOM
+ * event thread will be free for the g_wither_geom() call from
+ * gv_unload(). It's silly, but it works.
+ *
+ * NOTE(review): printf() runs between the failing kldunload() and
+ * warn(); if it changes errno the reported reason may be wrong --
+ * confirm.
+ */
+ printf("unloading " GVINUMMOD " kernel module... ");
+ fflush(stdout);
+ if ((err = kldunload(fileid)) != 0 && (errno == EAGAIN)) {
+ sleep(1);
+ err = kldunload(fileid);
+ }
+ if (err != 0) {
+ printf(" failed!\n");
warn("cannot unload " GVINUMMOD);
return;
}
- warnx(GVINUMMOD " unloaded");
+ printf("done\n");
exit(0);
}
+/*
+ * Create a striped volume.
+ *
+ * Only checks that at least one argument follows the command name; the
+ * options and drive list are handed unchanged to create_volume().
+ */
+void
+gvinum_stripe(int argc, char **argv)
+{
+
+	if (argc < 2) {
+		/* No trailing "\n": warnx() terminates the line itself. */
+		warnx("usage:\tstripe [-fv] [-n name] drives");
+		return;
+	}
+	create_volume(argc, argv, "stripe");
+}
+
void
parseline(int argc, char **argv)
{
@@ -865,6 +1246,12 @@ parseline(int argc, char **argv)
gvinum_create(argc, argv);
else if (!strcmp(argv[0], "exit") || !strcmp(argv[0], "quit"))
exit(0);
+ else if (!strcmp(argv[0], "attach"))
+ gvinum_attach(argc, argv);
+ else if (!strcmp(argv[0], "detach"))
+ gvinum_detach(argc, argv);
+ else if (!strcmp(argv[0], "concat"))
+ gvinum_concat(argc, argv);
else if (!strcmp(argv[0], "help"))
gvinum_help();
else if (!strcmp(argv[0], "list") || !strcmp(argv[0], "l"))
@@ -877,12 +1264,16 @@ parseline(int argc, char **argv)
gvinum_list(argc, argv);
else if (!strcmp(argv[0], "lv"))
gvinum_list(argc, argv);
+ else if (!strcmp(argv[0], "mirror"))
+ gvinum_mirror(argc, argv);
else if (!strcmp(argv[0], "move"))
gvinum_move(argc, argv);
else if (!strcmp(argv[0], "mv"))
gvinum_move(argc, argv);
else if (!strcmp(argv[0], "printconfig"))
gvinum_printconfig(argc, argv);
+ else if (!strcmp(argv[0], "raid5"))
+ gvinum_raid5(argc, argv);
else if (!strcmp(argv[0], "rename"))
gvinum_rename(argc, argv);
else if (!strcmp(argv[0], "resetconfig"))
@@ -897,6 +1288,8 @@ parseline(int argc, char **argv)
gvinum_start(argc, argv);
else if (!strcmp(argv[0], "stop"))
gvinum_stop(argc, argv);
+ else if (!strcmp(argv[0], "stripe"))
+ gvinum_stripe(argc, argv);
else if (!strcmp(argv[0], "checkparity"))
gvinum_parityop(argc, argv, 0);
else if (!strcmp(argv[0], "rebuildparity"))
diff --git a/sys/geom/vinum/geom_vinum.c b/sys/geom/vinum/geom_vinum.c
index 1a915ed..4a6760a 100644
--- a/sys/geom/vinum/geom_vinum.c
+++ b/sys/geom/vinum/geom_vinum.c
@@ -1,5 +1,6 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
+ * Copyright (c) 2007, 2009 Ulf Lilleengen
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,6 +32,7 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bio.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/lock.h>
#include <sys/malloc.h>
#include <sys/module.h>
@@ -41,7 +43,7 @@ __FBSDID("$FreeBSD$");
#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
-#include <geom/vinum/geom_vinum_share.h>
+#include <geom/vinum/geom_vinum_raid5.h>
SYSCTL_DECL(_kern_geom);
SYSCTL_NODE(_kern_geom, OID_AUTO, vinum, CTLFLAG_RW, 0, "GEOM_VINUM stuff");
@@ -50,14 +52,18 @@ TUNABLE_INT("kern.geom.vinum.debug", &g_vinum_debug);
SYSCTL_UINT(_kern_geom_vinum, OID_AUTO, debug, CTLFLAG_RW, &g_vinum_debug, 0,
"Debug level");
-int gv_create(struct g_geom *, struct gctl_req *);
+static int gv_create(struct g_geom *, struct gctl_req *);
+static void gv_attach(struct gv_softc *, struct gctl_req *);
+static void gv_detach(struct gv_softc *, struct gctl_req *);
+static void gv_parityop(struct gv_softc *, struct gctl_req *);
+
static void
gv_orphan(struct g_consumer *cp)
{
struct g_geom *gp;
struct gv_softc *sc;
- int error;
+ struct gv_drive *d;
g_topology_assert();
@@ -65,59 +71,90 @@ gv_orphan(struct g_consumer *cp)
gp = cp->geom;
KASSERT(gp != NULL, ("gv_orphan: null gp"));
sc = gp->softc;
+ KASSERT(sc != NULL, ("gv_orphan: null sc"));
+ d = cp->private;
+ KASSERT(d != NULL, ("gv_orphan: null d"));
g_trace(G_T_TOPOLOGY, "gv_orphan(%s)", gp->name);
- if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
- g_access(cp, -cp->acr, -cp->acw, -cp->ace);
- error = cp->provider->error;
- if (error == 0)
- error = ENXIO;
- g_detach(cp);
- g_destroy_consumer(cp);
- if (!LIST_EMPTY(&gp->consumer))
- return;
- g_free(sc);
- g_wither_geom(gp, error);
+ gv_post_event(sc, GV_EVENT_DRIVE_LOST, d, NULL, 0, 0);
}
-static void
+void
gv_start(struct bio *bp)
{
- struct bio *bp2;
struct g_geom *gp;
+ struct gv_softc *sc;
gp = bp->bio_to->geom;
- switch(bp->bio_cmd) {
+ sc = gp->softc;
+ /*
+ * Entry point for bios sent to this geom. Only read, write and delete
+ * requests are accepted; every other command, BIO_GETATTR included,
+ * is completed right away with EOPNOTSUPP.
+ */
+ switch (bp->bio_cmd) {
case BIO_READ:
case BIO_WRITE:
case BIO_DELETE:
- bp2 = g_clone_bio(bp);
- if (bp2 == NULL)
- g_io_deliver(bp, ENOMEM);
- else {
- bp2->bio_done = g_std_done;
- g_io_request(bp2, LIST_FIRST(&gp->consumer));
- }
- return;
+ break;
+ case BIO_GETATTR:
default:
g_io_deliver(bp, EOPNOTSUPP);
return;
}
+ /*
+ * Accepted bios are not processed here: they are put on the softc's
+ * bio queue under queue_mtx and whoever sleeps on sc (gv_worker()) is
+ * woken up to handle them.
+ */
+ mtx_lock(&sc->queue_mtx);
+ bioq_disksort(sc->bqueue, bp);
+ wakeup(sc);
+ mtx_unlock(&sc->queue_mtx);
}
-static int
+void
+gv_done(struct bio *bp)
+{
+ struct g_geom *gp;
+ struct gv_softc *sc;
+ /*
+ * Mark a bio as done and queue it for the worker. The softc is looked
+ * up through the geom of the bio's bio_from consumer.
+ */
+ KASSERT(bp != NULL, ("NULL bp"));
+
+ gp = bp->bio_from->geom;
+ sc = gp->softc;
+ bp->bio_cflags |= GV_BIO_DONE;
+ /*
+ * The bio goes onto the same queue that gv_start() uses; gv_worker()
+ * tells the two kinds apart by the GV_BIO_DONE flag set above and
+ * passes flagged bios to gv_bio_done().
+ */
+ mtx_lock(&sc->queue_mtx);
+ bioq_disksort(sc->bqueue, bp);
+ wakeup(sc);
+ mtx_unlock(&sc->queue_mtx);
+}
+
+/*
+ * Apply an access count change requested on a gvinum provider to the
+ * consumers of all drives that currently have one. If any drive refuses,
+ * the change is undone on the drives that were already updated and the
+ * error from g_access() is returned. Returns 0 on success.
+ */
+int
gv_access(struct g_provider *pp, int dr, int dw, int de)
{
struct g_geom *gp;
- struct g_consumer *cp;
+ struct gv_softc *sc;
+ struct gv_drive *d, *d2;
int error;
- gp = pp->geom;
error = ENXIO;
- cp = LIST_FIRST(&gp->consumer);
- error = g_access(cp, dr, dw, de);
- return (error);
+ gp = pp->geom;
+ sc = gp->softc;
+ /* A write-only change also takes (or drops) one read reference. */
+ if (dw > 0 && dr == 0)
+ dr = 1;
+ else if (dw < 0 && dr == 0)
+ dr = -1;
+ LIST_FOREACH(d, &sc->drives, drive) {
+ if (d->consumer == NULL)
+ continue;
+ error = g_access(d->consumer, dr, dw, de);
+ if (error) {
+ /* Roll back every drive that precedes the failing one. */
+ LIST_FOREACH(d2, &sc->drives, drive) {
+ if (d == d2)
+ break;
+ /*
+ * Drives without a consumer were skipped above,
+ * so there is nothing to undo for them (and no
+ * consumer to hand to g_access()).
+ */
+ if (d2->consumer == NULL)
+ continue;
+ g_access(d2->consumer, -dr, -dw, -de);
+ }
+ G_VINUM_DEBUG(0, "g_access '%s' failed: %d", d->name,
+ error);
+ return (error);
+ }
+ }
+ return (0);
}
static void
@@ -136,14 +173,132 @@ gv_init(struct g_class *mp)
gp->softc = g_malloc(sizeof(struct gv_softc), M_WAITOK | M_ZERO);
sc = gp->softc;
sc->geom = gp;
+ sc->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO);
+ bioq_init(sc->bqueue);
LIST_INIT(&sc->drives);
LIST_INIT(&sc->subdisks);
LIST_INIT(&sc->plexes);
LIST_INIT(&sc->volumes);
+ TAILQ_INIT(&sc->equeue);
+ mtx_init(&sc->config_mtx, "gv_config", NULL, MTX_DEF);
+ mtx_init(&sc->queue_mtx, "gv_queue", NULL, MTX_DEF);
+ kproc_create(gv_worker, sc, NULL, 0, 0, "gv_worker");
+}
+
+static int
+gv_unload(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
+{
+ struct gv_softc *sc;
+ /*
+ * Installed as the class's destroy_geom method. Must be called with
+ * the topology lock held (asserted below).
+ */
+ g_trace(G_T_TOPOLOGY, "gv_unload(%p)", mp);
+
+ g_topology_assert();
+ sc = gp->softc;
+ /*
+ * While a softc is still attached: ask the worker to exit via a
+ * GV_EVENT_THREAD_EXIT event, detach the softc from the geom, wither
+ * the geom and report EAGAIN. A call that finds no softc returns 0.
+ * The "stop" command in userland retries kldunload() once after
+ * getting EAGAIN.
+ */
+ if (sc != NULL) {
+ gv_post_event(sc, GV_EVENT_THREAD_EXIT, NULL, NULL, 0, 0);
+ gp->softc = NULL;
+ g_wither_geom(gp, ENXIO);
+ return (EAGAIN);
+ }
+
+ return (0);
+}
+
+/*
+ * Handle userland request of attaching object.
+ *
+ * The request must carry the parameters "child" and "parent" (object
+ * names), "offset" and "rename"; a missing one is reported through
+ * gctl_error() and nothing is done. A plex may only be attached to a volume
+ * and a subdisk only to a plex; any other child type is rejected. The attach
+ * itself is not performed here: a GV_EVENT_ATTACH_PLEX or GV_EVENT_ATTACH_SD
+ * event carrying the two objects, the offset and the rename flag is posted
+ * and later handled by gv_worker(). Note that gv_worker() uses the offset
+ * only for subdisks, not for plexes.
+ */
+static void
+gv_attach(struct gv_softc *sc, struct gctl_req *req)
+{
+ struct gv_volume *v;
+ struct gv_plex *p;
+ struct gv_sd *s;
+ off_t *offset;
+ int *rename, type_child, type_parent;
+ char *child, *parent;
+
+ child = gctl_get_param(req, "child", NULL);
+ if (child == NULL) {
+ gctl_error(req, "no child given");
+ return;
+ }
+ parent = gctl_get_param(req, "parent", NULL);
+ if (parent == NULL) {
+ gctl_error(req, "no parent given");
+ return;
+ }
+ offset = gctl_get_paraml(req, "offset", sizeof(*offset));
+ if (offset == NULL) {
+ gctl_error(req, "no offset given");
+ return;
+ }
+ rename = gctl_get_paraml(req, "rename", sizeof(*rename));
+ if (rename == NULL) {
+ gctl_error(req, "no rename flag given");
+ return;
+ }
+
+ type_child = gv_object_type(sc, child);
+ type_parent = gv_object_type(sc, parent);
+
+ switch (type_child) {
+ case GV_TYPE_PLEX:
+ if (type_parent != GV_TYPE_VOL) {
+ gctl_error(req, "no such volume to attach to");
+ return;
+ }
+ v = gv_find_vol(sc, parent);
+ p = gv_find_plex(sc, child);
+ gv_post_event(sc, GV_EVENT_ATTACH_PLEX, p, v, *offset, *rename);
+ break;
+ case GV_TYPE_SD:
+ if (type_parent != GV_TYPE_PLEX) {
+ gctl_error(req, "no such plex to attach to");
+ return;
+ }
+ p = gv_find_plex(sc, parent);
+ s = gv_find_sd(sc, child);
+ gv_post_event(sc, GV_EVENT_ATTACH_SD, s, p, *offset, *rename);
+ break;
+ default:
+ gctl_error(req, "invalid child type");
+ break;
+ }
+}
+
+/*
+ * Handle userland request of detaching object.
+ *
+ * The request must carry the parameters "object" (name of a plex or a
+ * subdisk) and "flags"; a missing one is reported through gctl_error() and
+ * nothing is done. The detach itself is not performed here: a
+ * GV_EVENT_DETACH_PLEX or GV_EVENT_DETACH_SD event carrying the object and
+ * the flags is posted and later handled by gv_worker(). Objects of any other
+ * type are rejected.
+ */
+static void
+gv_detach(struct gv_softc *sc, struct gctl_req *req)
+{
+ struct gv_plex *p;
+ struct gv_sd *s;
+ int *flags, type;
+ char *object;
+
+ object = gctl_get_param(req, "object", NULL);
+ if (object == NULL) {
+ gctl_error(req, "no argument given");
+ return;
+ }
+
+ flags = gctl_get_paraml(req, "flags", sizeof(*flags));
+ /* The flags are dereferenced below, so they must be present. */
+ if (flags == NULL) {
+ gctl_error(req, "no flags given");
+ return;
+ }
+ type = gv_object_type(sc, object);
+ switch (type) {
+ case GV_TYPE_PLEX:
+ p = gv_find_plex(sc, object);
+ gv_post_event(sc, GV_EVENT_DETACH_PLEX, p, NULL, *flags, 0);
+ break;
+ case GV_TYPE_SD:
+ s = gv_find_sd(sc, object);
+ gv_post_event(sc, GV_EVENT_DETACH_SD, s, NULL, *flags, 0);
+ break;
+ default:
+ gctl_error(req, "invalid object type");
+ break;
+ }
}
/* Handle userland requests for creating new objects. */
-int
+static int
gv_create(struct g_geom *gp, struct gctl_req *req)
{
struct gv_softc *sc;
@@ -151,10 +306,9 @@ gv_create(struct g_geom *gp, struct gctl_req *req)
struct gv_plex *p, *p2;
struct gv_sd *s, *s2;
struct gv_volume *v, *v2;
- struct g_consumer *cp;
struct g_provider *pp;
- int error, i, *drives, *plexes, *subdisks, *volumes;
- char buf[20], errstr[ERRBUFSIZ];
+ int error, i, *drives, *flags, *plexes, *subdisks, *volumes;
+ char buf[20];
g_topology_assert();
@@ -170,6 +324,11 @@ gv_create(struct g_geom *gp, struct gctl_req *req)
gctl_error(req, "number of objects not given");
return (-1);
}
+ flags = gctl_get_paraml(req, "flags", sizeof(*flags));
+ if (flags == NULL) {
+ gctl_error(req, "flags not given");
+ return (-1);
+ }
/* First, handle drive definitions ... */
for (i = 0; i < *drives; i++) {
@@ -179,33 +338,33 @@ gv_create(struct g_geom *gp, struct gctl_req *req)
gctl_error(req, "no drive definition given");
return (-1);
}
- d = gv_find_drive(sc, d2->name);
- if (d != NULL) {
- gctl_error(req, "drive '%s' is already known",
- d->name);
- continue;
- }
-
- d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
- bcopy(d2, d, sizeof(*d));
-
/*
- * Make sure that the provider specified in the drive
- * specification is an active GEOM provider.
+ * Make sure that the device specified in the drive config is
+ * an active GEOM provider.
*/
- pp = g_provider_by_name(d->device);
+ pp = g_provider_by_name(d2->device);
if (pp == NULL) {
- gctl_error(req, "%s: drive not found", d->device);
- g_free(d);
- return (-1);
+ gctl_error(req, "%s: device not found", d2->device);
+ goto error;
+ }
+ if (gv_find_drive(sc, d2->name) != NULL) {
+ /* Ignore error. */
+ if (*flags & GV_FLAG_F)
+ continue;
+ gctl_error(req, "drive '%s' already exists", d2->name);
+ goto error;
+ }
+ if (gv_find_drive_device(sc, d2->device) != NULL) {
+ gctl_error(req, "device '%s' already configured in "
+ "gvinum", d2->device);
+ goto error;
}
- d->size = pp->mediasize - GV_DATA_START;
- d->avail = d->size;
- gv_config_new_drive(d);
- d->flags |= GV_DRIVE_NEWBORN;
- LIST_INSERT_HEAD(&sc->drives, d, drive);
+ d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
+ bcopy(d2, d, sizeof(*d));
+
+ gv_post_event(sc, GV_EVENT_CREATE_DRIVE, d, NULL, 0, 0);
}
/* ... then volume definitions ... */
@@ -217,19 +376,18 @@ gv_create(struct g_geom *gp, struct gctl_req *req)
gctl_error(req, "no volume definition given");
return (-1);
}
- v = gv_find_vol(sc, v2->name);
- if (v != NULL) {
- gctl_error(req, "volume '%s' is already known",
- v->name);
- return (-1);
+ if (gv_find_vol(sc, v2->name) != NULL) {
+ /* Ignore error. */
+ if (*flags & GV_FLAG_F)
+ continue;
+ gctl_error(req, "volume '%s' already exists", v2->name);
+ goto error;
}
v = g_malloc(sizeof(*v), M_WAITOK | M_ZERO);
bcopy(v2, v, sizeof(*v));
- v->vinumconf = sc;
- LIST_INIT(&v->plexes);
- LIST_INSERT_HEAD(&sc->volumes, v, volume);
+ gv_post_event(sc, GV_EVENT_CREATE_VOLUME, v, NULL, 0, 0);
}
/* ... then plex definitions ... */
@@ -241,35 +399,21 @@ gv_create(struct g_geom *gp, struct gctl_req *req)
gctl_error(req, "no plex definition given");
return (-1);
}
- p = gv_find_plex(sc, p2->name);
- if (p != NULL) {
- gctl_error(req, "plex '%s' is already known", p->name);
- return (-1);
+ if (gv_find_plex(sc, p2->name) != NULL) {
+ /* Ignore error. */
+ if (*flags & GV_FLAG_F)
+ continue;
+ gctl_error(req, "plex '%s' already exists", p2->name);
+ goto error;
}
p = g_malloc(sizeof(*p), M_WAITOK | M_ZERO);
bcopy(p2, p, sizeof(*p));
- /* Find the volume this plex should be attached to. */
- v = gv_find_vol(sc, p->volume);
- if (v == NULL) {
- gctl_error(req, "volume '%s' not found", p->volume);
- g_free(p);
- continue;
- }
- if (v->plexcount)
- p->flags |= GV_PLEX_ADDED;
- p->vol_sc = v;
- v->plexcount++;
- LIST_INSERT_HEAD(&v->plexes, p, in_volume);
-
- p->vinumconf = sc;
- p->flags |= GV_PLEX_NEWBORN;
- LIST_INIT(&p->subdisks);
- LIST_INSERT_HEAD(&sc->plexes, p, plex);
+ gv_post_event(sc, GV_EVENT_CREATE_PLEX, p, NULL, 0, 0);
}
- /* ... and finally, subdisk definitions. */
+ /* ... and, finally, subdisk definitions. */
for (i = 0; i < *subdisks; i++) {
error = 0;
snprintf(buf, sizeof(buf), "sd%d", i);
@@ -278,122 +422,23 @@ gv_create(struct g_geom *gp, struct gctl_req *req)
gctl_error(req, "no subdisk definition given");
return (-1);
}
- s = gv_find_sd(sc, s2->name);
- if (s != NULL) {
- gctl_error(req, "subdisk '%s' is already known",
- s->name);
- return (-1);
+ if (gv_find_sd(sc, s2->name) != NULL) {
+ /* Ignore error. */
+ if (*flags & GV_FLAG_F)
+ continue;
+ gctl_error(req, "sd '%s' already exists", s2->name);
+ goto error;
}
s = g_malloc(sizeof(*s), M_WAITOK | M_ZERO);
bcopy(s2, s, sizeof(*s));
- /* Find the drive where this subdisk should be put on. */
- d = gv_find_drive(sc, s->drive);
-
- /* drive not found - XXX */
- if (d == NULL) {
- gctl_error(req, "drive '%s' not found", s->drive);
- g_free(s);
- continue;
- }
-
- /* Find the plex where this subdisk belongs to. */
- p = gv_find_plex(sc, s->plex);
-
- /* plex not found - XXX */
- if (p == NULL) {
- gctl_error(req, "plex '%s' not found\n", s->plex);
- g_free(s);
- continue;
- }
-
- /*
- * First we give the subdisk to the drive, to handle autosized
- * values ...
- */
- error = gv_sd_to_drive(sc, d, s, errstr, sizeof(errstr));
- if (error) {
- gctl_error(req, errstr);
- g_free(s);
- continue;
- }
-
- /*
- * Then, we give the subdisk to the plex; we check if the
- * given values are correct and maybe adjust them.
- */
- error = gv_sd_to_plex(p, s, 1);
- if (error) {
- gctl_error(req, "GEOM_VINUM: couldn't give sd '%s' "
- "to plex '%s'\n", s->name, p->name);
- if (s->drive_sc)
- LIST_REMOVE(s, from_drive);
- gv_free_sd(s);
- g_free(s);
- /*
- * If this subdisk can't be created, we won't create
- * the attached plex either, if it is also a new one.
- */
- if (!(p->flags & GV_PLEX_NEWBORN))
- continue;
- LIST_FOREACH_SAFE(s, &p->subdisks, in_plex, s2) {
- if (s->drive_sc)
- LIST_REMOVE(s, from_drive);
- p->sdcount--;
- LIST_REMOVE(s, in_plex);
- LIST_REMOVE(s, sd);
- gv_free_sd(s);
- g_free(s);
- }
- if (p->vol_sc != NULL) {
- LIST_REMOVE(p, in_volume);
- p->vol_sc->plexcount--;
- }
- LIST_REMOVE(p, plex);
- g_free(p);
- continue;
- }
- s->flags |= GV_SD_NEWBORN;
-
- s->vinumconf = sc;
- LIST_INSERT_HEAD(&sc->subdisks, s, sd);
+ gv_post_event(sc, GV_EVENT_CREATE_SD, s, NULL, 0, 0);
}
- LIST_FOREACH(s, &sc->subdisks, sd)
- gv_update_sd_state(s);
- LIST_FOREACH(p, &sc->plexes, plex)
- gv_update_plex_config(p);
- LIST_FOREACH(v, &sc->volumes, volume)
- gv_update_vol_state(v);
-
- /*
- * Write out the configuration to each drive. If the drive doesn't
- * have a valid geom_slice geom yet, attach it temporarily to our VINUM
- * geom.
- */
- LIST_FOREACH(d, &sc->drives, drive) {
- if (d->geom == NULL) {
- /*
- * XXX if the provider disapears before we get a chance
- * to write the config out to the drive, should this
- * be handled any differently?
- */
- pp = g_provider_by_name(d->device);
- if (pp == NULL) {
- G_VINUM_DEBUG(0, "%s: drive disappeared?",
- d->device);
- continue;
- }
- cp = g_new_consumer(gp);
- g_attach(cp, pp);
- gv_save_config(cp, d, sc);
- g_detach(cp);
- g_destroy_consumer(cp);
- } else
- gv_save_config(NULL, d, sc);
- d->flags &= ~GV_DRIVE_NEWBORN;
- }
+error:
+ gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc, NULL, 0, 0);
+ gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
return (0);
}
@@ -411,13 +456,21 @@ gv_config(struct gctl_req *req, struct g_class *mp, char const *verb)
gp = LIST_FIRST(&mp->geom);
sc = gp->softc;
- if (!strcmp(verb, "list")) {
+ if (!strcmp(verb, "attach")) {
+ gv_attach(sc, req);
+
+ } else if (!strcmp(verb, "concat")) {
+ gv_concat(gp, req);
+
+ } else if (!strcmp(verb, "detach")) {
+ gv_detach(sc, req);
+
+ } else if (!strcmp(verb, "list")) {
gv_list(gp, req);
/* Save our configuration back to disk. */
} else if (!strcmp(verb, "saveconfig")) {
-
- gv_save_config_all(sc);
+ gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
/* Return configuration in string form. */
} else if (!strcmp(verb, "getconfig")) {
@@ -435,11 +488,18 @@ gv_config(struct gctl_req *req, struct g_class *mp, char const *verb)
} else if (!strcmp(verb, "create")) {
gv_create(gp, req);
+ } else if (!strcmp(verb, "mirror")) {
+ gv_mirror(gp, req);
+
} else if (!strcmp(verb, "move")) {
gv_move(gp, req);
- } else if (!strcmp(verb, "parityop")) {
- gv_parityop(gp, req);
+ } else if (!strcmp(verb, "raid5")) {
+ gv_raid5(gp, req);
+
+ } else if (!strcmp(verb, "rebuildparity") ||
+ !strcmp(verb, "checkparity")) {
+ gv_parityop(sc, req);
} else if (!strcmp(verb, "remove")) {
gv_remove(gp, req);
@@ -448,100 +508,509 @@ gv_config(struct gctl_req *req, struct g_class *mp, char const *verb)
gv_rename(gp, req);
} else if (!strcmp(verb, "resetconfig")) {
- gv_resetconfig(gp, req);
+ gv_post_event(sc, GV_EVENT_RESET_CONFIG, sc, NULL, 0, 0);
} else if (!strcmp(verb, "start")) {
gv_start_obj(gp, req);
+ } else if (!strcmp(verb, "stripe")) {
+ gv_stripe(gp, req);
+
} else if (!strcmp(verb, "setstate")) {
gv_setstate(gp, req);
-
} else
gctl_error(req, "Unknown verb parameter");
}
-#if 0
-static int
-gv_destroy_geom(struct gctl_req *req, struct g_class *mp, struct g_geom *gp)
+static void
+gv_parityop(struct gv_softc *sc, struct gctl_req *req)
{
- struct g_geom *gp2;
- struct g_consumer *cp;
- struct gv_softc *sc;
- struct gv_drive *d, *d2;
- struct gv_plex *p, *p2;
- struct gv_sd *s, *s2;
- struct gv_volume *v, *v2;
- struct gv_freelist *fl, *fl2;
+ struct gv_plex *p;
+ int *flags, *rebuild, type;
+ char *plex;
- g_trace(G_T_TOPOLOGY, "gv_destroy_geom: %s", gp->name);
- g_topology_assert();
+ plex = gctl_get_param(req, "plex", NULL);
+ if (plex == NULL) {
+ gctl_error(req, "no plex given");
+ return;
+ }
- KASSERT(gp != NULL, ("gv_destroy_geom: null gp"));
- KASSERT(gp->softc != NULL, ("gv_destroy_geom: null sc"));
+ flags = gctl_get_paraml(req, "flags", sizeof(*flags));
+ if (flags == NULL) {
+ gctl_error(req, "no flags given");
+ return;
+ }
- sc = gp->softc;
+ rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild));
+ if (rebuild == NULL) {
+ gctl_error(req, "no operation given");
+ return;
+ }
- /*
- * Check if any of our drives is still open; if so, refuse destruction.
- */
- LIST_FOREACH(d, &sc->drives, drive) {
- gp2 = d->geom;
- cp = LIST_FIRST(&gp2->consumer);
- if (cp != NULL)
- g_access(cp, -1, -1, -1);
- if (gv_is_open(gp2))
- return (EBUSY);
+ type = gv_object_type(sc, plex);
+ if (type != GV_TYPE_PLEX) {
+ gctl_error(req, "'%s' is not a plex", plex);
+ return;
}
+ p = gv_find_plex(sc, plex);
- /* Clean up and deallocate what we allocated. */
- LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
- LIST_REMOVE(d, drive);
- g_free(d->hdr);
- d->hdr = NULL;
- LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
- d->freelist_entries--;
- LIST_REMOVE(fl, freelist);
- g_free(fl);
- fl = NULL;
- }
- d->geom->softc = NULL;
- g_free(d);
+ if (p->state != GV_PLEX_UP) {
+ gctl_error(req, "plex %s is not completely accessible",
+ p->name);
+ return;
}
- LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
- LIST_REMOVE(s, sd);
- s->drive_sc = NULL;
- s->plex_sc = NULL;
- s->provider = NULL;
- s->consumer = NULL;
- g_free(s);
+ if (p->org != GV_PLEX_RAID5) {
+ gctl_error(req, "plex %s is not a RAID5 plex", p->name);
+ return;
}
- LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
- LIST_REMOVE(p, plex);
- gv_kill_thread(p);
- p->vol_sc = NULL;
- p->geom->softc = NULL;
- p->provider = NULL;
- p->consumer = NULL;
- if (p->org == GV_PLEX_RAID5) {
- mtx_destroy(&p->worklist_mtx);
- }
- g_free(p);
+ /* Put it in the event queue. */
+ /* XXX: The state of the plex might have changed when this event is
+ * picked up ... We should perhaps check this afterwards. */
+ if (*rebuild)
+ gv_post_event(sc, GV_EVENT_PARITY_REBUILD, p, NULL, 0, 0);
+ else
+ gv_post_event(sc, GV_EVENT_PARITY_CHECK, p, NULL, 0, 0);
+}
+
+
+static struct g_geom *
+gv_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+{
+ struct g_geom *gp;
+ struct g_consumer *cp;
+ struct gv_softc *sc;
+ struct gv_hdr vhdr;
+ int error;
+
+ g_topology_assert();
+ g_trace(G_T_TOPOLOGY, "gv_taste(%s, %s)", mp->name, pp->name);
+
+ gp = LIST_FIRST(&mp->geom);
+ if (gp == NULL) {
+ G_VINUM_DEBUG(0, "error: tasting, but not initialized?");
+ return (NULL);
}
+ sc = gp->softc;
- LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
- LIST_REMOVE(v, volume);
- v->geom->softc = NULL;
- g_free(v);
+ cp = g_new_consumer(gp);
+ if (g_attach(cp, pp) != 0) {
+ g_destroy_consumer(cp);
+ return (NULL);
}
+ if (g_access(cp, 1, 0, 0) != 0) {
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ return (NULL);
+ }
+ g_topology_unlock();
- gp->softc = NULL;
- g_free(sc);
- g_wither_geom(gp, ENXIO);
- return (0);
+ error = gv_read_header(cp, &vhdr);
+
+ g_topology_lock();
+ g_access(cp, -1, 0, 0);
+ g_detach(cp);
+ g_destroy_consumer(cp);
+
+ /* Check if what we've been given is a valid vinum drive. */
+ if (!error)
+ gv_post_event(sc, GV_EVENT_DRIVE_TASTED, pp, NULL, 0, 0);
+
+ return (NULL);
+}
+
+/*
+ * Main loop of the gvinum worker. arg is the softc.
+ *
+ * Each iteration first looks at the event queue (sc->equeue); if an event is
+ * pending it is removed, dispatched on its type and freed. Only when no
+ * event is pending is a bio taken from sc->bqueue and routed according to
+ * its flags. When there is no bio either, the loop sleeps on sc for at most
+ * hz/10 ticks. sc->queue_mtx is held while the queues are inspected and
+ * dropped while an event or a bio is being processed. The loop is left only
+ * through GV_EVENT_THREAD_EXIT, which cleans up, frees the softc and calls
+ * kproc_exit().
+ */
+void
+gv_worker(void *arg)
+{
+ struct g_provider *pp;
+ struct gv_softc *sc;
+ struct gv_event *ev;
+ struct gv_volume *v;
+ struct gv_plex *p;
+ struct gv_sd *s;
+ struct gv_drive *d;
+ struct bio *bp;
+ int newstate, flags, err, rename;
+ char *newname;
+ off_t offset;
+
+ sc = arg;
+ KASSERT(sc != NULL, ("NULL sc"));
+ mtx_lock(&sc->queue_mtx);
+ for (;;) {
+ /* Look at the events first... */
+ ev = TAILQ_FIRST(&sc->equeue);
+ if (ev != NULL) {
+ TAILQ_REMOVE(&sc->equeue, ev, events);
+ mtx_unlock(&sc->queue_mtx);
+
+ switch (ev->type) {
+ case GV_EVENT_DRIVE_TASTED:
+ G_VINUM_DEBUG(2, "event 'drive tasted'");
+ pp = ev->arg1;
+ gv_drive_tasted(sc, pp);
+ break;
+
+ case GV_EVENT_DRIVE_LOST:
+ G_VINUM_DEBUG(2, "event 'drive lost'");
+ d = ev->arg1;
+ gv_drive_lost(sc, d);
+ break;
+
+ case GV_EVENT_CREATE_DRIVE:
+ G_VINUM_DEBUG(2, "event 'create drive'");
+ d = ev->arg1;
+ gv_create_drive(sc, d);
+ break;
+
+ case GV_EVENT_CREATE_VOLUME:
+ G_VINUM_DEBUG(2, "event 'create volume'");
+ v = ev->arg1;
+ gv_create_volume(sc, v);
+ break;
+
+ case GV_EVENT_CREATE_PLEX:
+ G_VINUM_DEBUG(2, "event 'create plex'");
+ p = ev->arg1;
+ gv_create_plex(sc, p);
+ break;
+
+ case GV_EVENT_CREATE_SD:
+ G_VINUM_DEBUG(2, "event 'create sd'");
+ s = ev->arg1;
+ gv_create_sd(sc, s);
+ break;
+
+ case GV_EVENT_RM_DRIVE:
+ G_VINUM_DEBUG(2, "event 'remove drive'");
+ d = ev->arg1;
+ flags = ev->arg3;
+ gv_rm_drive(sc, d, flags);
+ /*gv_setup_objects(sc);*/
+ break;
+
+ case GV_EVENT_RM_VOLUME:
+ G_VINUM_DEBUG(2, "event 'remove volume'");
+ v = ev->arg1;
+ gv_rm_vol(sc, v);
+ /*gv_setup_objects(sc);*/
+ break;
+
+ case GV_EVENT_RM_PLEX:
+ G_VINUM_DEBUG(2, "event 'remove plex'");
+ p = ev->arg1;
+ gv_rm_plex(sc, p);
+ /*gv_setup_objects(sc);*/
+ break;
+
+ case GV_EVENT_RM_SD:
+ G_VINUM_DEBUG(2, "event 'remove sd'");
+ s = ev->arg1;
+ gv_rm_sd(sc, s);
+ /*gv_setup_objects(sc);*/
+ break;
+
+ case GV_EVENT_SAVE_CONFIG:
+ G_VINUM_DEBUG(2, "event 'save config'");
+ gv_save_config(sc);
+ break;
+
+ case GV_EVENT_SET_SD_STATE:
+ G_VINUM_DEBUG(2, "event 'setstate sd'");
+ s = ev->arg1;
+ newstate = ev->arg3;
+ flags = ev->arg4;
+ err = gv_set_sd_state(s, newstate, flags);
+ if (err)
+ G_VINUM_DEBUG(0, "error setting subdisk"
+ " state: error code %d", err);
+ break;
+
+ case GV_EVENT_SET_DRIVE_STATE:
+ G_VINUM_DEBUG(2, "event 'setstate drive'");
+ d = ev->arg1;
+ newstate = ev->arg3;
+ flags = ev->arg4;
+ err = gv_set_drive_state(d, newstate, flags);
+ if (err)
+ G_VINUM_DEBUG(0, "error setting drive "
+ "state: error code %d", err);
+ break;
+
+ case GV_EVENT_SET_VOL_STATE:
+ G_VINUM_DEBUG(2, "event 'setstate volume'");
+ v = ev->arg1;
+ newstate = ev->arg3;
+ flags = ev->arg4;
+ err = gv_set_vol_state(v, newstate, flags);
+ if (err)
+ G_VINUM_DEBUG(0, "error setting volume "
+ "state: error code %d", err);
+ break;
+
+ case GV_EVENT_SET_PLEX_STATE:
+ G_VINUM_DEBUG(2, "event 'setstate plex'");
+ p = ev->arg1;
+ newstate = ev->arg3;
+ flags = ev->arg4;
+ err = gv_set_plex_state(p, newstate, flags);
+ if (err)
+ G_VINUM_DEBUG(0, "error setting plex "
+ "state: error code %d", err);
+ break;
+
+ case GV_EVENT_SETUP_OBJECTS:
+ G_VINUM_DEBUG(2, "event 'setup objects'");
+ gv_setup_objects(sc);
+ break;
+
+ case GV_EVENT_RESET_CONFIG:
+ G_VINUM_DEBUG(2, "event 'resetconfig'");
+ err = gv_resetconfig(sc);
+ if (err)
+ G_VINUM_DEBUG(0, "error resetting "
+ "config: error code %d", err);
+ break;
+
+ case GV_EVENT_PARITY_REBUILD:
+ /*
+ * Start the rebuild. The gv_plex_done will
+ * handle issuing of the remaining rebuild bio's
+ * until it's finished.
+ */
+ G_VINUM_DEBUG(2, "event 'rebuild'");
+ p = ev->arg1;
+ if (p->state != GV_PLEX_UP) {
+ G_VINUM_DEBUG(0, "plex %s is not "
+ "completely accessible", p->name);
+ break;
+ }
+ p->synced = 0;
+ g_topology_assert_not();
+ g_topology_lock();
+ err = gv_access(p->vol_sc->provider, 1, 1, 0);
+ if (err) {
+ G_VINUM_DEBUG(0, "unable to access "
+ "provider");
+ /*
+ * Do not leave with the topology lock
+ * still held.
+ */
+ g_topology_unlock();
+ break;
+ }
+ g_topology_unlock();
+ gv_parity_request(p, GV_BIO_CHECK |
+ GV_BIO_PARITY, 0);
+ break;
+
+ case GV_EVENT_PARITY_CHECK:
+ /* Start parity check. */
+ G_VINUM_DEBUG(2, "event 'check'");
+ p = ev->arg1;
+ if (p->state != GV_PLEX_UP) {
+ G_VINUM_DEBUG(0, "plex %s is not "
+ "completely accessible", p->name);
+ break;
+ }
+ p->synced = 0;
+ g_topology_assert_not();
+ g_topology_lock();
+ err = gv_access(p->vol_sc->provider, 1, 1, 0);
+ if (err) {
+ G_VINUM_DEBUG(0, "unable to access "
+ "provider");
+ /*
+ * Do not leave with the topology lock
+ * still held.
+ */
+ g_topology_unlock();
+ break;
+ }
+ g_topology_unlock();
+ gv_parity_request(p, GV_BIO_CHECK, 0);
+ break;
+
+ case GV_EVENT_START_PLEX:
+ G_VINUM_DEBUG(2, "event 'start' plex");
+ p = ev->arg1;
+ gv_start_plex(p);
+ break;
+
+ case GV_EVENT_START_VOLUME:
+ G_VINUM_DEBUG(2, "event 'start' volume");
+ v = ev->arg1;
+ gv_start_vol(v);
+ break;
+
+ case GV_EVENT_ATTACH_PLEX:
+ G_VINUM_DEBUG(2, "event 'attach' plex");
+ p = ev->arg1;
+ v = ev->arg2;
+ rename = ev->arg4;
+ err = gv_attach_plex(p, v, rename);
+ if (err)
+ G_VINUM_DEBUG(0, "error attaching %s to"
+ " %s: error code %d", p->name,
+ v->name, err);
+ break;
+
+ case GV_EVENT_ATTACH_SD:
+ G_VINUM_DEBUG(2, "event 'attach' sd");
+ s = ev->arg1;
+ p = ev->arg2;
+ offset = ev->arg3;
+ rename = ev->arg4;
+ err = gv_attach_sd(s, p, offset, rename);
+ if (err)
+ G_VINUM_DEBUG(0, "error attaching %s to"
+ " %s: error code %d", s->name,
+ p->name, err);
+ break;
+
+ case GV_EVENT_DETACH_PLEX:
+ G_VINUM_DEBUG(2, "event 'detach' plex");
+ p = ev->arg1;
+ flags = ev->arg3;
+ err = gv_detach_plex(p, flags);
+ if (err)
+ G_VINUM_DEBUG(0, "error detaching %s: "
+ "error code %d", p->name, err);
+ break;
+
+ case GV_EVENT_DETACH_SD:
+ G_VINUM_DEBUG(2, "event 'detach' sd");
+ s = ev->arg1;
+ flags = ev->arg3;
+ err = gv_detach_sd(s, flags);
+ if (err)
+ G_VINUM_DEBUG(0, "error detaching %s: "
+ "error code %d", s->name, err);
+ break;
+
+ case GV_EVENT_RENAME_VOL:
+ G_VINUM_DEBUG(2, "event 'rename' volume");
+ v = ev->arg1;
+ newname = ev->arg2;
+ flags = ev->arg3;
+ err = gv_rename_vol(sc, v, newname, flags);
+ if (err)
+ G_VINUM_DEBUG(0, "error renaming %s to "
+ "%s: error code %d", v->name,
+ newname, err);
+ g_free(newname);
+ /* Destroy and recreate the provider if we can. */
+ if (gv_provider_is_open(v->provider)) {
+ G_VINUM_DEBUG(0, "unable to rename "
+ "provider to %s: provider in use",
+ v->name);
+ break;
+ }
+ g_wither_provider(v->provider, ENOENT);
+ v->provider = NULL;
+ gv_post_event(sc, GV_EVENT_SETUP_OBJECTS, sc,
+ NULL, 0, 0);
+ break;
+
+ case GV_EVENT_RENAME_PLEX:
+ G_VINUM_DEBUG(2, "event 'rename' plex");
+ p = ev->arg1;
+ newname = ev->arg2;
+ flags = ev->arg3;
+ err = gv_rename_plex(sc, p, newname, flags);
+ if (err)
+ G_VINUM_DEBUG(0, "error renaming %s to "
+ "%s: error code %d", p->name,
+ newname, err);
+ g_free(newname);
+ break;
+
+ case GV_EVENT_RENAME_SD:
+ G_VINUM_DEBUG(2, "event 'rename' sd");
+ s = ev->arg1;
+ newname = ev->arg2;
+ flags = ev->arg3;
+ err = gv_rename_sd(sc, s, newname, flags);
+ if (err)
+ G_VINUM_DEBUG(0, "error renaming %s to "
+ "%s: error code %d", s->name,
+ newname, err);
+ g_free(newname);
+ break;
+
+ case GV_EVENT_RENAME_DRIVE:
+ G_VINUM_DEBUG(2, "event 'rename' drive");
+ d = ev->arg1;
+ newname = ev->arg2;
+ flags = ev->arg3;
+ err = gv_rename_drive(sc, d, newname, flags);
+ if (err)
+ G_VINUM_DEBUG(0, "error renaming %s to "
+ "%s: error code %d", d->name,
+ newname, err);
+ g_free(newname);
+ break;
+
+ case GV_EVENT_MOVE_SD:
+ G_VINUM_DEBUG(2, "event 'move' sd");
+ s = ev->arg1;
+ d = ev->arg2;
+ flags = ev->arg3;
+ err = gv_move_sd(sc, s, d, flags);
+ if (err)
+ G_VINUM_DEBUG(0, "error moving %s to "
+ "%s: error code %d", s->name,
+ d->name, err);
+ break;
+
+ case GV_EVENT_THREAD_EXIT:
+ G_VINUM_DEBUG(2, "event 'thread exit'");
+ g_free(ev);
+ mtx_lock(&sc->queue_mtx);
+ gv_cleanup(sc);
+ mtx_destroy(&sc->queue_mtx);
+ g_free(sc->bqueue);
+ g_free(sc);
+ kproc_exit(ENXIO);
+ break; /* not reached */
+
+ default:
+ G_VINUM_DEBUG(1, "unknown event %d", ev->type);
+ }
+
+ g_free(ev);
+
+ mtx_lock(&sc->queue_mtx);
+ continue;
+ }
+
+ /* ... then do I/O processing. */
+ bp = bioq_takefirst(sc->bqueue);
+ if (bp == NULL) {
+ msleep(sc, &sc->queue_mtx, PRIBIO, "-", hz/10);
+ continue;
+ }
+ mtx_unlock(&sc->queue_mtx);
+
+ /* A bio that is coming up from an underlying device. */
+ if (bp->bio_cflags & GV_BIO_DONE) {
+ gv_bio_done(sc, bp);
+ /* A bio that interfered with another bio. */
+ } else if (bp->bio_cflags & GV_BIO_ONHOLD) {
+ s = bp->bio_caller1;
+ p = s->plex_sc;
+ /* Is it still locked out? */
+ if (gv_stripe_active(p, bp)) {
+ /* Park the bio on the waiting queue. */
+ bioq_disksort(p->wqueue, bp);
+ } else {
+ bp->bio_cflags &= ~GV_BIO_ONHOLD;
+ g_io_request(bp, s->drive_sc->consumer);
+ }
+ /* A special request requiring special handling. */
+ } else if (bp->bio_cflags & GV_BIO_INTERNAL ||
+ bp->bio_pflags & GV_BIO_INTERNAL) {
+ p = bp->bio_caller1;
+ gv_plex_start(p, bp);
+ /* A fresh bio, schedule it down. */
+ } else {
+ gv_volume_start(sc, bp);
+ }
+
+ mtx_lock(&sc->queue_mtx);
+ }
}
-#endif
#define VINUM_CLASS_NAME "VINUM"
@@ -549,8 +1018,9 @@ static struct g_class g_vinum_class = {
.name = VINUM_CLASS_NAME,
.version = G_VERSION,
.init = gv_init,
- /*.destroy_geom = gv_destroy_geom,*/
+ .taste = gv_taste,
.ctlreq = gv_config,
+ .destroy_geom = gv_unload,
};
DECLARE_GEOM_CLASS(g_vinum_class, g_vinum);
diff --git a/sys/geom/vinum/geom_vinum.h b/sys/geom/vinum/geom_vinum.h
index 07c0410..2db22d3 100644
--- a/sys/geom/vinum/geom_vinum.h
+++ b/sys/geom/vinum/geom_vinum.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,20 +29,25 @@
#ifndef _GEOM_VINUM_H_
#define _GEOM_VINUM_H_
-#define ERRBUFSIZ 1024
+/* geom_vinum_create.c */
+void gv_concat(struct g_geom *gp, struct gctl_req *);
+void gv_mirror(struct g_geom *gp, struct gctl_req *);
+void gv_stripe(struct g_geom *gp, struct gctl_req *);
+void gv_raid5(struct g_geom *gp, struct gctl_req *);
+int gv_create_drive(struct gv_softc *, struct gv_drive *);
+int gv_create_volume(struct gv_softc *, struct gv_volume *);
+int gv_create_plex(struct gv_softc *, struct gv_plex *);
+int gv_create_sd(struct gv_softc *, struct gv_sd *);
/* geom_vinum_drive.c */
-void gv_config_new_drive(struct gv_drive *);
-void gv_drive_modify(struct gv_drive *);
+void gv_save_config(struct gv_softc *);
int gv_read_header(struct g_consumer *, struct gv_hdr *);
-void gv_save_config_all(struct gv_softc *);
-void gv_save_config(struct g_consumer *, struct gv_drive *,
- struct gv_softc *);
int gv_write_header(struct g_consumer *, struct gv_hdr *);
/* geom_vinum_init.c */
-void gv_parityop(struct g_geom *, struct gctl_req *);
void gv_start_obj(struct g_geom *, struct gctl_req *);
+int gv_start_plex(struct gv_plex *);
+int gv_start_vol(struct gv_volume *);
/* geom_vinum_list.c */
void gv_ld(struct g_geom *, struct gctl_req *, struct sbuf *);
@@ -53,48 +58,97 @@ void gv_list(struct g_geom *, struct gctl_req *);
/* geom_vinum_move.c */
void gv_move(struct g_geom *, struct gctl_req *);
+int gv_move_sd(struct gv_softc *, struct gv_sd *, struct gv_drive *, int);
/* geom_vinum_rename.c */
void gv_rename(struct g_geom *, struct gctl_req *);
+int gv_rename_drive(struct gv_softc *, struct gv_drive *, char *, int);
+int gv_rename_plex(struct gv_softc *, struct gv_plex *, char *, int);
+int gv_rename_sd(struct gv_softc *, struct gv_sd *, char *, int);
+int gv_rename_vol(struct gv_softc *, struct gv_volume *, char *, int);
/* geom_vinum_rm.c */
void gv_remove(struct g_geom *, struct gctl_req *);
-int gv_resetconfig(struct g_geom *, struct gctl_req *);
-int gv_rm_sd(struct gv_softc *sc, struct gctl_req *req,
- struct gv_sd *s, int flags);
+int gv_resetconfig(struct gv_softc *);
+void gv_rm_sd(struct gv_softc *sc, struct gv_sd *s);
+void gv_rm_drive(struct gv_softc *, struct gv_drive *, int);
+void gv_rm_plex(struct gv_softc *, struct gv_plex *);
+void gv_rm_vol(struct gv_softc *, struct gv_volume *);
+
/* geom_vinum_state.c */
int gv_sdstatemap(struct gv_plex *);
void gv_setstate(struct g_geom *, struct gctl_req *);
int gv_set_drive_state(struct gv_drive *, int, int);
int gv_set_sd_state(struct gv_sd *, int, int);
+int gv_set_vol_state(struct gv_volume *, int, int);
+int gv_set_plex_state(struct gv_plex *, int, int);
void gv_update_sd_state(struct gv_sd *);
void gv_update_plex_state(struct gv_plex *);
void gv_update_vol_state(struct gv_volume *);
/* geom_vinum_subr.c */
-void gv_adjust_freespace(struct gv_sd *, off_t);
-void gv_free_sd(struct gv_sd *);
-struct g_geom *find_vinum_geom(void);
-struct gv_drive *gv_find_drive(struct gv_softc *, char *);
-struct gv_plex *gv_find_plex(struct gv_softc *, char *);
-struct gv_sd *gv_find_sd(struct gv_softc *, char *);
-struct gv_volume *gv_find_vol(struct gv_softc *, char *);
-void gv_format_config(struct gv_softc *, struct sbuf *, int, char *);
-int gv_is_striped(struct gv_plex *);
-int gv_is_open(struct g_geom *);
-void gv_kill_drive_thread(struct gv_drive *);
-void gv_kill_plex_thread(struct gv_plex *);
-void gv_kill_vol_thread(struct gv_volume *);
-int gv_object_type(struct gv_softc *, char *);
-void gv_parse_config(struct gv_softc *, u_char *, int);
-int gv_sd_to_drive(struct gv_softc *, struct gv_drive *, struct gv_sd *,
- char *, int);
-int gv_sd_to_plex(struct gv_plex *, struct gv_sd *, int);
-void gv_update_plex_config(struct gv_plex *);
-void gv_update_vol_size(struct gv_volume *, off_t);
-off_t gv_vol_size(struct gv_volume *);
-off_t gv_plex_size(struct gv_plex *);
+void gv_adjust_freespace(struct gv_sd *, off_t);
+void gv_free_sd(struct gv_sd *);
+struct gv_drive *gv_find_drive(struct gv_softc *, char *);
+struct gv_drive *gv_find_drive_device(struct gv_softc *, char *);
+struct gv_plex *gv_find_plex(struct gv_softc *, char *);
+struct gv_sd *gv_find_sd(struct gv_softc *, char *);
+struct gv_volume *gv_find_vol(struct gv_softc *, char *);
+void gv_format_config(struct gv_softc *, struct sbuf *, int,
+ char *);
+int gv_is_striped(struct gv_plex *);
+int gv_consumer_is_open(struct g_consumer *);
+int gv_provider_is_open(struct g_provider *);
+int gv_object_type(struct gv_softc *, char *);
+void gv_parse_config(struct gv_softc *, char *,
+ struct gv_drive *);
+int gv_sd_to_drive(struct gv_sd *, struct gv_drive *);
+int gv_sd_to_plex(struct gv_sd *, struct gv_plex *);
+int gv_sdcount(struct gv_plex *, int);
+void gv_update_plex_config(struct gv_plex *);
+void gv_update_vol_size(struct gv_volume *, off_t);
+off_t gv_vol_size(struct gv_volume *);
+off_t gv_plex_size(struct gv_plex *);
+int gv_plexdown(struct gv_volume *);
+int gv_attach_plex(struct gv_plex *, struct gv_volume *,
+ int);
+int gv_attach_sd(struct gv_sd *, struct gv_plex *, off_t,
+ int);
+int gv_detach_plex(struct gv_plex *, int);
+int gv_detach_sd(struct gv_sd *, int);
+
+/* geom_vinum.c */
+void gv_worker(void *);
+void gv_post_event(struct gv_softc *, int, void *, void *, intmax_t,
+ intmax_t);
+void gv_drive_tasted(struct gv_softc *, struct g_provider *);
+void gv_drive_lost(struct gv_softc *, struct gv_drive *);
+void gv_setup_objects(struct gv_softc *);
+void gv_start(struct bio *);
+int gv_access(struct g_provider *, int, int, int);
+void gv_cleanup(struct gv_softc *);
+
+/* geom_vinum_volume.c */
+void gv_done(struct bio *);
+void gv_volume_start(struct gv_softc *, struct bio *);
+void gv_volume_flush(struct gv_volume *);
+void gv_bio_done(struct gv_softc *, struct bio *);
+
+/* geom_vinum_plex.c */
+void gv_plex_start(struct gv_plex *, struct bio *);
+void gv_plex_raid5_done(struct gv_plex *, struct bio *);
+void gv_plex_normal_done(struct gv_plex *, struct bio *);
+int gv_grow_request(struct gv_plex *, off_t, off_t, int, caddr_t);
+void gv_grow_complete(struct gv_plex *, struct bio *);
+void gv_init_request(struct gv_sd *, off_t, caddr_t, off_t);
+void gv_init_complete(struct gv_plex *, struct bio *);
+void gv_parity_request(struct gv_plex *, int, off_t);
+void gv_parity_complete(struct gv_plex *, struct bio *);
+void gv_rebuild_complete(struct gv_plex *, struct bio *);
+int gv_sync_request(struct gv_plex *, struct gv_plex *, off_t, off_t, int,
+ caddr_t);
+int gv_sync_complete(struct gv_plex *, struct bio *);
extern u_int g_vinum_debug;
diff --git a/sys/geom/vinum/geom_vinum_drive.c b/sys/geom/vinum/geom_vinum_drive.c
index 6f2f0fc..1e9dd0b 100644
--- a/sys/geom/vinum/geom_vinum_drive.c
+++ b/sys/geom/vinum/geom_vinum_drive.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004, 2005 Lukas Ertl
+ * Copyright (c) 2004, 2005, 2007 Lukas Ertl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,35 +27,20 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/param.h>
-#include <sys/bio.h>
-#include <sys/errno.h>
#include <sys/endian.h>
-#include <sys/conf.h>
-#include <sys/kernel.h>
-#include <sys/kthread.h>
-#include <sys/libkern.h>
-#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/mutex.h>
-#include <sys/sbuf.h>
#include <sys/systm.h>
-#include <sys/time.h>
#include <sys/vimage.h>
#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
-#include <geom/vinum/geom_vinum_share.h>
#define GV_LEGACY_I386 0
#define GV_LEGACY_AMD64 1
#define GV_LEGACY_SPARC64 2
#define GV_LEGACY_POWERPC 3
-static void gv_drive_dead(void *, int);
-static void gv_drive_worker(void *);
static int gv_legacy_header_type(uint8_t *, int);
/*
@@ -267,620 +252,96 @@ gv_write_header(struct g_consumer *cp, struct gv_hdr *m_hdr)
return (ret);
}
+/* Save the vinum configuration back to each involved disk. */
void
-gv_config_new_drive(struct gv_drive *d)
-{
- struct gv_hdr *vhdr;
- struct gv_freelist *fl;
-
- KASSERT(d != NULL, ("config_new_drive: NULL d"));
-
- vhdr = g_malloc(sizeof(*vhdr), M_WAITOK | M_ZERO);
- vhdr->magic = GV_MAGIC;
- vhdr->config_length = GV_CFG_LEN;
-
- mtx_lock(&hostname_mtx);
- bcopy(G_hostname, vhdr->label.sysname, GV_HOSTNAME_LEN);
- mtx_unlock(&hostname_mtx);
- strncpy(vhdr->label.name, d->name, GV_MAXDRIVENAME);
- microtime(&vhdr->label.date_of_birth);
-
- d->hdr = vhdr;
-
- LIST_INIT(&d->subdisks);
- LIST_INIT(&d->freelist);
-
- fl = g_malloc(sizeof(struct gv_freelist), M_WAITOK | M_ZERO);
- fl->offset = GV_DATA_START;
- fl->size = d->avail;
- LIST_INSERT_HEAD(&d->freelist, fl, freelist);
- d->freelist_entries = 1;
-
- d->bqueue = g_malloc(sizeof(struct bio_queue_head), M_WAITOK | M_ZERO);
- bioq_init(d->bqueue);
- mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
- kproc_create(gv_drive_worker, d, NULL, 0, 0, "gv_d %s", d->name);
- d->flags |= GV_DRIVE_THREAD_ACTIVE;
-}
-
-void
-gv_save_config_all(struct gv_softc *sc)
+gv_save_config(struct gv_softc *sc)
{
+ struct g_consumer *cp;
struct gv_drive *d;
-
- g_topology_assert();
-
- LIST_FOREACH(d, &sc->drives, drive) {
- if (d->geom == NULL)
- continue;
- gv_save_config(NULL, d, sc);
- }
-}
-
-/* Save the vinum configuration back to disk. */
-void
-gv_save_config(struct g_consumer *cp, struct gv_drive *d, struct gv_softc *sc)
-{
- struct g_geom *gp;
- struct g_consumer *cp2;
struct gv_hdr *vhdr, *hdr;
struct sbuf *sb;
+ struct timeval last_update;
int error;
- g_topology_assert();
-
- KASSERT(d != NULL, ("gv_save_config: null d"));
KASSERT(sc != NULL, ("gv_save_config: null sc"));
- /*
- * We can't save the config on a drive that isn't up, but drives that
- * were just created aren't officially up yet, so we check a special
- * flag.
- */
- if ((d->state != GV_DRIVE_UP) && !(d->flags && GV_DRIVE_NEWBORN))
- return;
-
- if (cp == NULL) {
- gp = d->geom;
- KASSERT(gp != NULL, ("gv_save_config: null gp"));
- cp2 = LIST_FIRST(&gp->consumer);
- KASSERT(cp2 != NULL, ("gv_save_config: null cp2"));
- } else
- cp2 = cp;
-
vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
vhdr->magic = GV_MAGIC;
vhdr->config_length = GV_CFG_LEN;
-
- hdr = d->hdr;
- if (hdr == NULL) {
- G_VINUM_DEBUG(0, "drive %s has NULL hdr", d->name);
- g_free(vhdr);
- return;
- }
- microtime(&hdr->label.last_update);
- bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));
+ microtime(&last_update);
sb = sbuf_new(NULL, NULL, GV_CFG_LEN, SBUF_FIXEDLEN);
gv_format_config(sc, sb, 1, NULL);
sbuf_finish(sb);
- error = g_access(cp2, 0, 1, 0);
- if (error) {
- G_VINUM_DEBUG(0, "g_access failed on drive %s, errno %d",
- d->name, error);
- sbuf_delete(sb);
- g_free(vhdr);
- return;
- }
- g_topology_unlock();
-
- do {
- error = gv_write_header(cp2, vhdr);
- if (error) {
- G_VINUM_DEBUG(0, "writing vhdr failed on drive %s, "
- "errno %d", d->name, error);
- break;
- }
-
- error = g_write_data(cp2, GV_CFG_OFFSET, sbuf_data(sb),
- GV_CFG_LEN);
- if (error) {
- G_VINUM_DEBUG(0, "writing first config copy failed "
- "on drive %s, errno %d", d->name, error);
- break;
- }
-
- error = g_write_data(cp2, GV_CFG_OFFSET + GV_CFG_LEN,
- sbuf_data(sb), GV_CFG_LEN);
- if (error)
- G_VINUM_DEBUG(0, "writing second config copy failed "
- "on drive %s, errno %d", d->name, error);
- } while (0);
-
- g_topology_lock();
- g_access(cp2, 0, -1, 0);
- sbuf_delete(sb);
- g_free(vhdr);
-
- if (d->geom != NULL)
- gv_drive_modify(d);
-}
-
-/* This resembles g_slice_access(). */
-static int
-gv_drive_access(struct g_provider *pp, int dr, int dw, int de)
-{
- struct g_geom *gp;
- struct g_consumer *cp;
- struct g_provider *pp2;
- struct gv_drive *d;
- struct gv_sd *s, *s2;
- int error;
-
- gp = pp->geom;
- cp = LIST_FIRST(&gp->consumer);
- if (cp == NULL)
- return (0);
-
- d = gp->softc;
- if (d == NULL)
- return (0);
-
- s = pp->private;
- KASSERT(s != NULL, ("gv_drive_access: NULL s"));
-
- LIST_FOREACH(s2, &d->subdisks, from_drive) {
- if (s == s2)
- continue;
- if (s->drive_offset + s->size <= s2->drive_offset)
- continue;
- if (s2->drive_offset + s2->size <= s->drive_offset)
+ LIST_FOREACH(d, &sc->drives, drive) {
+ /*
+ * We can't save the config on a drive that isn't up, so
+ * skip it. (The old special case for newly created drives
+ * is gone; only the drive state is checked here.)
+ */
+ if (d->state != GV_DRIVE_UP)
continue;
- /* Overlap. */
- pp2 = s2->provider;
- KASSERT(s2 != NULL, ("gv_drive_access: NULL s2"));
- if ((pp->acw + dw) > 0 && pp2->ace > 0)
- return (EPERM);
- if ((pp->ace + de) > 0 && pp2->acw > 0)
- return (EPERM);
- }
-
- error = g_access(cp, dr, dw, de);
- return (error);
-}
-
-static void
-gv_drive_done(struct bio *bp)
-{
- struct gv_drive *d;
-
- /* Put the BIO on the worker queue again. */
- d = bp->bio_from->geom->softc;
- bp->bio_cflags |= GV_BIO_DONE;
- mtx_lock(&d->bqueue_mtx);
- bioq_insert_tail(d->bqueue, bp);
- wakeup(d);
- mtx_unlock(&d->bqueue_mtx);
-}
-
-
-static void
-gv_drive_start(struct bio *bp)
-{
- struct gv_drive *d;
- struct gv_sd *s;
-
- switch (bp->bio_cmd) {
- case BIO_READ:
- case BIO_WRITE:
- case BIO_DELETE:
- break;
- case BIO_GETATTR:
- default:
- g_io_deliver(bp, EOPNOTSUPP);
- return;
- }
-
- s = bp->bio_to->private;
- if ((s->state == GV_SD_DOWN) || (s->state == GV_SD_STALE)) {
- g_io_deliver(bp, ENXIO);
- return;
- }
-
- d = bp->bio_to->geom->softc;
-
- /*
- * Put the BIO on the worker queue, where the worker thread will pick
- * it up.
- */
- mtx_lock(&d->bqueue_mtx);
- bioq_disksort(d->bqueue, bp);
- wakeup(d);
- mtx_unlock(&d->bqueue_mtx);
-
-}
-
-static void
-gv_drive_worker(void *arg)
-{
- struct bio *bp, *cbp;
- struct g_geom *gp;
- struct g_provider *pp;
- struct gv_drive *d;
- struct gv_sd *s;
- int error;
-
- d = arg;
-
- mtx_lock(&d->bqueue_mtx);
- for (;;) {
- /* We were signaled to exit. */
- if (d->flags & GV_DRIVE_THREAD_DIE)
- break;
-
- /* Take the first BIO from out queue. */
- bp = bioq_takefirst(d->bqueue);
- if (bp == NULL) {
- msleep(d, &d->bqueue_mtx, PRIBIO, "-", hz/10);
+ cp = d->consumer;
+ if (cp == NULL) {
+ G_VINUM_DEBUG(0, "drive '%s' has no consumer!",
+ d->name);
continue;
- }
- mtx_unlock(&d->bqueue_mtx);
-
- pp = bp->bio_to;
- gp = pp->geom;
-
- /* Completed request. */
- if (bp->bio_cflags & GV_BIO_DONE) {
- error = bp->bio_error;
-
- /* Deliver the original request. */
- g_std_done(bp);
-
- /* The request had an error, we need to clean up. */
- if (error != 0) {
- g_topology_lock();
- gv_set_drive_state(d, GV_DRIVE_DOWN,
- GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
- g_topology_unlock();
- g_post_event(gv_drive_dead, d, M_WAITOK, d,
- NULL);
- }
-
- /* New request, needs to be sent downwards. */
- } else {
- s = pp->private;
-
- if ((s->state == GV_SD_DOWN) ||
- (s->state == GV_SD_STALE)) {
- g_io_deliver(bp, ENXIO);
- mtx_lock(&d->bqueue_mtx);
- continue;
- }
- if (bp->bio_offset > s->size) {
- g_io_deliver(bp, EINVAL);
- mtx_lock(&d->bqueue_mtx);
- continue;
- }
-
- cbp = g_clone_bio(bp);
- if (cbp == NULL) {
- g_io_deliver(bp, ENOMEM);
- mtx_lock(&d->bqueue_mtx);
- continue;
- }
- if (cbp->bio_offset + cbp->bio_length > s->size)
- cbp->bio_length = s->size -
- cbp->bio_offset;
- cbp->bio_done = gv_drive_done;
- cbp->bio_offset += s->drive_offset;
- g_io_request(cbp, LIST_FIRST(&gp->consumer));
- }
-
- mtx_lock(&d->bqueue_mtx);
- }
-
- while ((bp = bioq_takefirst(d->bqueue)) != NULL) {
- mtx_unlock(&d->bqueue_mtx);
- if (bp->bio_cflags & GV_BIO_DONE)
- g_std_done(bp);
- else
- g_io_deliver(bp, ENXIO);
- mtx_lock(&d->bqueue_mtx);
- }
- mtx_unlock(&d->bqueue_mtx);
- d->flags |= GV_DRIVE_THREAD_DEAD;
-
- kproc_exit(ENXIO);
-}
-
-
-static void
-gv_drive_orphan(struct g_consumer *cp)
-{
- struct g_geom *gp;
- struct gv_drive *d;
-
- g_topology_assert();
- gp = cp->geom;
- g_trace(G_T_TOPOLOGY, "gv_drive_orphan(%s)", gp->name);
- d = gp->softc;
- if (d != NULL) {
- gv_set_drive_state(d, GV_DRIVE_DOWN,
- GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
- g_post_event(gv_drive_dead, d, M_WAITOK, d, NULL);
- } else
- g_wither_geom(gp, ENXIO);
-}
-
-static struct g_geom *
-gv_drive_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
-{
- struct g_geom *gp, *gp2;
- struct g_consumer *cp;
- struct gv_drive *d;
- struct gv_sd *s;
- struct gv_softc *sc;
- struct gv_freelist *fl;
- struct gv_hdr *vhdr;
- int error;
- char *buf, errstr[ERRBUFSIZ];
-
- vhdr = NULL;
- d = NULL;
-
- g_trace(G_T_TOPOLOGY, "gv_drive_taste(%s, %s)", mp->name, pp->name);
- g_topology_assert();
-
- /* Find the VINUM class and its associated geom. */
- gp2 = find_vinum_geom();
- if (gp2 == NULL)
- return (NULL);
- sc = gp2->softc;
-
- gp = g_new_geomf(mp, "%s.vinumdrive", pp->name);
- gp->start = gv_drive_start;
- gp->orphan = gv_drive_orphan;
- gp->access = gv_drive_access;
- gp->start = gv_drive_start;
-
- cp = g_new_consumer(gp);
- g_attach(cp, pp);
- error = g_access(cp, 1, 0, 0);
- if (error) {
- g_detach(cp);
- g_destroy_consumer(cp);
- g_destroy_geom(gp);
- return (NULL);
- }
-
- g_topology_unlock();
-
- /* Now check if the provided slice is a valid vinum drive. */
- do {
- vhdr = g_malloc(GV_HDR_LEN, M_WAITOK | M_ZERO);
- error = gv_read_header(cp, vhdr);
- if (error) {
- g_free(vhdr);
- break;
}
- /* A valid vinum drive, let's parse the on-disk information. */
- buf = g_read_data(cp, GV_CFG_OFFSET, GV_CFG_LEN, NULL);
- if (buf == NULL) {
+ hdr = d->hdr;
+ if (hdr == NULL) {
+ G_VINUM_DEBUG(0, "drive '%s' has no header",
+ d->name);
- g_free(vhdr);
- break;
+ /* Keep vhdr: later drives reuse it; it is freed after the loop. */
+ continue;
}
+ bcopy(&last_update, &hdr->label.last_update,
+ sizeof(struct timeval));
+ bcopy(&hdr->label, &vhdr->label, sizeof(struct gv_label));
g_topology_lock();
- gv_parse_config(sc, buf, 1);
- g_free(buf);
-
- /*
- * Let's see if this drive is already known in the
- * configuration.
- */
- d = gv_find_drive(sc, vhdr->label.name);
-
- /* We already know about this drive. */
- if (d != NULL) {
- /* Check if this drive already has a geom. */
- if (d->geom != NULL) {
- g_topology_unlock();
- g_free(vhdr);
- break;
- }
- bcopy(vhdr, d->hdr, sizeof(*vhdr));
- g_free(vhdr);
-
- /* This is a new drive. */
- } else {
- d = g_malloc(sizeof(*d), M_WAITOK | M_ZERO);
-
- /* Initialize all needed variables. */
- d->size = pp->mediasize - GV_DATA_START;
- d->avail = d->size;
- d->hdr = vhdr;
- strncpy(d->name, vhdr->label.name, GV_MAXDRIVENAME);
- LIST_INIT(&d->subdisks);
- LIST_INIT(&d->freelist);
-
- /* We also need a freelist entry. */
- fl = g_malloc(sizeof(*fl), M_WAITOK | M_ZERO);
- fl->offset = GV_DATA_START;
- fl->size = d->avail;
- LIST_INSERT_HEAD(&d->freelist, fl, freelist);
- d->freelist_entries = 1;
-
- /* Save it into the main configuration. */
- LIST_INSERT_HEAD(&sc->drives, d, drive);
- }
-
- /*
- * Create bio queue, queue mutex and a worker thread, if
- * necessary.
- */
- if (d->bqueue == NULL) {
- d->bqueue = g_malloc(sizeof(struct bio_queue_head),
- M_WAITOK | M_ZERO);
- bioq_init(d->bqueue);
- }
- if (mtx_initialized(&d->bqueue_mtx) == 0)
- mtx_init(&d->bqueue_mtx, "gv_drive", NULL, MTX_DEF);
-
- if (!(d->flags & GV_DRIVE_THREAD_ACTIVE)) {
- kproc_create(gv_drive_worker, d, NULL, 0, 0,
- "gv_d %s", d->name);
- d->flags |= GV_DRIVE_THREAD_ACTIVE;
+ error = g_access(cp, 0, 1, 0);
+ if (error) {
+ G_VINUM_DEBUG(0, "g_access failed on "
+ "drive %s, errno %d", d->name, error);
+ g_topology_unlock();
+ continue;
}
+ g_topology_unlock();
- g_access(cp, -1, 0, 0);
-
- gp->softc = d;
- d->geom = gp;
- d->vinumconf = sc;
- strncpy(d->device, pp->name, GV_MAXDRIVENAME);
-
- /*
- * Find out which subdisks belong to this drive and crosslink
- * them.
- */
- LIST_FOREACH(s, &sc->subdisks, sd) {
- if (!strncmp(s->drive, d->name, GV_MAXDRIVENAME))
- /* XXX: errors ignored */
- gv_sd_to_drive(sc, d, s, errstr,
- sizeof(errstr));
+ error = gv_write_header(cp, vhdr);
+ if (error) {
+ G_VINUM_DEBUG(0, "writing vhdr failed on drive %s, "
+ "errno %d", d->name, error);
+ g_topology_lock();
+ g_access(cp, 0, -1, 0);
+ g_topology_unlock();
+ continue;
}
-
- /* This drive is now up for sure. */
- gv_set_drive_state(d, GV_DRIVE_UP, 0);
-
- /*
- * If there are subdisks on this drive, we need to create
- * providers for them.
- */
- if (d->sdcount)
- gv_drive_modify(d);
-
- return (gp);
-
- } while (0);
-
- g_topology_lock();
- g_access(cp, -1, 0, 0);
-
- g_detach(cp);
- g_destroy_consumer(cp);
- g_destroy_geom(gp);
- return (NULL);
-}
-
-/*
- * Modify the providers for the given drive 'd'. It is assumed that the
- * subdisk list of 'd' is already correctly set up.
- */
-void
-gv_drive_modify(struct gv_drive *d)
-{
- struct g_geom *gp;
- struct g_consumer *cp;
- struct g_provider *pp, *pp2;
- struct gv_sd *s;
-
- KASSERT(d != NULL, ("gv_drive_modify: null d"));
- gp = d->geom;
- KASSERT(gp != NULL, ("gv_drive_modify: null gp"));
- cp = LIST_FIRST(&gp->consumer);
- KASSERT(cp != NULL, ("gv_drive_modify: null cp"));
- pp = cp->provider;
- KASSERT(pp != NULL, ("gv_drive_modify: null pp"));
-
- g_topology_assert();
-
- LIST_FOREACH(s, &d->subdisks, from_drive) {
- /* This subdisk already has a provider. */
- if (s->provider != NULL)
+ /* First config copy. */
+ error = g_write_data(cp, GV_CFG_OFFSET, sbuf_data(sb),
+ GV_CFG_LEN);
+ if (error) {
+ G_VINUM_DEBUG(0, "writing first config copy failed on "
+ "drive %s, errno %d", d->name, error);
+ g_topology_lock();
+ g_access(cp, 0, -1, 0);
+ g_topology_unlock();
continue;
- pp2 = g_new_providerf(gp, "gvinum/sd/%s", s->name);
- pp2->mediasize = s->size;
- pp2->sectorsize = pp->sectorsize;
- g_error_provider(pp2, 0);
- s->provider = pp2;
- pp2->private = s;
- }
-}
-
-static void
-gv_drive_dead(void *arg, int flag)
-{
- struct g_geom *gp;
- struct g_consumer *cp;
- struct gv_drive *d;
- struct gv_sd *s;
-
- g_topology_assert();
- KASSERT(arg != NULL, ("gv_drive_dead: NULL arg"));
-
- if (flag == EV_CANCEL)
- return;
-
- d = arg;
- if (d->state != GV_DRIVE_DOWN)
- return;
-
- g_trace(G_T_TOPOLOGY, "gv_drive_dead(%s)", d->name);
-
- gp = d->geom;
- if (gp == NULL)
- return;
-
- LIST_FOREACH(cp, &gp->consumer, consumer) {
- if (cp->nstart != cp->nend) {
- G_VINUM_DEBUG(0, "dead drive '%s' still has "
- "active requests, cannot detach consumer",
- d->name);
- g_post_event(gv_drive_dead, d, M_WAITOK, d,
- NULL);
- return;
}
- if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
- g_access(cp, -cp->acr, -cp->acw, -cp->ace);
- }
+ /* Second config copy. */
+ error = g_write_data(cp, GV_CFG_OFFSET + GV_CFG_LEN,
+ sbuf_data(sb), GV_CFG_LEN);
+ if (error)
+ G_VINUM_DEBUG(0, "writing second config copy failed on "
+ "drive %s, errno %d", d->name, error);
- G_VINUM_DEBUG(1, "lost drive '%s'", d->name);
- d->geom = NULL;
- LIST_FOREACH(s, &d->subdisks, from_drive) {
- s->provider = NULL;
- s->consumer = NULL;
+ g_topology_lock();
+ g_access(cp, 0, -1, 0);
+ g_topology_unlock();
}
- gv_kill_drive_thread(d);
- gp->softc = NULL;
- g_wither_geom(gp, ENXIO);
-}
-static int
-gv_drive_destroy_geom(struct gctl_req *req, struct g_class *mp,
- struct g_geom *gp)
-{
- struct gv_drive *d;
-
- g_trace(G_T_TOPOLOGY, "gv_drive_destroy_geom: %s", gp->name);
- g_topology_assert();
-
- d = gp->softc;
- gv_kill_drive_thread(d);
-
- g_wither_geom(gp, ENXIO);
- return (0);
+ sbuf_delete(sb);
+ g_free(vhdr);
}
-
-#define VINUMDRIVE_CLASS_NAME "VINUMDRIVE"
-
-static struct g_class g_vinum_drive_class = {
- .name = VINUMDRIVE_CLASS_NAME,
- .version = G_VERSION,
- .taste = gv_drive_taste,
- .destroy_geom = gv_drive_destroy_geom
-};
-
-DECLARE_GEOM_CLASS(g_vinum_drive_class, g_vinum_drive);
diff --git a/sys/geom/vinum/geom_vinum_init.c b/sys/geom/vinum/geom_vinum_init.c
index fddc435..34f1156 100644
--- a/sys/geom/vinum/geom_vinum_init.c
+++ b/sys/geom/vinum/geom_vinum_init.c
@@ -1,5 +1,6 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
+ * Copyright (c) 2007, 2009 Ulf Lilleengen
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,158 +27,21 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-
#include <sys/param.h>
#include <sys/bio.h>
-#include <sys/kernel.h>
-#include <sys/kthread.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
-#include <sys/queue.h>
#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
-#include <geom/vinum/geom_vinum_share.h>
-
-static int gv_init_plex(struct gv_plex *);
-void gv_init_td(void *);
-static int gv_rebuild_plex(struct gv_plex *);
-void gv_rebuild_td(void *);
-static int gv_start_plex(struct gv_plex *);
-static int gv_start_vol(struct gv_volume *);
-static int gv_sync(struct gv_volume *);
-void gv_sync_td(void *);
-
-struct gv_sync_args {
- struct gv_volume *v;
- struct gv_plex *from;
- struct gv_plex *to;
- off_t syncsize;
-};
-
-void
-gv_parityop(struct g_geom *gp, struct gctl_req *req)
-{
- struct gv_softc *sc;
- struct gv_plex *p;
- struct bio *bp;
- struct g_consumer *cp;
- int error, *flags, type, *rebuild, rv;
- char *plex;
-
- rv = -1;
-
- plex = gctl_get_param(req, "plex", NULL);
- if (plex == NULL) {
- gctl_error(req, "no plex given");
- goto out;
- }
-
- flags = gctl_get_paraml(req, "flags", sizeof(*flags));
- if (flags == NULL) {
- gctl_error(req, "no flags given");
- goto out;
- }
-
- rebuild = gctl_get_paraml(req, "rebuild", sizeof(*rebuild));
- if (rebuild == NULL) {
- gctl_error(req, "no rebuild op given");
- goto out;
- }
-
- sc = gp->softc;
- type = gv_object_type(sc, plex);
- switch (type) {
- case GV_TYPE_PLEX:
- break;
- case GV_TYPE_VOL:
- case GV_TYPE_SD:
- case GV_TYPE_DRIVE:
- default:
- gctl_error(req, "'%s' is not a plex", plex);
- goto out;
- }
-
- p = gv_find_plex(sc, plex);
- if (p->state != GV_PLEX_UP) {
- gctl_error(req, "plex %s is not completely accessible",
- p->name);
- goto out;
- }
- if (p->org != GV_PLEX_RAID5) {
- gctl_error(req, "plex %s is not a RAID5 plex", p->name);
- goto out;
- }
-
- cp = p->consumer;
- error = g_access(cp, 1, 1, 0);
- if (error) {
- gctl_error(req, "cannot access consumer");
- goto out;
- }
- g_topology_unlock();
-
- /* Reset the check pointer when using -f. */
- if (*flags & GV_FLAG_F)
- p->synced = 0;
-
- bp = g_new_bio();
- if (bp == NULL) {
- gctl_error(req, "cannot create BIO - out of memory");
- g_topology_lock();
- error = g_access(cp, -1, -1, 0);
- goto out;
- }
- bp->bio_cmd = BIO_WRITE;
- bp->bio_done = NULL;
- bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
- bp->bio_cflags |= GV_BIO_CHECK;
- if (*rebuild)
- bp->bio_cflags |= GV_BIO_PARITY;
- bp->bio_offset = p->synced;
- bp->bio_length = p->stripesize;
-
- /* Schedule it down ... */
- g_io_request(bp, cp);
-
- /* ... and wait for the result. */
- error = biowait(bp, "gwrite");
- g_free(bp->bio_data);
- g_destroy_bio(bp);
-
- if (error) {
- /* Incorrect parity. */
- if (error == EAGAIN)
- rv = 1;
- /* Some other error happened. */
- else
- gctl_error(req, "Parity check failed at offset 0x%jx, "
- "errno %d", (intmax_t)p->synced, error);
-
- /* Correct parity. */
- } else
- rv = 0;
-
- gctl_set_param(req, "offset", &p->synced, sizeof(p->synced));
-
- /* Advance the checkpointer if there was no error. */
- if (rv == 0)
- p->synced += p->stripesize;
-
- /* End of plex; reset the check pointer and signal it to the caller. */
- if (p->synced >= p->size) {
- p->synced = 0;
- rv = -2;
- }
-
- g_topology_lock();
- error = g_access(cp, -1, -1, 0);
-
-out:
- gctl_set_param(req, "rv", &rv, sizeof(rv));
-}
+static int gv_sync(struct gv_volume *);
+static int gv_rebuild_plex(struct gv_plex *);
+static int gv_init_plex(struct gv_plex *);
+static int gv_grow_plex(struct gv_plex *);
+static int gv_sync_plex(struct gv_plex *, struct gv_plex *);
+static struct gv_plex *gv_find_good_plex(struct gv_volume *);
void
gv_start_obj(struct g_geom *gp, struct gctl_req *req)
@@ -187,7 +51,7 @@ gv_start_obj(struct g_geom *gp, struct gctl_req *req)
struct gv_plex *p;
int *argc, *initsize;
char *argv, buf[20];
- int err, i, type;
+ int i, type;
argc = gctl_get_paraml(req, "argc", sizeof(*argc));
initsize = gctl_get_paraml(req, "initsize", sizeof(*initsize));
@@ -208,37 +72,21 @@ gv_start_obj(struct g_geom *gp, struct gctl_req *req)
switch (type) {
case GV_TYPE_VOL:
v = gv_find_vol(sc, argv);
- err = gv_start_vol(v);
- if (err) {
- if (err == EINPROGRESS) {
- gctl_error(req, "cannot start volume "
- "'%s': already in progress", argv);
- } else {
- gctl_error(req, "cannot start volume "
- "'%s'; errno: %d", argv, err);
- }
- return;
- }
+ if (v != NULL)
+ gv_post_event(sc, GV_EVENT_START_VOLUME, v,
+ NULL, *initsize, 0);
break;
case GV_TYPE_PLEX:
p = gv_find_plex(sc, argv);
- err = gv_start_plex(p);
- if (err) {
- if (err == EINPROGRESS) {
- gctl_error(req, "cannot start plex "
- "'%s': already in progress", argv);
- } else {
- gctl_error(req, "cannot start plex "
- "'%s'; errno: %d", argv, err);
- }
- return;
- }
+ if (p != NULL)
+ gv_post_event(sc, GV_EVENT_START_PLEX, p, NULL,
+ *initsize, 0);
break;
case GV_TYPE_SD:
case GV_TYPE_DRIVE:
- /* XXX not yet */
+ /* XXX Not implemented, but what is the use? */
gctl_error(req, "cannot start '%s' - not yet supported",
argv);
return;
@@ -249,36 +97,73 @@ gv_start_obj(struct g_geom *gp, struct gctl_req *req)
}
}
-static int
+/*
+ * Start the plex p, choosing the operation from its organization and state:
+ *  - RAID5: grow it if its state is above degraded and one of its subdisks
+ *    is flagged GV_SD_GROW, rebuild it if degraded, otherwise initialize it.
+ *  - other organizations: sync a down plex from a good plex of its volume,
+ *    or grow a striped plex that is not down and has a GV_SD_GROW subdisk.
+ * Returns 0 if the operation was started or there was nothing to do,
+ * ENXIO if no good plex to sync from exists, or the error reported by the
+ * operation that was attempted.
+ */
+int
 gv_start_plex(struct gv_plex *p)
 {
 	struct gv_volume *v;
+	struct gv_plex *up;
+	struct gv_sd *s;
 	int error;
 	KASSERT(p != NULL, ("gv_start_plex: NULL p"));
-	if (p->state == GV_PLEX_UP)
-		return (0);
-
 	error = 0;
 	v = p->vol_sc;
-	if ((v != NULL) && (v->plexcount > 1))
-		error = gv_sync(v);
-	else if (p->org == GV_PLEX_RAID5) {
-		if (p->state == GV_PLEX_DEGRADED)
+
+	/* RAID5 plexes can either be init, rebuilt or grown. */
+	if (p->org == GV_PLEX_RAID5) {
+		if (p->state > GV_PLEX_DEGRADED) {
+			LIST_FOREACH(s, &p->subdisks, in_plex) {
+				if (s->flags & GV_SD_GROW) {
+					error = gv_grow_plex(p);
+					return (error);
+				}
+			}
+		} else if (p->state == GV_PLEX_DEGRADED) {
 			error = gv_rebuild_plex(p);
-		else
+		} else
 			error = gv_init_plex(p);
+	} else {
+		/*
+		 * We want to sync from the other plex if we're down.  vol_sc
+		 * may be NULL (the previous code checked for that; presumably
+		 * a plex that is not attached to a volume), so test v before
+		 * dereferencing it.
+		 */
+		if (p->state == GV_PLEX_DOWN && v != NULL &&
+		    v->plexcount > 1) {
+			up = gv_find_good_plex(v);
+			if (up == NULL) {
+				G_VINUM_DEBUG(1, "unable to find a good plex");
+				return (ENXIO);
+			}
+			g_topology_lock();
+			error = gv_access(v->provider, 1, 1, 0);
+			if (error) {
+				g_topology_unlock();
+				G_VINUM_DEBUG(0, "sync from '%s' failed to "
+				    "access volume: %d", up->name, error);
+				return (error);
+			}
+			g_topology_unlock();
+			error = gv_sync_plex(p, up);
+			/*
+			 * NOTE(review): the access acquired above is not
+			 * dropped on this error path -- confirm whether it
+			 * has to be released here.
+			 */
+			if (error)
+				return (error);
+		/*
+		 * In case we have a stripe that is up, check whether it can be
+		 * grown.
+		 */
+		} else if (p->org == GV_PLEX_STRIPED &&
+		    p->state != GV_PLEX_DOWN) {
+			LIST_FOREACH(s, &p->subdisks, in_plex) {
+				if (s->flags & GV_SD_GROW) {
+					error = gv_grow_plex(p);
+					break;
+				}
+			}
+		}
 	}
-
 	return (error);
 }
-static int
+int
gv_start_vol(struct gv_volume *v)
{
struct gv_plex *p;
- struct gv_sd *s;
int error;
KASSERT(v != NULL, ("gv_start_vol: NULL v"));
@@ -291,39 +176,46 @@ gv_start_vol(struct gv_volume *v)
else if (v->plexcount == 1) {
p = LIST_FIRST(&v->plexes);
KASSERT(p != NULL, ("gv_start_vol: NULL p on %s", v->name));
- if (p->org == GV_PLEX_RAID5) {
- switch (p->state) {
- case GV_PLEX_DOWN:
- error = gv_init_plex(p);
- break;
- case GV_PLEX_DEGRADED:
- error = gv_rebuild_plex(p);
- break;
- default:
- return (0);
- }
- } else {
- LIST_FOREACH(s, &p->subdisks, in_plex) {
- gv_set_sd_state(s, GV_SD_UP,
- GV_SETSTATE_CONFIG);
- }
- }
+ error = gv_start_plex(p);
} else
error = gv_sync(v);
return (error);
}
+/*
+ * Sync a plex p from the plex up.
+ *
+ * Returns 0 if gv_sync_request() succeeded or if no sync is needed (p is
+ * up itself, or p is already in the up state), EINPROGRESS if p is already
+ * being synced, rebuilt or grown, or the error from gv_sync_request().
+ */
 static int
-gv_sync(struct gv_volume *v)
+gv_sync_plex(struct gv_plex *p, struct gv_plex *up)
 {
-	struct gv_softc *sc;
-	struct gv_plex *p, *up;
-	struct gv_sync_args *sync;
+	int error;
-	KASSERT(v != NULL, ("gv_sync: NULL v"));
-	sc = v->vinumconf;
-	KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));
+	KASSERT(p != NULL, ("%s: NULL p", __func__));
+	KASSERT(up != NULL, ("%s: NULL up", __func__));
+	if ((p == up) || (p->state == GV_PLEX_UP))
+		return (0);
+	if (p->flags & GV_PLEX_SYNCING ||
+	    p->flags & GV_PLEX_REBUILDING ||
+	    p->flags & GV_PLEX_GROWING) {
+		return (EINPROGRESS);
+	}
+	p->synced = 0;
+	p->flags |= GV_PLEX_SYNCING;
+	G_VINUM_DEBUG(1, "starting sync of plex %s", p->name);
+	error = gv_sync_request(up, p, p->synced,
+	    MIN(GV_DFLT_SYNCSIZE, up->size - p->synced),
+	    BIO_READ, NULL);
+	if (error) {
+		G_VINUM_DEBUG(0, "error syncing plex %s", p->name);
+		/*
+		 * The sync did not start; clear the flag so that a later
+		 * start is not rejected with EINPROGRESS.
+		 */
+		p->flags &= ~GV_PLEX_SYNCING;
+		return (error);
+	}
+	return (0);
+}
+
+/* Return a good plex from volume v. */
+static struct gv_plex *
+gv_find_good_plex(struct gv_volume *v)
+{
+ struct gv_plex *up;
/* Find the plex that's up. */
up = NULL;
@@ -331,341 +223,166 @@ gv_sync(struct gv_volume *v)
if (up->state == GV_PLEX_UP)
break;
}
-
/* Didn't find a good plex. */
- if (up == NULL)
- return (ENXIO);
-
- LIST_FOREACH(p, &v->plexes, in_volume) {
- if ((p == up) || (p->state == GV_PLEX_UP))
- continue;
- if (p->flags & GV_PLEX_SYNCING) {
- return (EINPROGRESS);
- }
- p->flags |= GV_PLEX_SYNCING;
- sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
- sync->v = v;
- sync->from = up;
- sync->to = p;
- sync->syncsize = GV_DFLT_SYNCSIZE;
- kproc_create(gv_sync_td, sync, NULL, 0, 0, "gv_sync '%s'",
- p->name);
- }
-
- return (0);
+ return (up);
}
+/*
+ * Start syncing the plexes of volume v from a good plex.
+ *
+ * Returns ENXIO when gv_find_good_plex() finds no plex to sync from, the
+ * gv_access() error when the volume provider cannot be accessed, and 0
+ * otherwise.
+ */
static int
-gv_rebuild_plex(struct gv_plex *p)
-{
- struct gv_sync_args *sync;
-
- if (gv_is_open(p->geom))
- return (EBUSY);
-
- if (p->flags & GV_PLEX_SYNCING)
- return (EINPROGRESS);
- p->flags |= GV_PLEX_SYNCING;
-
- sync = g_malloc(sizeof(*sync), M_WAITOK | M_ZERO);
- sync->to = p;
- sync->syncsize = GV_DFLT_SYNCSIZE;
-
- kproc_create(gv_rebuild_td, sync, NULL, 0, 0, "gv_rebuild %s",
- p->name);
-
- return (0);
-}
-
-static int
-gv_init_plex(struct gv_plex *p)
-{
- struct gv_sd *s;
-
- KASSERT(p != NULL, ("gv_init_plex: NULL p"));
-
- LIST_FOREACH(s, &p->subdisks, in_plex) {
- if (s->state == GV_SD_INITIALIZING)
- return (EINPROGRESS);
- gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
- s->init_size = GV_DFLT_SYNCSIZE;
- kproc_create(gv_init_td, s, NULL, 0, 0, "gv_init %s",
- s->name);
- }
-
- return (0);
-}
-
-/* This thread is responsible for rebuilding a degraded RAID5 plex. */
-void
-gv_rebuild_td(void *arg)
+gv_sync(struct gv_volume *v)
{
- struct bio *bp;
- struct gv_plex *p;
- struct g_consumer *cp;
- struct gv_sync_args *sync;
- u_char *buf;
- off_t i;
+ struct gv_softc *sc;
+ struct gv_plex *p, *up;
int error;
- buf = NULL;
- bp = NULL;
+ KASSERT(v != NULL, ("gv_sync: NULL v"));
+ sc = v->vinumconf;
+ KASSERT(sc != NULL, ("gv_sync: NULL sc on %s", v->name));
- sync = arg;
- p = sync->to;
- p->synced = 0;
- cp = p->consumer;
+ up = gv_find_good_plex(v);
+ if (up == NULL)
+ return (ENXIO);
+ /* Take a read and a write reference on the volume provider. */
g_topology_lock();
- error = g_access(cp, 1, 1, 0);
+ error = gv_access(v->provider, 1, 1, 0);
if (error) {
g_topology_unlock();
- G_VINUM_DEBUG(0, "rebuild of %s failed to access consumer: "
- "%d", p->name, error);
- kproc_exit(error);
+ G_VINUM_DEBUG(0, "sync from '%s' failed to access volume: %d",
+ up->name, error);
+ return (error);
}
g_topology_unlock();
- buf = g_malloc(sync->syncsize, M_WAITOK);
-
- G_VINUM_DEBUG(1, "rebuild of %s started", p->name);
- i = 0;
- for (i = 0; i < p->size; i += (p->stripesize * (p->sdcount - 1))) {
-/*
- if (i + sync->syncsize > p->size)
- sync->syncsize = p->size - i;
-*/
- bp = g_new_bio();
- if (bp == NULL) {
- G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
- "out of memory", p->name);
- break;
- }
- bp->bio_cmd = BIO_WRITE;
- bp->bio_done = NULL;
- bp->bio_data = buf;
- bp->bio_cflags |= GV_BIO_REBUILD;
- bp->bio_offset = i;
- bp->bio_length = p->stripesize;
-
- /* Schedule it down ... */
- g_io_request(bp, cp);
-
- /* ... and wait for the result. */
- error = biowait(bp, "gwrite");
- if (error) {
- G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd "
- "errno: %d", p->name, i, error);
+ /* Go through the good plex, and issue BIO's to all other plexes. */
+ LIST_FOREACH(p, &v->plexes, in_volume) {
+ error = gv_sync_plex(p, up);
+ if (error)
break;
- }
- g_destroy_bio(bp);
- bp = NULL;
}
-
- if (bp != NULL)
- g_destroy_bio(bp);
- if (buf != NULL)
- g_free(buf);
-
- g_topology_lock();
- g_access(cp, -1, -1, 0);
- gv_save_config_all(p->vinumconf);
- g_topology_unlock();
-
- p->flags &= ~GV_PLEX_SYNCING;
- p->synced = 0;
-
- /* Successful initialization. */
- if (!error)
- G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
-
- g_free(sync);
- kproc_exit(error);
+ /*
+ * NOTE(review): an error from gv_sync_plex() ends the loop above but
+ * is not returned, and the access taken above is not dropped here;
+ * confirm that both are intended.
+ */
+ return (0);
}
-void
-gv_sync_td(void *arg)
+/*
+ * Start rebuilding plex p by issuing the first GV_BIO_REBUILD parity request.
+ *
+ * Returns EINPROGRESS when p is already syncing, rebuilding or growing,
+ * ENXIO when a subdisk has no drive or a drive flagged GV_DRIVE_REFERENCED,
+ * the gv_access() error when the volume provider cannot be accessed, and 0
+ * once the request has been handed to gv_parity_request().
+ */
+static int
+gv_rebuild_plex(struct gv_plex *p)
{
- struct bio *bp;
- struct gv_plex *p;
- struct g_consumer *from, *to;
- struct gv_sync_args *sync;
- u_char *buf;
- off_t i;
+ struct gv_drive *d;
+ struct gv_sd *s;
int error;
- sync = arg;
-
- from = sync->from->consumer;
- to = sync->to->consumer;
-
- p = sync->to;
+ if (p->flags & GV_PLEX_SYNCING ||
+ p->flags & GV_PLEX_REBUILDING ||
+ p->flags & GV_PLEX_GROWING)
+ return (EINPROGRESS);
+ /*
+ * Make sure that all subdisks have consumers. We won't allow a rebuild
+ * unless every subdisk has one.
+ */
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ d = s->drive_sc;
+ if (d == NULL || (d->flags & GV_DRIVE_REFERENCED)) {
+ G_VINUM_DEBUG(0, "can't rebuild %s, subdisk(s) have no "
+ "drives", p->name);
+ return (ENXIO);
+ }
+ }
+ p->flags |= GV_PLEX_REBUILDING;
p->synced = 0;
- error = 0;
-
+ g_topology_assert_not();
g_topology_lock();
- error = g_access(from, 1, 0, 0);
+ error = gv_access(p->vol_sc->provider, 1, 1, 0);
if (error) {
g_topology_unlock();
- G_VINUM_DEBUG(0, "sync from '%s' failed to access "
- "consumer: %d", sync->from->name, error);
- g_free(sync);
- kproc_exit(error);
- }
- error = g_access(to, 0, 1, 0);
- if (error) {
- g_access(from, -1, 0, 0);
- g_topology_unlock();
- G_VINUM_DEBUG(0, "sync to '%s' failed to access "
- "consumer: %d", p->name, error);
- g_free(sync);
- kproc_exit(error);
+ /*
+ * Nothing was started: the topology lock is dropped above, and
+ * the rebuilding mark is taken back here so that a later attempt
+ * is not refused with EINPROGRESS. The failure is reported to
+ * the caller instead of being hidden behind 0.
+ */
+ p->flags &= ~GV_PLEX_REBUILDING;
+ G_VINUM_DEBUG(0, "unable to access provider");
+ return (error);
}
g_topology_unlock();
- G_VINUM_DEBUG(1, "plex sync %s -> %s started", sync->from->name,
- sync->to->name);
- for (i = 0; i < p->size; i+= sync->syncsize) {
- /* Read some bits from the good plex. */
- buf = g_read_data(from, i, sync->syncsize, &error);
- if (buf == NULL) {
- G_VINUM_DEBUG(0, "sync read from '%s' failed at "
- "offset %jd; errno: %d", sync->from->name, i,
- error);
- break;
- }
-
- /*
- * Create a bio and schedule it down on the 'bad' plex. We
- * cannot simply use g_write_data() because we have to let the
- * lower parts know that we are an initialization process and
- * not a 'normal' request.
- */
- bp = g_new_bio();
- if (bp == NULL) {
- G_VINUM_DEBUG(0, "sync write to '%s' failed at "
- "offset %jd; out of memory", p->name, i);
- g_free(buf);
- break;
- }
- bp->bio_cmd = BIO_WRITE;
- bp->bio_offset = i;
- bp->bio_length = sync->syncsize;
- bp->bio_data = buf;
- bp->bio_done = NULL;
-
- /*
- * This hack declare this bio as part of an initialization
- * process, so that the lower levels allow it to get through.
- */
- bp->bio_cflags |= GV_BIO_SYNCREQ;
-
- /* Schedule it down ... */
- g_io_request(bp, to);
+ gv_parity_request(p, GV_BIO_REBUILD, 0);
+ return (0);
+}
- /* ... and wait for the result. */
- error = biowait(bp, "gwrite");
- g_destroy_bio(bp);
- g_free(buf);
- if (error) {
- G_VINUM_DEBUG(0, "sync write to '%s' failed at "
- "offset %jd; errno: %d\n", p->name, i, error);
- break;
- }
+/*
+ * Start growing plex p by issuing the first grow request.
+ *
+ * Returns EINPROGRESS when p is already growing, syncing or rebuilding,
+ * GV_ERR_NOTFOUND when p has no subdisks, the gv_access() error when the
+ * volume provider cannot be accessed, and 0 once the request has been handed
+ * to gv_grow_request().
+ */
+static int
+gv_grow_plex(struct gv_plex *p)
+{
+ struct gv_volume *v;
+ struct gv_sd *s;
+ off_t origsize, origlength;
+ int error, sdcount;
- /* Note that we have synced a little bit more. */
- p->synced += sync->syncsize;
- }
+ KASSERT(p != NULL, ("gv_grow_plex: NULL p"));
+ v = p->vol_sc;
+ KASSERT(v != NULL, ("gv_grow_plex: NULL v"));
+ if (p->flags & GV_PLEX_GROWING ||
+ p->flags & GV_PLEX_SYNCING ||
+ p->flags & GV_PLEX_REBUILDING)
+ return (EINPROGRESS);
+ /*
+ * Reject a plex without subdisks before taking any access on the
+ * volume, so that this error path has nothing to give back.
+ */
+ if (LIST_EMPTY(&p->subdisks)) {
+ G_VINUM_DEBUG(0, "error growing plex without subdisks");
+ return (GV_ERR_NOTFOUND);
+ }
g_topology_lock();
- g_access(from, -1, 0, 0);
- g_access(to, 0, -1, 0);
- gv_save_config_all(p->vinumconf);
+ error = gv_access(v->provider, 1, 1, 0);
g_topology_unlock();
+ if (error) {
+ G_VINUM_DEBUG(0, "unable to access provider");
+ return (error);
+ }
- /* Successful initialization. */
- if (!error)
- G_VINUM_DEBUG(1, "plex sync %s -> %s finished",
- sync->from->name, sync->to->name);
-
- p->flags &= ~GV_PLEX_SYNCING;
+ /* XXX: This routine with finding origsize is used two other places as
+ * well, so we should create a function for it. */
+ /* Count the subdisks that are not flagged GV_SD_GROW. */
+ sdcount = p->sdcount;
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (s->flags & GV_SD_GROW)
+ sdcount--;
+ }
+ /* The list was checked to be non-empty above. */
+ s = LIST_FIRST(&p->subdisks);
+ p->flags |= GV_PLEX_GROWING;
+ origsize = (sdcount - 1) * s->size;
+ origlength = (sdcount - 1) * p->stripesize;
p->synced = 0;
+ G_VINUM_DEBUG(1, "starting growing of plex %s", p->name);
+ gv_grow_request(p, 0, MIN(origlength, origsize), BIO_READ, NULL);
- g_free(sync);
- kproc_exit(error);
+ return (0);
}
-void
-gv_init_td(void *arg)
+/*
+ * Start initializing the subdisks of plex p.
+ *
+ * For each subdisk the state is forced to GV_SD_INITIALIZING, write access
+ * to the consumer of its drive is requested, and a zeroed buffer of
+ * GV_DFLT_SYNCSIZE bytes is handed to gv_init_request(). The walk stops at
+ * the first subdisk that has no drive or whose consumer cannot be accessed.
+ *
+ * Returns EINPROGRESS when a subdisk is already initializing, 0 otherwise.
+ */
+static int
+gv_init_plex(struct gv_plex *p)
{
- struct gv_sd *s;
struct gv_drive *d;
- struct g_geom *gp;
- struct g_consumer *cp;
+ struct gv_sd *s;
int error;
- off_t i, init_size, start, offset, length;
- u_char *buf;
-
- s = arg;
- KASSERT(s != NULL, ("gv_init_td: NULL s"));
- d = s->drive_sc;
- KASSERT(d != NULL, ("gv_init_td: NULL d"));
- gp = d->geom;
- KASSERT(gp != NULL, ("gv_init_td: NULL gp"));
-
- cp = LIST_FIRST(&gp->consumer);
- KASSERT(cp != NULL, ("gv_init_td: NULL cp"));
+ off_t start;
+ caddr_t data;
- s->init_error = 0;
- init_size = s->init_size;
- start = s->drive_offset + s->initialized;
- offset = s->drive_offset;
- length = s->size;
-
- buf = g_malloc(s->init_size, M_WAITOK | M_ZERO);
-
- g_topology_lock();
- error = g_access(cp, 0, 1, 0);
- if (error) {
- s->init_error = error;
- g_topology_unlock();
- G_VINUM_DEBUG(0, "subdisk '%s' init: failed to access "
- "consumer; error: %d", s->name, error);
- kproc_exit(error);
- }
- g_topology_unlock();
+ KASSERT(p != NULL, ("gv_init_plex: NULL p"));
- for (i = start; i < offset + length; i += init_size) {
- error = g_write_data(cp, i, buf, init_size);
- if (error) {
- G_VINUM_DEBUG(0, "subdisk '%s' init: write failed"
- " at offset %jd (drive offset %jd); error %d",
- s->name, (intmax_t)s->initialized, (intmax_t)i,
- error);
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ /*
+ * NOTE(review): this returns in the middle of the walk, after
+ * earlier subdisks may already have been started; confirm that
+ * this is intended.
+ */
+ if (s->state == GV_SD_INITIALIZING)
+ return (EINPROGRESS);
+ gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
+ s->init_size = GV_DFLT_SYNCSIZE;
+ /* Continue on the drive where a previous run left off. */
+ start = s->drive_offset + s->initialized;
+ d = s->drive_sc;
+ if (d == NULL) {
+ /*
+ * NOTE(review): the subdisk was already put into the
+ * initializing state above and 0 is returned after the
+ * break; confirm that this is intended.
+ */
+ G_VINUM_DEBUG(0, "subdisk %s has no drive yet", s->name);
break;
}
- s->initialized += init_size;
- }
-
- g_free(buf);
-
- g_topology_lock();
- g_access(cp, 0, -1, 0);
- g_topology_unlock();
- if (error) {
- s->init_error = error;
- g_topology_lock();
- gv_set_sd_state(s, GV_SD_STALE,
- GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
- g_topology_unlock();
- } else {
+ /*
+ * Take the lock here since we need to avoid a race in
+ * gv_init_request if the BIO is completed before the lock is
+ * released.
+ */
g_topology_lock();
- gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
+ error = g_access(d->consumer, 0, 1, 0);
g_topology_unlock();
- s->initialized = 0;
- G_VINUM_DEBUG(1, "subdisk '%s' init: finished successfully",
- s->name);
+ if (error) {
+ /* NOTE(review): error is dropped; 0 is returned below. */
+ G_VINUM_DEBUG(0, "error accessing consumer when "
+ "initializing %s", s->name);
+ break;
+ }
+ /* The zeroed buffer is handed over to gv_init_request(). */
+ data = g_malloc(s->init_size, M_WAITOK | M_ZERO);
+ gv_init_request(s, start, data, s->init_size);
}
- kproc_exit(error);
+ return (0);
}
diff --git a/sys/geom/vinum/geom_vinum_list.c b/sys/geom/vinum/geom_vinum_list.c
index b1a668f..00b89495 100644
--- a/sys/geom/vinum/geom_vinum_list.c
+++ b/sys/geom/vinum/geom_vinum_list.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,7 +28,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/param.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
@@ -302,19 +301,37 @@ gv_lpi(struct gv_plex *p, struct sbuf *sb, int flags)
sbuf_printf(sb, "Plex %s:\tSize:\t%9jd bytes (%jd MB)\n",
p->name, (intmax_t)p->size, (intmax_t)p->size / MEGABYTE);
sbuf_printf(sb, "\t\tSubdisks: %8d\n", p->sdcount);
- sbuf_printf(sb, "\t\tState: %s\n\t\tOrganization: %s",
- gv_plexstate(p->state), gv_plexorg(p->org));
+ sbuf_printf(sb, "\t\tState: %s\n", gv_plexstate(p->state));
+ /* Show the progress of a running sync, grow or rebuild. */
+ if ((p->flags & GV_PLEX_SYNCING) ||
+ (p->flags & GV_PLEX_GROWING) ||
+ (p->flags & GV_PLEX_REBUILDING)) {
+ sbuf_printf(sb, "\t\tSynced: ");
+ sbuf_printf(sb, "%16jd bytes (%d%%)\n",
+ (intmax_t)p->synced,
+ (p->size > 0) ? (int)((p->synced * 100) / p->size) :
+ 0);
+ }
+ sbuf_printf(sb, "\t\tOrganization: %s", gv_plexorg(p->org));
if (gv_is_striped(p)) {
sbuf_printf(sb, "\tStripe size: %s\n",
gv_roughlength(p->stripesize, 1));
}
+ sbuf_printf(sb, "\t\tFlags: %d\n", p->flags);
if (p->vol_sc != NULL) {
sbuf_printf(sb, "\t\tPart of volume %s\n", p->volume);
}
} else {
- sbuf_printf(sb, "P %-18s %2s State: %s\tSubdisks: %5d"
- "\tSize: %s\n", p->name, gv_plexorg_short(p->org),
- gv_plexstate(p->state), p->sdcount,
+ sbuf_printf(sb, "P %-18s %2s State: ", p->name,
+ gv_plexorg_short(p->org));
+ if ((p->flags & GV_PLEX_SYNCING) ||
+ (p->flags & GV_PLEX_GROWING) ||
+ (p->flags & GV_PLEX_REBUILDING)) {
+ /*
+ * Report 0% for a plex of size 0 instead of dividing by
+ * zero, as the verbose listing above does.
+ */
+ sbuf_printf(sb, "S %d%%\t", (p->size > 0) ?
+ (int)((p->synced * 100) / p->size) : 0);
+ } else {
+ sbuf_printf(sb, "%s\t", gv_plexstate(p->state));
+ }
+ sbuf_printf(sb, "Subdisks: %5d\tSize: %s\n", p->sdcount,
gv_roughlength(p->size, 0));
}
@@ -396,6 +413,7 @@ gv_lsi(struct gv_sd *s, struct sbuf *sb, int flags)
s->drive_sc == NULL ? "*missing*" : s->drive_sc->name,
(intmax_t)s->drive_offset,
gv_roughlength(s->drive_offset, 1));
+ sbuf_printf(sb, "\t\tFlags: %d\n", s->flags);
} else {
sbuf_printf(sb, "S %-21s State: ", s->name);
if (s->state == GV_SD_INITIALIZING ||
@@ -455,6 +473,7 @@ gv_ldi(struct gv_drive *d, struct sbuf *sb, int flags)
sbuf_printf(sb, "\t\tAvailable: %11jd bytes (%jd MB)\n",
(intmax_t)d->avail, (intmax_t)d->avail / MEGABYTE);
sbuf_printf(sb, "\t\tState: %s\n", gv_drivestate(d->state));
+ sbuf_printf(sb, "\t\tFlags: %d\n", d->flags);
/* Be very verbose. */
if (flags & GV_FLAG_VV) {
@@ -469,7 +488,7 @@ gv_ldi(struct gv_drive *d, struct sbuf *sb, int flags)
sbuf_printf(sb, "D %-21s State: %s\t/dev/%s\tA: %jd/%jd MB "
"(%d%%)\n", d->name, gv_drivestate(d->state), d->device,
(intmax_t)d->avail / MEGABYTE, (intmax_t)d->size / MEGABYTE,
- (int)((d->avail * 100) / d->size));
+ d->size > 0 ? (int)((d->avail * 100) / d->size) : 0);
}
/* Recursive listing. */
diff --git a/sys/geom/vinum/geom_vinum_move.c b/sys/geom/vinum/geom_vinum_move.c
index 04822bc..e55a6a2 100644
--- a/sys/geom/vinum/geom_vinum_move.c
+++ b/sys/geom/vinum/geom_vinum_move.c
@@ -32,26 +32,21 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/param.h>
#include <sys/libkern.h>
-#include <sys/kernel.h>
#include <sys/malloc.h>
#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
-#include <geom/vinum/geom_vinum_share.h>
-
-static int gv_move_sd(struct gv_softc *, struct gctl_req *,
- struct gv_sd *, char *, int);
void
gv_move(struct g_geom *gp, struct gctl_req *req)
{
struct gv_softc *sc;
struct gv_sd *s;
+ struct gv_drive *d;
char buf[20], *destination, *object;
- int *argc, err, *flags, i, type;
+ int *argc, *flags, i, type;
sc = gp->softc;
@@ -74,6 +69,7 @@ gv_move(struct g_geom *gp, struct gctl_req *req)
gctl_error(req, "destination '%s' is not a drive", destination);
return;
}
+ d = gv_find_drive(sc, destination);
/*
* We start with 1 here, because argv[0] on the command line is the
@@ -97,67 +93,59 @@ gv_move(struct g_geom *gp, struct gctl_req *req)
gctl_error(req, "unknown subdisk '%s'", object);
return;
}
- err = gv_move_sd(sc, req, s, destination, *flags);
- if (err)
- return;
+ gv_post_event(sc, GV_EVENT_MOVE_SD, s, d, *flags, 0);
}
-
- gv_save_config_all(sc);
}
/* Move a subdisk. */
+/*
+ * The move is refused with GV_ERR_ISBUSY or GV_ERR_INVFLAG unless GV_FLAG_F
+ * is set in flags, with GV_ERR_ISATTACHED when cursd already lives on
+ * destination, and with GV_ERR_NOTFOUND when cursd is not part of a plex.
+ */
-static int
-gv_move_sd(struct gv_softc *sc, struct gctl_req *req, struct gv_sd *cursd, char *destination, int flags)
+int
+gv_move_sd(struct gv_softc *sc, struct gv_sd *cursd,
+ struct gv_drive *destination, int flags)
{
struct gv_drive *d;
struct gv_sd *newsd, *s, *s2;
struct gv_plex *p;
- struct g_consumer *cp;
- char errstr[ERRBUFSIZ];
int err;
g_topology_assert();
KASSERT(cursd != NULL, ("gv_move_sd: NULL cursd"));
+ KASSERT(destination != NULL, ("gv_move_sd: NULL destination"));
- cp = cursd->consumer;
+ d = cursd->drive_sc;
- if (cp != NULL && (cp->acr || cp->acw || cp->ace)) {
- gctl_error(req, "subdisk '%s' is busy", cursd->name);
- return (-1);
+ /*
+ * GV_FLAG_F is a bit in flags, so it has to be tested with a bitwise
+ * AND; a logical AND is true for any non-zero flags value. A subdisk
+ * whose drive is missing has no consumer that could be open.
+ */
+ if (((d != NULL && gv_consumer_is_open(d->consumer)) ||
+ gv_consumer_is_open(destination->consumer)) &&
+ !(flags & GV_FLAG_F)) {
+ G_VINUM_DEBUG(0, "consumers on current and destination drive "
+ " still open");
+ return (GV_ERR_ISBUSY);
}
- if (!(flags && GV_FLAG_F)) {
+ if (!(flags & GV_FLAG_F)) {
- gctl_error(req, "-f flag not passed; move would be "
+ G_VINUM_DEBUG(1, "-f flag not passed; move would be "
"destructive");
- return (-1);
+ return (GV_ERR_INVFLAG);
}
- d = gv_find_drive(sc, destination);
- if (d == NULL) {
- gctl_error(req, "destination drive '%s' not found",
- destination);
- return (-1);
- }
-
- if (d == cursd->drive_sc) {
- gctl_error(req, "subdisk '%s' already on drive '%s'",
- cursd->name, destination);
- return (-1);
+ if (destination == cursd->drive_sc) {
+ G_VINUM_DEBUG(1, "subdisk '%s' already on drive '%s'",
+ cursd->name, destination->name);
+ return (GV_ERR_ISATTACHED);
}
/* XXX: Does it have to be part of a plex? */
p = gv_find_plex(sc, cursd->plex);
if (p == NULL) {
- gctl_error(req, "subdisk '%s' is not part of a plex",
+ G_VINUM_DEBUG(0, "subdisk '%s' is not part of a plex",
cursd->name);
- return (-1);
+ return (GV_ERR_NOTFOUND);
}
-
+
/* Stale the old subdisk. */
err = gv_set_sd_state(cursd, GV_SD_STALE,
GV_SETSTATE_FORCE | GV_SETSTATE_CONFIG);
if (err) {
- gctl_error(req, "could not set the subdisk '%s' to state "
+ G_VINUM_DEBUG(0, "could not set the subdisk '%s' to state "
"'stale'", cursd->name);
return (err);
}
@@ -171,54 +159,30 @@ gv_move_sd(struct gv_softc *sc, struct gctl_req *req, struct gv_sd *cursd, char
newsd->plex_offset = cursd->plex_offset;
newsd->size = cursd->size;
newsd->drive_offset = -1;
- strncpy(newsd->name, cursd->name, GV_MAXSDNAME);
- strncpy(newsd->drive, destination, GV_MAXDRIVENAME);
- strncpy(newsd->plex, cursd->plex, GV_MAXPLEXNAME);
+ strlcpy(newsd->name, cursd->name, sizeof(newsd->name));
+ strlcpy(newsd->drive, destination->name, sizeof(newsd->drive));
+ strlcpy(newsd->plex, cursd->plex, sizeof(newsd->plex));
newsd->state = GV_SD_STALE;
newsd->vinumconf = cursd->vinumconf;
- err = gv_sd_to_drive(sc, d, newsd, errstr, ERRBUFSIZ);
+ err = gv_sd_to_drive(newsd, destination);
if (err) {
/* XXX not enough free space? */
- gctl_error(req, errstr);
g_free(newsd);
return (err);
}
/* Replace the old sd by the new one. */
- if (cp != NULL)
- g_detach(cp);
LIST_FOREACH_SAFE(s, &p->subdisks, in_plex, s2) {
if (s == cursd) {
- p->sdcount--;
- p->size -= s->size;
- err = gv_rm_sd(sc, req, s, 0);
- if (err)
- return (err);
-
+ gv_rm_sd(sc, s);
}
}
-
- gv_sd_to_plex(p, newsd, 1);
-
- /* Creates the new providers.... */
- gv_drive_modify(d);
-
- /* And reconnect the consumer ... */
- if (cp != NULL) {
- newsd->consumer = cp;
- err = g_attach(cp, newsd->provider);
- if (err) {
- g_destroy_consumer(cp);
- gctl_error(req, "proposed move would create a loop "
- "in GEOM config");
- return (err);
- }
- }
-
+ gv_sd_to_plex(newsd, p);
LIST_INSERT_HEAD(&sc->subdisks, newsd, sd);
-
- gv_save_config_all(sc);
-
+ /* Update volume size of plex. */
+ if (p->vol_sc != NULL)
+ gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
+ gv_save_config(p->vinumconf);
return (0);
}
diff --git a/sys/geom/vinum/geom_vinum_plex.c b/sys/geom/vinum/geom_vinum_plex.c
index 8281cb2..f177068 100644
--- a/sys/geom/vinum/geom_vinum_plex.c
+++ b/sys/geom/vinum/geom_vinum_plex.c
@@ -1,5 +1,6 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
+ * Copyright (c) 2007, 2009 Ulf Lilleengen
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,13 +30,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bio.h>
-#include <sys/kernel.h>
-#include <sys/kthread.h>
-#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/mutex.h>
#include <sys/systm.h>
#include <geom/geom.h>
@@ -43,329 +39,422 @@ __FBSDID("$FreeBSD$");
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>
-static void gv_plex_completed_request(struct gv_plex *, struct bio *);
-static void gv_plex_normal_request(struct gv_plex *, struct bio *);
-static void gv_plex_worker(void *);
-static int gv_check_parity(struct gv_plex *, struct bio *,
- struct gv_raid5_packet *);
-static int gv_normal_parity(struct gv_plex *, struct bio *,
- struct gv_raid5_packet *);
-
-/* XXX: is this the place to catch dying subdisks? */
-static void
-gv_plex_orphan(struct g_consumer *cp)
+static int gv_check_parity(struct gv_plex *, struct bio *,
+ struct gv_raid5_packet *);
+static int gv_normal_parity(struct gv_plex *, struct bio *,
+ struct gv_raid5_packet *);
+static void gv_plex_flush(struct gv_plex *);
+static int gv_plex_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
+ int *, int);
+static int gv_plex_normal_request(struct gv_plex *, struct bio *, off_t,
+ off_t, caddr_t);
+/*
+ * Split the request bp for plex p into sub-requests and issue them.
+ *
+ * The first loop walks the request and lets gv_raid5_start() or
+ * gv_plex_normal_request() build one piece per iteration; it returns early
+ * when either of them reports failure. The second loop takes the queued
+ * sub-requests off p->bqueue and either sends them to the consumer of their
+ * drive or parks them on p->wqueue.
+ */
+void
+gv_plex_start(struct gv_plex *p, struct bio *bp)
{
- struct g_geom *gp;
- struct gv_plex *p;
- int error;
+ struct bio *cbp;
+ struct gv_sd *s;
+ struct gv_raid5_packet *wp;
+ caddr_t addr;
+ off_t bcount, boff, len;
- g_topology_assert();
- gp = cp->geom;
- g_trace(G_T_TOPOLOGY, "gv_plex_orphan(%s)", gp->name);
-
- if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
- g_access(cp, -cp->acr, -cp->acw, -cp->ace);
- error = cp->provider->error;
- if (error == 0)
- error = ENXIO;
- g_detach(cp);
- g_destroy_consumer(cp);
- if (!LIST_EMPTY(&gp->consumer))
- return;
+ bcount = bp->bio_length;
+ addr = bp->bio_data;
+ boff = bp->bio_offset;
+
+ /* Walk over the whole length of the request, we might split it up. */
+ while (bcount > 0) {
+ wp = NULL;
+
+ /*
+ * RAID5 plexes need special treatment, as a single request
+ * might involve several read/write sub-requests.
+ */
+ if (p->org == GV_PLEX_RAID5) {
+ wp = gv_raid5_start(p, bp, addr, boff, bcount);
+ if (wp == NULL)
+ return;
+
+ /* Read the length first, wp may be freed just below. */
+ len = wp->length;
- p = gp->softc;
- if (p != NULL) {
- gv_kill_plex_thread(p);
- p->geom = NULL;
- p->provider = NULL;
- p->consumer = NULL;
+ if (TAILQ_EMPTY(&wp->bits))
+ g_free(wp);
+ else if (wp->lockbase != -1)
+ TAILQ_INSERT_TAIL(&p->packets, wp, list);
+
+ /*
+ * Requests to concatenated and striped plexes go straight
+ * through.
+ */
+ } else {
+ len = gv_plex_normal_request(p, bp, boff, bcount, addr);
+ }
+ /* A negative length means the piece could not be built. */
+ if (len < 0)
+ return;
+
+ bcount -= len;
+ addr += len;
+ boff += len;
}
- gp->softc = NULL;
- g_wither_geom(gp, error);
-}
-void
-gv_plex_done(struct bio *bp)
-{
- struct gv_plex *p;
-
- p = bp->bio_from->geom->softc;
- bp->bio_cflags |= GV_BIO_DONE;
- mtx_lock(&p->bqueue_mtx);
- bioq_insert_tail(p->bqueue, bp);
- wakeup(p);
- mtx_unlock(&p->bqueue_mtx);
+ /*
+ * Fire off all sub-requests. We get the correct consumer (== drive)
+ * to send each request to via the subdisk that was stored in
+ * cbp->bio_caller1.
+ */
+ cbp = bioq_takefirst(p->bqueue);
+ while (cbp != NULL) {
+ /*
+ * RAID5 sub-requests need to come in correct order, otherwise
+ * we trip over the parity, as it might be overwritten by
+ * another sub-request. We abuse cbp->bio_caller2 to mark
+ * potential overlap situations.
+ */
+ if (cbp->bio_caller2 != NULL && gv_stripe_active(p, cbp)) {
+ /* Park the bio on the waiting queue. */
+ cbp->bio_cflags |= GV_BIO_ONHOLD;
+ bioq_disksort(p->wqueue, cbp);
+ } else {
+ s = cbp->bio_caller1;
+ g_io_request(cbp, s->drive_sc->consumer);
+ }
+ cbp = bioq_takefirst(p->bqueue);
+ }
}
-/* Find the correct subdisk to send the bio to and build a bio to send. */
+/*
+ * Translate the plex offset boff and length bcount of a request to plex p
+ * into the index of the subdisk it starts on (*sdno), the offset inside that
+ * subdisk (*real_off) and the part of the length that fits there
+ * (*real_len). Only concatenated and striped plexes are handled.
+ *
+ * Returns 0 on success, GV_ERR_NOTFOUND when no subdisk with a drive covers
+ * boff, GV_ERR_ISBUSY when a striped plex is growing, the request is not
+ * completely below p->synced and growing is not set, and GV_ERR_PLEXORG for
+ * any other plex organization.
+ */
static int
-gv_plexbuffer(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff, off_t bcount)
+gv_plex_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
+ off_t *real_len, int *sdno, int growing)
{
- struct g_geom *gp;
struct gv_sd *s;
- struct bio *cbp, *pbp;
- int i, sdno;
- off_t len_left, real_len, real_off;
- off_t stripeend, stripeno, stripestart;
-
- if (p == NULL || LIST_EMPTY(&p->subdisks))
- return (ENXIO);
-
- s = NULL;
- gp = bp->bio_to->geom;
+ int i, sdcount;
+ off_t len_left, stripeend, stripeno, stripestart;
- /*
- * We only handle concatenated and striped plexes here. RAID5 plexes
- * are handled in build_raid5_request().
- */
switch (p->org) {
case GV_PLEX_CONCAT:
/*
* Find the subdisk where this request starts. The subdisks in
* this list must be ordered by plex_offset.
*/
+ i = 0;
LIST_FOREACH(s, &p->subdisks, in_plex) {
if (s->plex_offset <= boff &&
- s->plex_offset + s->size > boff)
+ s->plex_offset + s->size > boff) {
+ *sdno = i;
break;
+ }
+ i++;
}
- /* Subdisk not found. */
- if (s == NULL)
- return (ENXIO);
+ if (s == NULL || s->drive_sc == NULL)
+ return (GV_ERR_NOTFOUND);
/* Calculate corresponding offsets on disk. */
- real_off = boff - s->plex_offset;
- len_left = s->size - real_off;
- real_len = (bcount > len_left) ? len_left : bcount;
+ *real_off = boff - s->plex_offset;
+ len_left = s->size - (*real_off);
+ KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
+ *real_len = (bcount > len_left) ? len_left : bcount;
break;
case GV_PLEX_STRIPED:
/* The number of the stripe where the request starts. */
stripeno = boff / p->stripesize;
+ KASSERT(stripeno >= 0, ("gv_plex_offset: stripeno < 0"));
- /* The number of the subdisk where the stripe resides. */
- sdno = stripeno % p->sdcount;
-
- /* Find the right subdisk. */
- i = 0;
- LIST_FOREACH(s, &p->subdisks, in_plex) {
- if (i == sdno)
- break;
- i++;
- }
+ /* Take growing subdisks into account when calculating. */
+ sdcount = gv_sdcount(p, (boff >= p->synced));
- /* Subdisk not found. */
- if (s == NULL)
- return (ENXIO);
+ if (!(boff + bcount <= p->synced) &&
+ (p->flags & GV_PLEX_GROWING) &&
+ !growing)
+ return (GV_ERR_ISBUSY);
+ *sdno = stripeno % sdcount;
- /* The offset of the stripe from the start of the subdisk. */
- stripestart = (stripeno / p->sdcount) *
+ /* Check the computed index, not the pointer it is stored through. */
+ KASSERT(*sdno >= 0, ("gv_plex_offset: sdno < 0"));
+ stripestart = (stripeno / sdcount) *
p->stripesize;
-
- /* The offset at the end of the stripe. */
+ KASSERT(stripestart >= 0, ("gv_plex_offset: stripestart < 0"));
stripeend = stripestart + p->stripesize;
-
- /* The offset of the request on this subdisk. */
- real_off = boff - (stripeno * p->stripesize) +
+ *real_off = boff - (stripeno * p->stripesize) +
stripestart;
+ len_left = stripeend - *real_off;
+ KASSERT(len_left >= 0, ("gv_plex_offset: len_left < 0"));
- /* The length left in this stripe. */
- len_left = stripeend - real_off;
-
- real_len = (bcount <= len_left) ? bcount : len_left;
+ *real_len = (bcount <= len_left) ? bcount : len_left;
break;
default:
- return (EINVAL);
+ return (GV_ERR_PLEXORG);
+ }
+ return (0);
+}
+
+/*
+ * Prepare a normal plex request.
+ *
+ * Builds one sub-request of bp for the subdisk of plex p that covers the
+ * plex offset boff, using addr as its data pointer, and appends it to
+ * p->bqueue. Returns the length covered by the sub-request, or -1 when the
+ * request was put on p->rqueue to wait or could not be built; in the latter
+ * case bp is destroyed (internal requests) or delivered with an error.
+ */
+static int
+gv_plex_normal_request(struct gv_plex *p, struct bio *bp, off_t boff,
+ off_t bcount, caddr_t addr)
+{
+ struct gv_sd *s;
+ struct bio *cbp;
+ off_t real_len, real_off;
+ int i, err, sdno;
+
+ s = NULL;
+ sdno = -1;
+ real_len = real_off = 0;
+
+ err = ENXIO;
+
+ /*
+ * NOTE(review): the bad path below dereferences p for internal
+ * requests, so the p == NULL case is not actually safe there.
+ */
+ if (p == NULL || LIST_EMPTY(&p->subdisks))
+ goto bad;
+
+ err = gv_plex_offset(p, boff, bcount, &real_off,
+ &real_len, &sdno, (bp->bio_pflags & GV_BIO_SYNCREQ));
+ /* If the request was blocked, put it into wait. */
+ if (err == GV_ERR_ISBUSY) {
+ bioq_disksort(p->rqueue, bp);
+ return (-1); /* "Fail", and delay request. */
+ }
+ /* Every other gv_plex_offset() failure is reported as ENXIO. */
+ if (err) {
+ err = ENXIO;
+ goto bad;
}
+ err = ENXIO;
+
+ /* Find the right subdisk. */
+ i = 0;
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (i == sdno)
+ break;
+ i++;
+ }
+
+ /* Subdisk not found. */
+ if (s == NULL || s->drive_sc == NULL)
+ goto bad;
/* Now check if we can handle the request on this subdisk. */
switch (s->state) {
case GV_SD_UP:
/* If the subdisk is up, just continue. */
break;
-
+ case GV_SD_DOWN:
+ if (bp->bio_cflags & GV_BIO_INTERNAL)
+ G_VINUM_DEBUG(0, "subdisk must be in the stale state in"
+ " order to perform administrative requests");
+ goto bad;
case GV_SD_STALE:
- if (!(bp->bio_cflags & GV_BIO_SYNCREQ))
- return (ENXIO);
+ if (!(bp->bio_cflags & GV_BIO_SYNCREQ)) {
+ G_VINUM_DEBUG(0, "subdisk stale, unable to perform "
+ "regular requests");
+ goto bad;
+ }
G_VINUM_DEBUG(1, "sd %s is initializing", s->name);
gv_set_sd_state(s, GV_SD_INITIALIZING, GV_SETSTATE_FORCE);
break;
-
case GV_SD_INITIALIZING:
if (bp->bio_cmd == BIO_READ)
- return (ENXIO);
+ goto bad;
break;
-
default:
/* All other subdisk states mean it's not accessible. */
- return (ENXIO);
+ goto bad;
}
/* Clone the bio and adjust the offsets and sizes. */
cbp = g_clone_bio(bp);
- if (cbp == NULL)
- return (ENOMEM);
- cbp->bio_offset = real_off;
+ if (cbp == NULL) {
+ err = ENOMEM;
+ goto bad;
+ }
+ /* The sub-request is addressed relative to the drive. */
+ cbp->bio_offset = real_off + s->drive_offset;
cbp->bio_length = real_len;
cbp->bio_data = addr;
- cbp->bio_done = g_std_done;
- cbp->bio_caller2 = s->consumer;
- if ((bp->bio_cflags & GV_BIO_SYNCREQ)) {
+ cbp->bio_done = gv_done;
+ cbp->bio_caller1 = s;
+ if ((bp->bio_cflags & GV_BIO_SYNCREQ))
cbp->bio_cflags |= GV_BIO_SYNCREQ;
- cbp->bio_done = gv_plex_done;
- }
- if (bp->bio_driver1 == NULL) {
- bp->bio_driver1 = cbp;
- } else {
- pbp = bp->bio_driver1;
- while (pbp->bio_caller1 != NULL)
- pbp = pbp->bio_caller1;
- pbp->bio_caller1 = cbp;
+ /* Store the sub-requests now and let others issue them. */
+ bioq_insert_tail(p->bqueue, cbp);
+ return (real_len);
+bad:
+ G_VINUM_LOGREQ(0, bp, "plex request failed.");
+ /* Building the sub-request failed. If internal BIO, do not deliver. */
+ if (bp->bio_cflags & GV_BIO_INTERNAL) {
+ if (bp->bio_cflags & GV_BIO_MALLOC)
+ g_free(bp->bio_data);
+ g_destroy_bio(bp);
+ /* The operation this request belonged to is over. */
+ p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
+ GV_PLEX_GROWING);
+ return (-1);
}
-
- return (0);
+ g_io_deliver(bp, err);
+ return (-1);
}
-static void
-gv_plex_start(struct bio *bp)
+/*
+ * Handle a completed request to a striped or concatenated plex.
+ *
+ * bp is a finished sub-request. Its error is recorded in the parent unless
+ * the parent already carries one, and bp is destroyed. Once all children of
+ * the parent are in, the parent is passed to gv_sync_complete() or
+ * gv_grow_complete(), or delivered with g_io_deliver().
+ */
+void
+gv_plex_normal_done(struct gv_plex *p, struct bio *bp)
{
- struct gv_plex *p;
-
- switch(bp->bio_cmd) {
- case BIO_READ:
- case BIO_WRITE:
- case BIO_DELETE:
- break;
- case BIO_GETATTR:
- default:
- g_io_deliver(bp, EOPNOTSUPP);
- return;
- }
+ struct bio *pbp;
- /*
- * We cannot handle this request if too many of our subdisks are
- * inaccessible.
- */
- p = bp->bio_to->geom->softc;
- if ((p->state < GV_PLEX_DEGRADED) &&
- !(bp->bio_cflags & GV_BIO_SYNCREQ)) {
- g_io_deliver(bp, ENXIO);
- return;
+ pbp = bp->bio_parent;
+ /* Keep the first error that was reported for the parent. */
+ if (pbp->bio_error == 0)
+ pbp->bio_error = bp->bio_error;
+ g_destroy_bio(bp);
+ pbp->bio_inbed++;
+ if (pbp->bio_children == pbp->bio_inbed) {
+ /* Just set it to length since multiple plexes will
+ * screw things up. */
+ pbp->bio_completed = pbp->bio_length;
+ /* The flag in bio_cflags is checked before bio_pflags. */
+ if (pbp->bio_cflags & GV_BIO_SYNCREQ)
+ gv_sync_complete(p, pbp);
+ else if (pbp->bio_pflags & GV_BIO_SYNCREQ)
+ gv_grow_complete(p, pbp);
+ else
+ g_io_deliver(pbp, pbp->bio_error);
}
-
- mtx_lock(&p->bqueue_mtx);
- bioq_disksort(p->bqueue, bp);
- wakeup(p);
- mtx_unlock(&p->bqueue_mtx);
}
-static void
-gv_plex_worker(void *arg)
+/*
+ * Handle a completed request to a RAID-5 plex.
+ *
+ * bp is a finished sub-request; the packet it belongs to, if any, is taken
+ * from bp->bio_caller2. The sub-request is removed from the packet and its
+ * data is XORed into the packet data (reads) or into the parity bio
+ * (writes). When a packet is finished it is freed and the bios waiting on
+ * p->wqueue are moved to the bqueue of the softc. Finally the parent is
+ * updated and, once all of its children are in, handed to the matching
+ * completion routine or delivered.
+ */
+void
+gv_plex_raid5_done(struct gv_plex *p, struct bio *bp)
{
- struct bio *bp;
- struct gv_plex *p;
- struct gv_sd *s;
+ struct gv_softc *sc;
+ struct bio *cbp, *pbp;
+ struct gv_bioq *bq, *bq2;
+ struct gv_raid5_packet *wp;
+ off_t completed;
+ int i;
- p = arg;
- KASSERT(p != NULL, ("NULL p"));
+ completed = 0;
+ sc = p->vinumconf;
+ wp = bp->bio_caller2;
- mtx_lock(&p->bqueue_mtx);
- for (;;) {
- /* We were signaled to exit. */
- if (p->flags & GV_PLEX_THREAD_DIE)
+ switch (bp->bio_parent->bio_cmd) {
+ case BIO_READ:
+ /* Without a packet the sub-request counts on its own. */
+ if (wp == NULL) {
+ completed = bp->bio_completed;
break;
+ }
- /* Take the first BIO from our queue. */
- bp = bioq_takefirst(p->bqueue);
- if (bp == NULL) {
- msleep(p, &p->bqueue_mtx, PRIBIO, "-", hz/10);
- continue;
+ /* Remove bp from the packet and fold its data in. */
+ TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
+ if (bq->bp != bp)
+ continue;
+ TAILQ_REMOVE(&wp->bits, bq, queue);
+ g_free(bq);
+ for (i = 0; i < wp->length; i++)
+ wp->data[i] ^= bp->bio_data[i];
+ break;
}
- mtx_unlock(&p->bqueue_mtx);
-
- /* A completed request. */
- if (bp->bio_cflags & GV_BIO_DONE) {
- if (bp->bio_cflags & GV_BIO_SYNCREQ ||
- bp->bio_cflags & GV_BIO_REBUILD) {
- s = bp->bio_to->private;
- if (bp->bio_error == 0)
- s->initialized += bp->bio_length;
- if (s->initialized >= s->size) {
- g_topology_lock();
- gv_set_sd_state(s, GV_SD_UP,
- GV_SETSTATE_CONFIG);
- g_topology_unlock();
- s->initialized = 0;
+ /* That was the last outstanding piece of the packet. */
+ if (TAILQ_EMPTY(&wp->bits)) {
+ completed = wp->length;
+ if (wp->lockbase != -1) {
+ TAILQ_REMOVE(&p->packets, wp, list);
+ /* Bring the waiting bios back into the game. */
+ pbp = bioq_takefirst(p->wqueue);
+ while (pbp != NULL) {
+ mtx_lock(&sc->queue_mtx);
+ bioq_disksort(sc->bqueue, pbp);
+ mtx_unlock(&sc->queue_mtx);
+ pbp = bioq_takefirst(p->wqueue);
}
}
+ g_free(wp);
+ }
- if (bp->bio_cflags & GV_BIO_SYNCREQ)
- g_std_done(bp);
- else
- gv_plex_completed_request(p, bp);
- /*
- * A sub-request that was hold back because it interfered with
- * another sub-request.
- */
- } else if (bp->bio_cflags & GV_BIO_ONHOLD) {
- /* Is it still locked out? */
- if (gv_stripe_active(p, bp)) {
- /* Park the bio on the waiting queue. */
- mtx_lock(&p->bqueue_mtx);
- bioq_disksort(p->wqueue, bp);
- mtx_unlock(&p->bqueue_mtx);
- } else {
- bp->bio_cflags &= ~GV_BIO_ONHOLD;
- g_io_request(bp, bp->bio_caller2);
- }
+ break;
- /* A normal request to this plex. */
- } else
- gv_plex_normal_request(p, bp);
+ case BIO_WRITE:
+ /* XXX can this ever happen? */
+ if (wp == NULL) {
+ completed = bp->bio_completed;
+ break;
+ }
- mtx_lock(&p->bqueue_mtx);
- }
- mtx_unlock(&p->bqueue_mtx);
- p->flags |= GV_PLEX_THREAD_DEAD;
- wakeup(p);
+ /* Check if we need to handle parity data. */
+ TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
+ if (bq->bp != bp)
+ continue;
+ TAILQ_REMOVE(&wp->bits, bq, queue);
+ g_free(bq);
+ cbp = wp->parity;
+ if (cbp != NULL) {
+ for (i = 0; i < wp->length; i++)
+ cbp->bio_data[i] ^= bp->bio_data[i];
+ }
+ break;
+ }
- kproc_exit(ENXIO);
-}
+ /* Handle parity data. */
+ if (TAILQ_EMPTY(&wp->bits)) {
+ /* i is reused here for the result of the parity step. */
+ if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
+ i = gv_check_parity(p, bp, wp);
+ else
+ i = gv_normal_parity(p, bp, wp);
-static int
-gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
-{
- struct bio *cbp, *pbp;
- int finished, i;
+ /* All of our sub-requests have finished. */
+ if (i) {
+ completed = wp->length;
+ TAILQ_REMOVE(&p->packets, wp, list);
+ /* Bring the waiting bios back into the game. */
+ pbp = bioq_takefirst(p->wqueue);
+ while (pbp != NULL) {
+ mtx_lock(&sc->queue_mtx);
+ bioq_disksort(sc->bqueue, pbp);
+ mtx_unlock(&sc->queue_mtx);
+ pbp = bioq_takefirst(p->wqueue);
+ }
+ g_free(wp);
+ }
+ }
- finished = 1;
+ break;
+ }
- if (wp->waiting != NULL) {
- pbp = wp->waiting;
- wp->waiting = NULL;
- cbp = wp->parity;
- for (i = 0; i < wp->length; i++)
- cbp->bio_data[i] ^= pbp->bio_data[i];
- g_io_request(pbp, pbp->bio_caller2);
- finished = 0;
+ /* Keep the first error and add up what was completed. */
+ pbp = bp->bio_parent;
+ if (pbp->bio_error == 0)
+ pbp->bio_error = bp->bio_error;
+ pbp->bio_completed += completed;
- } else if (wp->parity != NULL) {
- cbp = wp->parity;
- wp->parity = NULL;
- g_io_request(cbp, cbp->bio_caller2);
- finished = 0;
+ /* When the original request is finished, we deliver it. */
+ pbp->bio_inbed++;
+ if (pbp->bio_inbed == pbp->bio_children) {
+ /* Hand it over for checking or delivery. */
+ if (pbp->bio_cmd == BIO_WRITE &&
+ (pbp->bio_cflags & GV_BIO_CHECK)) {
+ gv_parity_complete(p, pbp);
+ } else if (pbp->bio_cmd == BIO_WRITE &&
+ (pbp->bio_cflags & GV_BIO_REBUILD)) {
+ gv_rebuild_complete(p, pbp);
+ } else if (pbp->bio_cflags & GV_BIO_INIT) {
+ gv_init_complete(p, pbp);
+ } else if (pbp->bio_cflags & GV_BIO_SYNCREQ) {
+ gv_sync_complete(p, pbp);
+ } else if (pbp->bio_pflags & GV_BIO_SYNCREQ) {
+ gv_grow_complete(p, pbp);
+ } else {
+ g_io_deliver(pbp, pbp->bio_error);
+ }
}
- return (finished);
+ /* Clean up what we allocated. */
+ if (bp->bio_cflags & GV_BIO_MALLOC)
+ g_free(bp->bio_data);
+ g_destroy_bio(bp);
}
static int
gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{
struct bio *pbp;
+ struct gv_sd *s;
int err, finished, i;
err = 0;
@@ -374,7 +463,8 @@ gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
if (wp->waiting != NULL) {
pbp = wp->waiting;
wp->waiting = NULL;
- g_io_request(pbp, pbp->bio_caller2);
+ s = pbp->bio_caller1;
+ g_io_request(pbp, s->drive_sc->consumer);
finished = 0;
} else if (wp->parity != NULL) {
@@ -395,7 +485,8 @@ gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
/* ... but we rebuild it. */
if (bp->bio_parent->bio_cflags & GV_BIO_PARITY) {
- g_io_request(pbp, pbp->bio_caller2);
+ s = pbp->bio_caller1;
+ g_io_request(pbp, s->drive_sc->consumer);
finished = 0;
}
}
@@ -414,454 +505,542 @@ gv_check_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
return (finished);
}
-void
-gv_plex_completed_request(struct gv_plex *p, struct bio *bp)
+static int
+gv_normal_parity(struct gv_plex *p, struct bio *bp, struct gv_raid5_packet *wp)
{ /*
+ * Issue the next deferred bio of work packet wp, if there is one.
+ * Returns 0 when a bio was sent down and 1 when neither wp->waiting nor
+ * wp->parity was pending.  The added code does not read p or bp.
+ */
struct bio *cbp, *pbp;
- struct gv_bioq *bq, *bq2;
- struct gv_raid5_packet *wp;
- int i;
+ struct gv_sd *s;
+ int finished, i;
- wp = bp->bio_driver1;
+ finished = 1;
- switch (bp->bio_parent->bio_cmd) {
- case BIO_READ:
- if (wp == NULL)
- break;
+ if (wp->waiting != NULL) {
+ pbp = wp->waiting;
+ wp->waiting = NULL;
+ cbp = wp->parity;
+ for (i = 0; i < wp->length; i++) /* Fold the waiting bio's data into the parity buffer. */
+ cbp->bio_data[i] ^= pbp->bio_data[i];
+ s = pbp->bio_caller1; /* bio_caller1 is read back as the target subdisk. */
+ g_io_request(pbp, s->drive_sc->consumer);
+ finished = 0;
- TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
- if (bq->bp == bp) {
- TAILQ_REMOVE(&wp->bits, bq, queue);
- g_free(bq);
- for (i = 0; i < wp->length; i++)
- wp->data[i] ^= bp->bio_data[i];
- break;
- }
- }
- if (TAILQ_EMPTY(&wp->bits)) {
- bp->bio_parent->bio_completed += wp->length;
- if (wp->lockbase != -1) {
- TAILQ_REMOVE(&p->packets, wp, list);
- /* Bring the waiting bios back into the game. */
- mtx_lock(&p->bqueue_mtx);
- pbp = bioq_takefirst(p->wqueue);
- while (pbp != NULL) {
- bioq_disksort(p->bqueue, pbp);
- pbp = bioq_takefirst(p->wqueue);
- }
- mtx_unlock(&p->bqueue_mtx);
- }
- g_free(wp);
- }
+ } else if (wp->parity != NULL) { /* No waiting bio left: send the parity bio itself. */
+ cbp = wp->parity;
+ wp->parity = NULL;
+ s = cbp->bio_caller1;
+ g_io_request(cbp, s->drive_sc->consumer);
+ finished = 0;
+ }
- break;
+ return (finished);
+}
- case BIO_WRITE:
- if (wp == NULL)
- break;
+/*
+ * Flush the queue with delayed requests: take every bio parked on
+ * p->rqueue, first to last, and hand it to gv_plex_start() until the
+ * queue is empty.
+ */
+static void
+gv_plex_flush(struct gv_plex *p)
+{
+ struct bio *bp;
- /* Check if we need to handle parity data. */
- TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
- if (bq->bp == bp) {
- TAILQ_REMOVE(&wp->bits, bq, queue);
- g_free(bq);
- cbp = wp->parity;
- if (cbp != NULL) {
- for (i = 0; i < wp->length; i++)
- cbp->bio_data[i] ^=
- bp->bio_data[i];
- }
- break;
- }
- }
+ while ((bp = bioq_takefirst(p->rqueue)) != NULL)
+ gv_plex_start(p, bp);
+}
- /* Handle parity data. */
- if (TAILQ_EMPTY(&wp->bits)) {
- if (bp->bio_parent->bio_cflags & GV_BIO_CHECK)
- i = gv_check_parity(p, bp, wp);
- else
- i = gv_normal_parity(p, bp, wp);
+/*
+ * Build a plex sync bio and queue it for the worker.
+ *
+ * from, to - stored in bio_caller1 and bio_caller2 respectively.
+ * offset, length - region covered by the bio.
+ * type - bio command to store in bio_cmd.
+ * data - buffer to use; when NULL a buffer of length bytes is allocated.
+ *
+ * The bio is flagged GV_BIO_SYNCREQ and, whether or not the buffer was
+ * allocated here, GV_BIO_MALLOC.  It is inserted into sc->bqueue under
+ * sc->queue_mtx.  Returns 0 on success or ENOMEM when no bio could be
+ * allocated.
+ */
+int
+gv_sync_request(struct gv_plex *from, struct gv_plex *to, off_t offset,
+ off_t length, int type, caddr_t data)
+{
+ struct gv_softc *sc;
+ struct bio *bp;
- /* All of our sub-requests have finished. */
- if (i) {
- bp->bio_parent->bio_completed += wp->length;
- TAILQ_REMOVE(&p->packets, wp, list);
- /* Bring the waiting bios back into the game. */
- mtx_lock(&p->bqueue_mtx);
- pbp = bioq_takefirst(p->wqueue);
- while (pbp != NULL) {
- bioq_disksort(p->bqueue, pbp);
- pbp = bioq_takefirst(p->wqueue);
- }
- mtx_unlock(&p->bqueue_mtx);
- g_free(wp);
- }
- }
+ KASSERT(from != NULL, ("NULL from"));
+ KASSERT(to != NULL, ("NULL to"));
+ sc = from->vinumconf;
+ KASSERT(sc != NULL, ("NULL sc"));
- break;
+ bp = g_new_bio();
+ if (bp == NULL) {
+ /* %jd takes an intmax_t, so cast the off_t explicitly. */
+ G_VINUM_DEBUG(0, "sync from '%s' failed at offset "
+ " %jd; out of memory", from->name, (intmax_t)offset);
+ return (ENOMEM);
}
+ bp->bio_length = length;
+ bp->bio_done = gv_done;
+ bp->bio_cflags |= GV_BIO_SYNCREQ;
+ bp->bio_offset = offset;
+ bp->bio_caller1 = from;
+ bp->bio_caller2 = to;
+ bp->bio_cmd = type;
+ if (data == NULL)
+ data = g_malloc(length, M_WAITOK);
+ bp->bio_cflags |= GV_BIO_MALLOC; /* Free on the next run. */
+ bp->bio_data = data;
+
+ /* Send down next. */
+ mtx_lock(&sc->queue_mtx);
+ bioq_disksort(sc->bqueue, bp);
+ mtx_unlock(&sc->queue_mtx);
+ return (0);
+}
- pbp = bp->bio_parent;
- if (pbp->bio_error == 0)
- pbp->bio_error = bp->bio_error;
+/*
+ * Handle a finished plex sync bio.
+ *
+ * The peer plex is taken from bp->bio_caller2.  A completed read is turned
+ * into a write of the same offset, length and buffer.  A completed write
+ * frees the buffer (when flagged GV_BIO_MALLOC), adds the length to
+ * to->synced and then either finishes the sync or requests a read of the
+ * next chunk.  bp is always destroyed here.
+ *
+ * Returns -1 while any plex of the volume still has GV_PLEX_SYNCING set.
+ * Otherwise gv_access(v->provider, -1, -1, 0) and gv_volume_flush(v) are
+ * called and 0 is returned.
+ */
+int
+gv_sync_complete(struct gv_plex *to, struct bio *bp)
+{
+ struct gv_plex *from, *p;
+ struct gv_sd *s;
+ struct gv_volume *v;
+ struct gv_softc *sc;
+ off_t offset;
+ int err;
- /* When the original request is finished, we deliver it. */
- pbp->bio_inbed++;
- if (pbp->bio_inbed == pbp->bio_children)
- g_io_deliver(pbp, pbp->bio_error);
+ g_topology_assert_not();
- /* Clean up what we allocated. */
- if (bp->bio_cflags & GV_BIO_MALLOC)
- g_free(bp->bio_data);
+ err = 0;
+ KASSERT(to != NULL, ("NULL to"));
+ KASSERT(bp != NULL, ("NULL bp"));
+ from = bp->bio_caller2; /* NOTE(review): gv_sync_request() stores its "to" argument in bio_caller2 - confirm the naming here. */
+ KASSERT(from != NULL, ("NULL from"));
+ v = to->vol_sc;
+ KASSERT(v != NULL, ("NULL v"));
+ sc = v->vinumconf;
+ KASSERT(sc != NULL, ("NULL sc"));
+
+ /* If it was a read, write it. */
+ if (bp->bio_cmd == BIO_READ) {
+ err = gv_sync_request(from, to, bp->bio_offset, bp->bio_length,
+ BIO_WRITE, bp->bio_data);
+ /* If it was a write, read the next one. */
+ } else if (bp->bio_cmd == BIO_WRITE) {
+ if (bp->bio_cflags & GV_BIO_MALLOC)
+ g_free(bp->bio_data);
+ to->synced += bp->bio_length;
+ /* If we're finished, clean up. */
+ if (bp->bio_offset + bp->bio_length >= from->size) {
+ G_VINUM_DEBUG(1, "syncing of %s from %s completed",
+ to->name, from->name);
+ /* Update our state. */
+ LIST_FOREACH(s, &to->subdisks, in_plex)
+ gv_set_sd_state(s, GV_SD_UP, 0);
+ gv_update_plex_state(to);
+ to->flags &= ~GV_PLEX_SYNCING;
+ to->synced = 0;
+ gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
+ } else {
+ offset = bp->bio_offset + bp->bio_length;
+ err = gv_sync_request(from, to, offset,
+ MIN(bp->bio_length, from->size - offset), /* Clamp the last chunk to the end of the plex. */
+ BIO_READ, NULL);
+ }
+ }
g_destroy_bio(bp);
+ /* Clean up if there was an error. */
+ if (err) {
+ to->flags &= ~GV_PLEX_SYNCING;
+ G_VINUM_DEBUG(0, "error syncing plexes: error code %d", err);
+ }
+
+ /* Check if all plexes are synced, and lower refcounts. */
+ g_topology_lock();
+ LIST_FOREACH(p, &v->plexes, in_volume) {
+ if (p->flags & GV_PLEX_SYNCING) {
+ g_topology_unlock();
+ return (-1);
+ }
+ }
+ /* If we came here, all plexes are synced, and we're free. */
+ gv_access(v->provider, -1, -1, 0);
+ g_topology_unlock();
+ G_VINUM_DEBUG(1, "plex sync completed");
+ gv_volume_flush(v);
+ return (0);
}
-void
-gv_plex_normal_request(struct gv_plex *p, struct bio *bp)
+/*
+ * Create a new bio struct for the next grow request.
+ *
+ * The bio gets command "type", covers [offset, offset + length), carries
+ * the plex in bio_caller1 and is marked with GV_BIO_SYNCREQ in bio_pflags.
+ * When data is NULL a buffer of length bytes is allocated; in either case
+ * the bio is flagged GV_BIO_MALLOC.  The bio is inserted into sc->bqueue
+ * under sc->queue_mtx.
+ *
+ * Returns 0 on success or ENOMEM when no bio could be allocated.
+ */
+int
+gv_grow_request(struct gv_plex *p, off_t offset, off_t length, int type,
+ caddr_t data)
{
- struct bio *cbp, *pbp;
- struct gv_bioq *bq, *bq2;
- struct gv_raid5_packet *wp, *wp2;
- caddr_t addr;
- off_t bcount, boff;
- int err;
-
- bcount = bp->bio_length;
- addr = bp->bio_data;
- boff = bp->bio_offset;
+ struct gv_softc *sc;
+ struct bio *bp;
- /* Walk over the whole length of the request, we might split it up. */
- while (bcount > 0) {
- wp = NULL;
+ KASSERT(p != NULL, ("gv_grow_request: NULL p"));
+ sc = p->vinumconf;
+ KASSERT(sc != NULL, ("gv_grow_request: NULL sc"));
- /*
- * RAID5 plexes need special treatment, as a single write
- * request involves several read/write sub-requests.
- */
- if (p->org == GV_PLEX_RAID5) {
- wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
- wp->bio = bp;
- TAILQ_INIT(&wp->bits);
-
- if (bp->bio_cflags & GV_BIO_REBUILD)
- err = gv_rebuild_raid5(p, wp, bp, addr,
- boff, bcount);
- else if (bp->bio_cflags & GV_BIO_CHECK)
- err = gv_check_raid5(p, wp, bp, addr,
- boff, bcount);
- else
- err = gv_build_raid5_req(p, wp, bp, addr,
- boff, bcount);
-
- /*
- * Building the sub-request failed, we probably need to
- * clean up a lot.
- */
- if (err) {
- G_VINUM_LOGREQ(0, bp, "plex request failed.");
- TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
- TAILQ_REMOVE(&wp->bits, bq, queue);
- g_free(bq);
- }
- if (wp->waiting != NULL) {
- if (wp->waiting->bio_cflags &
- GV_BIO_MALLOC)
- g_free(wp->waiting->bio_data);
- g_destroy_bio(wp->waiting);
- }
- if (wp->parity != NULL) {
- if (wp->parity->bio_cflags &
- GV_BIO_MALLOC)
- g_free(wp->parity->bio_data);
- g_destroy_bio(wp->parity);
- }
- g_free(wp);
-
- TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
- if (wp->bio == bp) {
- TAILQ_REMOVE(&p->packets, wp,
- list);
- TAILQ_FOREACH_SAFE(bq,
- &wp->bits, queue, bq2) {
- TAILQ_REMOVE(&wp->bits,
- bq, queue);
- g_free(bq);
- }
- g_free(wp);
- }
- }
+ bp = g_new_bio();
+ if (bp == NULL) {
+ G_VINUM_DEBUG(0, "grow of %s failed creating bio: "
+ "out of memory", p->name);
+ return (ENOMEM);
+ }
- cbp = bp->bio_driver1;
- while (cbp != NULL) {
- pbp = cbp->bio_caller1;
- if (cbp->bio_cflags & GV_BIO_MALLOC)
- g_free(cbp->bio_data);
- g_destroy_bio(cbp);
- cbp = pbp;
- }
+ bp->bio_cmd = type;
+ bp->bio_done = gv_done;
+ bp->bio_error = 0;
+ bp->bio_caller1 = p;
+ bp->bio_offset = offset;
+ bp->bio_length = length;
+ bp->bio_pflags |= GV_BIO_SYNCREQ; /* XXX: misuse of pflags AND syncreq.*/
+ if (data == NULL)
+ data = g_malloc(length, M_WAITOK);
+ bp->bio_cflags |= GV_BIO_MALLOC; /* Set even when the caller supplied the buffer. */
+ bp->bio_data = data;
+
+ mtx_lock(&sc->queue_mtx);
+ bioq_disksort(sc->bqueue, bp);
+ mtx_unlock(&sc->queue_mtx);
+ //gv_plex_start(p, bp);
+ return (0);
+}
- g_io_deliver(bp, err);
- return;
- }
-
- if (TAILQ_EMPTY(&wp->bits))
- g_free(wp);
- else if (wp->lockbase != -1)
- TAILQ_INSERT_TAIL(&p->packets, wp, list);
+/*
+ * Finish handling of a bio to a growing plex.
+ *
+ * A completed read adds its length to p->synced and is re-issued as a
+ * write of the same region and buffer.  A completed write frees its buffer
+ * (when flagged GV_BIO_MALLOC) and compares the end of the region with
+ * origsize, computed as the first subdisk's size times (gv_sdcount(p, 1)
+ * - 1).  Once that size is reached the grow flags are cleared, the plex
+ * and volume sizes are updated and the delayed requests are flushed;
+ * otherwise a read of the next chunk is requested.  bp is always destroyed
+ * here.  If a follow-up request could not be created, GV_PLEX_GROWING is
+ * cleared and the error is logged.
+ */
+void
+gv_grow_complete(struct gv_plex *p, struct bio *bp)
+{
+ struct gv_softc *sc;
+ struct gv_sd *s;
+ struct gv_volume *v;
+ off_t origsize, offset;
+ int sdcount, err;
+
+ v = p->vol_sc;
+ KASSERT(v != NULL, ("gv_grow_complete: NULL v"));
+ sc = v->vinumconf;
+ KASSERT(sc != NULL, ("gv_grow_complete: NULL sc"));
+ err = 0;
- /*
- * Requests to concatenated and striped plexes go straight
- * through.
- */
- } else {
- err = gv_plexbuffer(p, bp, addr, boff, bcount);
-
- /* Building the sub-request failed. */
- if (err) {
- G_VINUM_LOGREQ(0, bp, "plex request failed.");
- cbp = bp->bio_driver1;
- while (cbp != NULL) {
- pbp = cbp->bio_caller1;
- g_destroy_bio(cbp);
- cbp = pbp;
- }
- g_io_deliver(bp, err);
- return;
+ /* If it was a read, write it. */
+ if (bp->bio_cmd == BIO_READ) {
+ p->synced += bp->bio_length;
+ err = gv_grow_request(p, bp->bio_offset, bp->bio_length,
+ BIO_WRITE, bp->bio_data);
+ /* If it was a write, read next. */
+ } else if (bp->bio_cmd == BIO_WRITE) {
+ if (bp->bio_cflags & GV_BIO_MALLOC)
+ g_free(bp->bio_data);
+
+ /* Find the real size of the plex. */
+ sdcount = gv_sdcount(p, 1);
+ s = LIST_FIRST(&p->subdisks);
+ KASSERT(s != NULL, ("NULL s"));
+ origsize = (s->size * (sdcount - 1)); /* NOTE(review): presumably excludes one subdisk's worth of parity - confirm. */
+ if (bp->bio_offset + bp->bio_length >= origsize) {
+ G_VINUM_DEBUG(1, "growing of %s completed", p->name);
+ p->flags &= ~GV_PLEX_GROWING;
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ s->flags &= ~GV_SD_GROW;
+ gv_set_sd_state(s, GV_SD_UP, 0);
}
+ p->size = gv_plex_size(p);
+ gv_update_vol_size(v, gv_vol_size(v));
+ gv_set_plex_state(p, GV_PLEX_UP, 0);
+ g_topology_lock();
+ gv_access(v->provider, -1, -1, 0);
+ g_topology_unlock();
+ p->synced = 0;
+ gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
+ /* Issue delayed requests. */
+ gv_plex_flush(p);
+ } else {
+ offset = bp->bio_offset + bp->bio_length;
+ err = gv_grow_request(p, offset,
+ MIN(bp->bio_length, origsize - offset), /* Clamp the last chunk to origsize. */
+ BIO_READ, NULL);
}
-
- /* Abuse bio_caller1 as linked list. */
- pbp = bp->bio_driver1;
- while (pbp->bio_caller1 != NULL)
- pbp = pbp->bio_caller1;
- bcount -= pbp->bio_length;
- addr += pbp->bio_length;
- boff += pbp->bio_length;
- }
-
- /* Fire off all sub-requests. */
- pbp = bp->bio_driver1;
- while (pbp != NULL) {
- /*
- * RAID5 sub-requests need to come in correct order, otherwise
- * we trip over the parity, as it might be overwritten by
- * another sub-request.
- */
- if (pbp->bio_driver1 != NULL &&
- gv_stripe_active(p, pbp)) {
- /* Park the bio on the waiting queue. */
- pbp->bio_cflags |= GV_BIO_ONHOLD;
- mtx_lock(&p->bqueue_mtx);
- bioq_disksort(p->wqueue, pbp);
- mtx_unlock(&p->bqueue_mtx);
- } else
- g_io_request(pbp, pbp->bio_caller2);
- pbp = pbp->bio_caller1;
+ }
+ g_destroy_bio(bp);
+
+ if (err) {
+ p->flags &= ~GV_PLEX_GROWING;
+ G_VINUM_DEBUG(0, "error growing plex: error code %d", err);
}
}
-static int
-gv_plex_access(struct g_provider *pp, int dr, int dw, int de)
+
+/*
+ * Create an initialization BIO and send it off to the consumer. Assume that
+ * we're given initialization data as parameter.
+ *
+ * s - subdisk being initialized; the write is issued to the consumer of
+ * its drive (s->drive_sc->consumer).
+ * start - offset stored in the bio.
+ * data - caller-supplied buffer to write; it is not freed here.
+ * length - number of bytes to write.
+ *
+ * A parent bio flagged GV_BIO_INIT is built and a clone of it is issued;
+ * both carry s in bio_caller1 and complete through gv_done.  On allocation
+ * failure the problem is only logged and nothing is issued.
+ */
+void
+gv_init_request(struct gv_sd *s, off_t start, caddr_t data, off_t length)
+{
+ struct gv_drive *d;
+ struct g_consumer *cp;
+ struct bio *bp, *cbp;
+
+ KASSERT(s != NULL, ("gv_init_request: NULL s"));
+ d = s->drive_sc;
+ KASSERT(d != NULL, ("gv_init_request: NULL d"));
+ cp = d->consumer;
+ KASSERT(cp != NULL, ("gv_init_request: NULL cp"));
+
+ bp = g_new_bio();
+ if (bp == NULL) {
+ G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
+ " (drive offset %jd); out of memory", s->name,
+ (intmax_t)s->initialized, (intmax_t)start);
+ return; /* XXX: Error codes. */
+ }
+ bp->bio_cmd = BIO_WRITE;
+ bp->bio_data = data;
+ bp->bio_done = gv_done;
+ bp->bio_error = 0;
+ bp->bio_length = length;
+ bp->bio_cflags |= GV_BIO_INIT;
+ bp->bio_offset = start;
+ bp->bio_caller1 = s;
+
+ /* Then of course, we have to clone it. */
+ cbp = g_clone_bio(bp);
+ if (cbp == NULL) {
+ G_VINUM_DEBUG(0, "subdisk '%s' init: write failed at offset %jd"
+ " (drive offset %jd); out of memory", s->name,
+ (intmax_t)s->initialized, (intmax_t)start);
+ /* The parent was never issued; release it instead of leaking it. */
+ g_destroy_bio(bp);
+ return; /* XXX: Error codes. */
+ }
+ cbp->bio_done = gv_done;
+ cbp->bio_caller1 = s;
+ /* Send it off to the consumer. */
+ g_io_request(cbp, cp);
+}
+
+/*
+ * Handle a finished initialization BIO.
+ *
+ * The subdisk is taken from bp->bio_caller1, and the offset, length, error
+ * and data buffer are copied out of bp before it is destroyed.  When the
+ * completed offset is at or past s->drive_offset + s->size, the buffer is
+ * freed, g_access(cp, 0, -1, 0) is called on the drive's consumer and the
+ * subdisk is set to GV_SD_STALE (error) or GV_SD_UP (no error).
+ * Otherwise s->initialized is advanced and the next write of the same
+ * length is requested with the same buffer.
+ *
+ * NOTE(review): the error of a bio is only examined on the final pass;
+ * errors on earlier chunks do not stop the loop - confirm this is intended.
+ * NOTE(review): the end test uses the offset of the completed bio, not the
+ * next one, so one more request appears to be issued at the end offset -
+ * verify against the caller's initial offset.
+ */
+void
+gv_init_complete(struct gv_plex *p, struct bio *bp)
{
- struct gv_plex *p;
- struct g_geom *gp;
- struct g_consumer *cp, *cp2;
+ struct gv_softc *sc;
+ struct gv_drive *d;
+ struct g_consumer *cp;
+ struct gv_sd *s;
+ off_t start, length;
+ caddr_t data;
int error;
- gp = pp->geom;
- p = gp->softc;
- KASSERT(p != NULL, ("NULL p"));
+ s = bp->bio_caller1;
+ start = bp->bio_offset;
+ length = bp->bio_length;
+ error = bp->bio_error;
+ data = bp->bio_data;
- if (p->org == GV_PLEX_RAID5) {
- if (dw > 0 && dr == 0)
- dr = 1;
- else if (dw < 0 && dr == 0)
- dr = -1;
- }
+ KASSERT(s != NULL, ("gv_init_complete: NULL s"));
+ d = s->drive_sc;
+ KASSERT(d != NULL, ("gv_init_complete: NULL d"));
+ cp = d->consumer;
+ KASSERT(cp != NULL, ("gv_init_complete: NULL cp"));
+ sc = p->vinumconf;
+ KASSERT(sc != NULL, ("gv_init_complete: NULL sc"));
+
+ g_destroy_bio(bp); /* Everything needed from bp was copied out above. */
- LIST_FOREACH(cp, &gp->consumer, consumer) {
- error = g_access(cp, dr, dw, de);
+ /*
+ * First we need to find out if it was okay, and abort if it's not.
+ * Then we need to free previous buffers, find out the correct subdisk,
+ * as well as getting the correct starting point and length of the BIO.
+ */
+ if (start >= s->drive_offset + s->size) {
+ /* Free the data we initialized. */
+ if (data != NULL)
+ g_free(data);
+ g_topology_assert_not();
+ g_topology_lock();
+ g_access(cp, 0, -1, 0);
+ g_topology_unlock();
if (error) {
- LIST_FOREACH(cp2, &gp->consumer, consumer) {
- if (cp == cp2)
- break;
- g_access(cp2, -dr, -dw, -de);
- }
- return (error);
+ gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE |
+ GV_SETSTATE_CONFIG);
+ } else {
+ gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_CONFIG);
+ s->initialized = 0;
+ gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
+ G_VINUM_DEBUG(1, "subdisk '%s' init: finished "
+ "successfully", s->name);
}
+ return;
}
- return (0);
+ s->initialized += length;
+ start += length;
+ gv_init_request(s, start, data, length); /* Reuse the same buffer for the next chunk. */
}
-static struct g_geom *
-gv_plex_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+/*
+ * Create a new bio struct for the next parity rebuild. Used both by internal
+ * rebuild of degraded plexes as well as user initiated rebuilds/checks.
+ *
+ * p - plex to operate on; stored in bio_caller1.
+ * flags - must contain GV_BIO_REBUILD or GV_BIO_CHECK; copied into
+ * bio_cflags together with GV_BIO_MALLOC.
+ * offset - offset of the write bio, whose length is p->stripesize.
+ *
+ * The bio is inserted into sc->bqueue under sc->queue_mtx.  If no bio can
+ * be allocated, or flags names neither operation, the problem is logged
+ * and nothing is queued.
+ */
+void
+gv_parity_request(struct gv_plex *p, int flags, off_t offset)
{
- struct g_geom *gp;
- struct g_consumer *cp, *cp2;
- struct g_provider *pp2;
- struct gv_plex *p;
- struct gv_sd *s;
struct gv_softc *sc;
- int error;
-
- g_trace(G_T_TOPOLOGY, "gv_plex_taste(%s, %s)", mp->name, pp->name);
- g_topology_assert();
-
- /* We only want to attach to subdisks. */
- if (strcmp(pp->geom->class->name, "VINUMDRIVE"))
- return (NULL);
-
- /* Find the VINUM class and its associated geom. */
- gp = find_vinum_geom();
- if (gp == NULL)
- return (NULL);
- sc = gp->softc;
- KASSERT(sc != NULL, ("gv_plex_taste: NULL sc"));
+ struct bio *bp;
- /* Find out which subdisk the offered provider corresponds to. */
- s = pp->private;
- KASSERT(s != NULL, ("gv_plex_taste: NULL s"));
+ KASSERT(p != NULL, ("gv_parity_request: NULL p"));
+ sc = p->vinumconf;
+ KASSERT(sc != NULL, ("gv_parity_request: NULL sc"));
- /* Now find the correct plex where this subdisk belongs to. */
- p = gv_find_plex(sc, s->plex);
- if (p == NULL) {
- G_VINUM_DEBUG(0, "%s: NULL p for '%s'", __func__, s->name);
- return (NULL);
+ bp = g_new_bio();
+ if (bp == NULL) {
+ G_VINUM_DEBUG(0, "rebuild of %s failed creating bio: "
+ "out of memory", p->name);
+ return;
}
+ bp->bio_cmd = BIO_WRITE;
+ bp->bio_done = gv_done;
+ bp->bio_error = 0;
+ bp->bio_length = p->stripesize;
+ bp->bio_caller1 = p;
+
/*
- * Add this subdisk to this plex. Since we trust the on-disk
- * configuration, we don't check the given value (should we?).
- * XXX: shouldn't be done here
+ * Check if it's a rebuild of a degraded plex or a user request of
+ * parity rebuild.
*/
- gv_sd_to_plex(p, s, 0);
+ if (flags & GV_BIO_REBUILD)
+ bp->bio_data = g_malloc(GV_DFLT_SYNCSIZE, M_WAITOK);
+ else if (flags & GV_BIO_CHECK)
+ bp->bio_data = g_malloc(p->stripesize, M_WAITOK | M_ZERO);
+ else {
+ G_VINUM_DEBUG(0, "invalid flags given in rebuild");
+ /* The bio was never queued; release it instead of leaking it. */
+ g_destroy_bio(bp);
+ return;
+ }
- /* Now check if there's already a geom for this plex. */
- gp = p->geom;
+ bp->bio_cflags = flags;
+ bp->bio_cflags |= GV_BIO_MALLOC;
- /* Yes, there is already a geom, so we just add the consumer. */
- if (gp != NULL) {
- cp2 = LIST_FIRST(&gp->consumer);
- /* Need to attach a new consumer to this subdisk. */
- cp = g_new_consumer(gp);
- error = g_attach(cp, pp);
- if (error) {
- G_VINUM_DEBUG(0, "unable to attach consumer to %s",
- pp->name);
- g_destroy_consumer(cp);
- return (NULL);
- }
- /* Adjust the access counts of the new consumer. */
- if ((cp2 != NULL) && (cp2->acr || cp2->acw || cp2->ace)) {
- error = g_access(cp, cp2->acr, cp2->acw, cp2->ace);
- if (error) {
- G_VINUM_DEBUG(0, "unable to set access counts"
- " for consumer on %s", pp->name);
- g_detach(cp);
- g_destroy_consumer(cp);
- return (NULL);
- }
- }
- s->consumer = cp;
+ /* We still have more parity to build. */
+ bp->bio_offset = offset;
+ mtx_lock(&sc->queue_mtx);
+ bioq_disksort(sc->bqueue, bp);
+ mtx_unlock(&sc->queue_mtx);
+}
- /* Adjust the size of the providers this plex has. */
- LIST_FOREACH(pp2, &gp->provider, provider)
- pp2->mediasize = p->size;
+/*
+ * Handle a finished parity write.
+ *
+ * The error and flags (minus GV_BIO_MALLOC) are copied out of bp, then its
+ * buffer is freed (when flagged GV_BIO_MALLOC) and bp is destroyed.  Any
+ * error other than EAGAIN with GV_BIO_PARITY set ends the operation:
+ * gv_access(p->vol_sc->provider, -1, -1, 0) is called and the failure is
+ * logged.  Otherwise p->synced advances by p->stripesize; when it reaches
+ * p->size the operation is finished the same way and p->synced is reset,
+ * else the next request is issued through gv_parity_request().
+ */
+void
+gv_parity_complete(struct gv_plex *p, struct bio *bp)
+{
+ struct gv_softc *sc;
+ int error, flags;
- /* Update the size of the volume this plex is attached to. */
- if (p->vol_sc != NULL)
- gv_update_vol_size(p->vol_sc, p->size);
+ error = bp->bio_error;
+ flags = bp->bio_cflags;
+ flags &= ~GV_BIO_MALLOC; /* The flags are reused for the next request, which sets this bit itself. */
- /*
- * If necessary, create bio queues, queue mutex and a worker
- * thread.
- */
- if (p->bqueue == NULL) {
- p->bqueue = g_malloc(sizeof(struct bio_queue_head),
- M_WAITOK | M_ZERO);
- bioq_init(p->bqueue);
- }
- if (p->wqueue == NULL) {
- p->wqueue = g_malloc(sizeof(struct bio_queue_head),
- M_WAITOK | M_ZERO);
- bioq_init(p->wqueue);
- }
- if (mtx_initialized(&p->bqueue_mtx) == 0)
- mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
- if (!(p->flags & GV_PLEX_THREAD_ACTIVE)) {
- kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
- p->name);
- p->flags |= GV_PLEX_THREAD_ACTIVE;
- }
+ sc = p->vinumconf;
+ KASSERT(sc != NULL, ("gv_parity_complete: NULL sc"));
+
+ /* Clean up what we allocated. */
+ if (bp->bio_cflags & GV_BIO_MALLOC)
+ g_free(bp->bio_data);
+ g_destroy_bio(bp);
- return (NULL);
+ if (error == EAGAIN) {
+ G_VINUM_DEBUG(0, "parity incorrect at offset 0x%jx",
+ (intmax_t)p->synced);
+ }
- /* We need to create a new geom. */
+ /* Any error is fatal, except EAGAIN when we're rebuilding. */
+ if (error && !(error == EAGAIN && (flags & GV_BIO_PARITY))) {
+ /* Make sure we don't have the lock. */
+ g_topology_assert_not();
+ g_topology_lock();
+ gv_access(p->vol_sc->provider, -1, -1, 0);
+ g_topology_unlock();
+ G_VINUM_DEBUG(0, "parity check on %s failed at 0x%jx "
+ "errno %d", p->name, (intmax_t)p->synced, error);
+ return;
} else {
- gp = g_new_geomf(mp, "%s", p->name);
- gp->start = gv_plex_start;
- gp->orphan = gv_plex_orphan;
- gp->access = gv_plex_access;
- gp->softc = p;
- p->geom = gp;
-
- TAILQ_INIT(&p->packets);
- p->bqueue = g_malloc(sizeof(struct bio_queue_head),
- M_WAITOK | M_ZERO);
- bioq_init(p->bqueue);
- p->wqueue = g_malloc(sizeof(struct bio_queue_head),
- M_WAITOK | M_ZERO);
- bioq_init(p->wqueue);
- mtx_init(&p->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
- kproc_create(gv_plex_worker, p, NULL, 0, 0, "gv_p %s",
- p->name);
- p->flags |= GV_PLEX_THREAD_ACTIVE;
-
- /* Attach a consumer to this provider. */
- cp = g_new_consumer(gp);
- g_attach(cp, pp);
- s->consumer = cp;
-
- /* Create a provider for the outside world. */
- pp2 = g_new_providerf(gp, "gvinum/plex/%s", p->name);
- pp2->mediasize = p->size;
- pp2->sectorsize = pp->sectorsize;
- p->provider = pp2;
- g_error_provider(pp2, 0);
- return (gp);
+ p->synced += p->stripesize;
}
-}
-static int
-gv_plex_destroy_geom(struct gctl_req *req, struct g_class *mp,
- struct g_geom *gp)
-{
- struct gv_plex *p;
+ if (p->synced >= p->size) {
+ /* Make sure we don't have the lock. */
+ g_topology_assert_not();
+ g_topology_lock();
+ gv_access(p->vol_sc->provider, -1, -1, 0);
+ g_topology_unlock();
+ /* We're finished. */
+ G_VINUM_DEBUG(1, "parity operation on %s finished", p->name);
+ p->synced = 0;
+ gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
+ return;
+ }
- g_trace(G_T_TOPOLOGY, "gv_plex_destroy_geom: %s", gp->name);
- g_topology_assert();
+ /* Send down next. It will determine if we need to itself. */
+ gv_parity_request(p, flags, p->synced);
+}
- p = gp->softc;
+/*
+ * Handle a finished plex rebuild bio.
+ *
+ * The error, flags (minus GV_BIO_MALLOC) and offset are copied out of bp,
+ * then its buffer is freed (when flagged GV_BIO_MALLOC) and bp is
+ * destroyed.  On error the rebuild is abandoned: gv_access() is called
+ * with (-1, -1, 0), GV_PLEX_REBUILDING is cleared and the delayed requests
+ * are flushed.  Otherwise the offset advances by p->stripesize times
+ * (gv_sdcount(p, 1) - 1); when it reaches p->size the rebuild is finished
+ * and the subdisk states are updated, else the next request is issued
+ * through gv_parity_request().
+ */
+void
+gv_rebuild_complete(struct gv_plex *p, struct bio *bp)
+{
+ struct gv_softc *sc;
+ struct gv_sd *s;
+ int error, flags;
+ off_t offset;
- KASSERT(p != NULL, ("gv_plex_destroy_geom: null p of '%s'", gp->name));
+ error = bp->bio_error;
+ flags = bp->bio_cflags;
+ offset = bp->bio_offset;
+ flags &= ~GV_BIO_MALLOC; /* The flags are reused for the next request, which sets this bit itself. */
+ sc = p->vinumconf;
+ KASSERT(sc != NULL, ("gv_rebuild_complete: NULL sc"));
- /*
- * If this is a RAID5 plex, check if its worker thread is still active
- * and signal it to self destruct.
- */
- gv_kill_plex_thread(p);
- /* g_free(sc); */
- g_wither_geom(gp, ENXIO);
- return (0);
-}
+ /* Clean up what we allocated. */
+ if (bp->bio_cflags & GV_BIO_MALLOC)
+ g_free(bp->bio_data);
+ g_destroy_bio(bp);
-#define VINUMPLEX_CLASS_NAME "VINUMPLEX"
+ if (error) {
+ g_topology_assert_not();
+ g_topology_lock();
+ gv_access(p->vol_sc->provider, -1, -1, 0);
+ g_topology_unlock();
+
+ G_VINUM_DEBUG(0, "rebuild of %s failed at offset %jd errno: %d",
+ p->name, (intmax_t)offset, error);
+ p->flags &= ~GV_PLEX_REBUILDING;
+ p->synced = 0;
+ gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
+ return;
+ }
-static struct g_class g_vinum_plex_class = {
- .name = VINUMPLEX_CLASS_NAME,
- .version = G_VERSION,
- .taste = gv_plex_taste,
- .destroy_geom = gv_plex_destroy_geom,
-};
+ offset += (p->stripesize * (gv_sdcount(p, 1) - 1)); /* NOTE(review): presumably one stripe across the data subdisks - confirm. */
+ if (offset >= p->size) {
+ /* We're finished. */
+ g_topology_assert_not();
+ g_topology_lock();
+ gv_access(p->vol_sc->provider, -1, -1, 0);
+ g_topology_unlock();
+
+ G_VINUM_DEBUG(1, "rebuild of %s finished", p->name);
+ gv_save_config(p->vinumconf);
+ p->flags &= ~GV_PLEX_REBUILDING;
+ p->synced = 0;
+ /* Try to up all subdisks. */
+ LIST_FOREACH(s, &p->subdisks, in_plex)
+ gv_update_sd_state(s);
+ gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
+ gv_plex_flush(p); /* Flush out remaining rebuild BIOs. */
+ return;
+ }
-DECLARE_GEOM_CLASS(g_vinum_plex_class, g_vinum_plex);
+ /* Send down next. It will determine if we need to itself. */
+ gv_parity_request(p, flags, offset);
+}
diff --git a/sys/geom/vinum/geom_vinum_raid5.c b/sys/geom/vinum/geom_vinum_raid5.c
index abfed51..088162e 100644
--- a/sys/geom/vinum/geom_vinum_raid5.c
+++ b/sys/geom/vinum/geom_vinum_raid5.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,14 +29,8 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bio.h>
-#include <sys/conf.h>
-#include <sys/errno.h>
-#include <sys/kernel.h>
-#include <sys/kthread.h>
-#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/mutex.h>
#include <sys/systm.h>
#include <geom/geom.h>
@@ -44,8 +38,103 @@ __FBSDID("$FreeBSD$");
#include <geom/vinum/geom_vinum_raid5.h>
#include <geom/vinum/geom_vinum.h>
-int gv_raid5_offset(struct gv_plex *, off_t, off_t, off_t *, off_t *,
- int *, int *);
+static int gv_raid5_offset(struct gv_plex *, off_t, off_t,
+ off_t *, off_t *, int *, int *, int);
+static struct bio * gv_raid5_clone_bio(struct bio *, struct gv_sd *,
+ struct gv_raid5_packet *, caddr_t, int);
+static int gv_raid5_request(struct gv_plex *, struct gv_raid5_packet *,
+ struct bio *, caddr_t, off_t, off_t, int *);
+static int gv_raid5_check(struct gv_plex *, struct gv_raid5_packet *,
+ struct bio *, caddr_t, off_t, off_t);
+static int gv_raid5_rebuild(struct gv_plex *, struct gv_raid5_packet *,
+ struct bio *, caddr_t, off_t, off_t);
+
+/*
+ * Build the RAID5 sub-requests for (part of) bp and return the work packet
+ * that tracks them.
+ *
+ * p - plex the request is aimed at.
+ * bp - original request; its bio_cflags select the builder:
+ * GV_BIO_REBUILD -> gv_raid5_rebuild(), GV_BIO_CHECK -> gv_raid5_check(),
+ * anything else -> gv_raid5_request().
+ * addr, boff, bcount - buffer, offset and length handed to the builder.
+ *
+ * Returns the new work packet on success.  Returns NULL when the builder
+ * reported the request as delayed (the packet is simply freed), or when
+ * building failed.  On failure the packet and its bits are torn down,
+ * every packet on p->packets that belongs to bp is removed, all bios
+ * queued on p->bqueue are destroyed, and bp is either destroyed
+ * (GV_BIO_INTERNAL, which also clears the syncing/rebuilding/growing plex
+ * flags) or delivered with the error.
+ */
+struct gv_raid5_packet *
+gv_raid5_start(struct gv_plex *p, struct bio *bp, caddr_t addr, off_t boff,
+ off_t bcount)
+{
+ struct bio *cbp;
+ struct gv_raid5_packet *wp, *wp2;
+ struct gv_bioq *bq, *bq2;
+ int err, delay;
+
+ delay = 0;
+ wp = g_malloc(sizeof(*wp), M_WAITOK | M_ZERO);
+ wp->bio = bp;
+ wp->waiting = NULL;
+ wp->parity = NULL;
+ TAILQ_INIT(&wp->bits);
+
+ if (bp->bio_cflags & GV_BIO_REBUILD)
+ err = gv_raid5_rebuild(p, wp, bp, addr, boff, bcount);
+ else if (bp->bio_cflags & GV_BIO_CHECK)
+ err = gv_raid5_check(p, wp, bp, addr, boff, bcount);
+ else
+ err = gv_raid5_request(p, wp, bp, addr, boff, bcount, &delay);
+
+ /* Means we have a delayed request. */
+ if (delay) {
+ g_free(wp);
+ return (NULL);
+ }
+
+ /*
+ * Building the sub-request failed, we probably need to clean up a lot.
+ */
+ if (err) {
+ G_VINUM_LOGREQ(0, bp, "raid5 plex request failed.");
+ TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
+ TAILQ_REMOVE(&wp->bits, bq, queue);
+ g_free(bq);
+ }
+ if (wp->waiting != NULL) {
+ if (wp->waiting->bio_cflags & GV_BIO_MALLOC)
+ g_free(wp->waiting->bio_data);
+ g_destroy_bio(wp->waiting);
+ }
+ if (wp->parity != NULL) {
+ if (wp->parity->bio_cflags & GV_BIO_MALLOC)
+ g_free(wp->parity->bio_data);
+ g_destroy_bio(wp->parity);
+ }
+ g_free(wp);
+
+ /* wp is reused as the iterator; the packet above is already gone. */
+ TAILQ_FOREACH_SAFE(wp, &p->packets, list, wp2) {
+ if (wp->bio != bp)
+ continue;
+
+ TAILQ_REMOVE(&p->packets, wp, list);
+ TAILQ_FOREACH_SAFE(bq, &wp->bits, queue, bq2) {
+ TAILQ_REMOVE(&wp->bits, bq, queue);
+ g_free(bq);
+ }
+ g_free(wp);
+ }
+
+ cbp = bioq_takefirst(p->bqueue);
+ while (cbp != NULL) {
+ if (cbp->bio_cflags & GV_BIO_MALLOC)
+ g_free(cbp->bio_data);
+ g_destroy_bio(cbp);
+ cbp = bioq_takefirst(p->bqueue);
+ }
+
+ /* If internal, stop and reset state. */
+ if (bp->bio_cflags & GV_BIO_INTERNAL) {
+ /*
+ * cbp is NULL once the queue above is drained, so the
+ * buffer to release is the one owned by bp itself.
+ */
+ if (bp->bio_cflags & GV_BIO_MALLOC)
+ g_free(bp->bio_data);
+ g_destroy_bio(bp);
+ /* Reset flags. */
+ p->flags &= ~(GV_PLEX_SYNCING | GV_PLEX_REBUILDING |
+ GV_PLEX_GROWING);
+ return (NULL);
+ }
+ g_io_deliver(bp, err);
+ return (NULL);
+ }
+
+ return (wp);
+}
/*
* Check if the stripe that the work packet wants is already being used by
@@ -57,7 +146,7 @@ gv_stripe_active(struct gv_plex *p, struct bio *bp)
struct gv_raid5_packet *wp, *owp;
int overlap;
- wp = bp->bio_driver1;
+ wp = bp->bio_caller2;
if (wp->lockbase == -1)
return (0);
@@ -80,20 +169,20 @@ gv_stripe_active(struct gv_plex *p, struct bio *bp)
return (overlap);
}
-int
-gv_check_raid5(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
+static int
+gv_raid5_check(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
caddr_t addr, off_t boff, off_t bcount)
{
struct gv_sd *parity, *s;
struct gv_bioq *bq;
- struct bio *cbp, *pbp;
+ struct bio *cbp;
int i, psdno;
off_t real_len, real_off;
if (p == NULL || LIST_EMPTY(&p->subdisks))
return (ENXIO);
- gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno);
+ gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, &psdno, 1);
/* Find the right subdisk. */
parity = NULL;
@@ -122,20 +211,16 @@ gv_check_raid5(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
/* Skip the parity subdisk. */
if (s == parity)
continue;
+ /* Skip growing subdisks. */
+ if (s->flags & GV_SD_GROW)
+ continue;
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
if (cbp == NULL)
return (ENOMEM);
cbp->bio_cmd = BIO_READ;
- cbp->bio_data = g_malloc(real_len, M_WAITOK);
- cbp->bio_cflags |= GV_BIO_MALLOC;
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = s->consumer;
- cbp->bio_driver1 = wp;
- GV_ENQUEUE(bp, cbp, pbp);
+ bioq_insert_tail(p->bqueue, cbp);
bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
@@ -143,51 +228,38 @@ gv_check_raid5(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
}
/* Read the parity data. */
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
if (cbp == NULL)
return (ENOMEM);
cbp->bio_cmd = BIO_READ;
- cbp->bio_data = g_malloc(real_len, M_WAITOK | M_ZERO);
- cbp->bio_cflags |= GV_BIO_MALLOC;
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = parity->consumer;
- cbp->bio_driver1 = wp;
wp->waiting = cbp;
/*
* In case we want to rebuild the parity, create an extra BIO to write
* it out. It also acts as buffer for the XOR operations.
*/
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, parity, wp, addr, 1);
if (cbp == NULL)
return (ENOMEM);
- cbp->bio_data = addr;
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = parity->consumer;
- cbp->bio_driver1 = wp;
wp->parity = cbp;
return (0);
}
/* Rebuild a degraded RAID5 plex. */
-int
-gv_rebuild_raid5(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
+static int
+gv_raid5_rebuild(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
caddr_t addr, off_t boff, off_t bcount)
{
struct gv_sd *broken, *s;
struct gv_bioq *bq;
- struct bio *cbp, *pbp;
+ struct bio *cbp;
off_t real_len, real_off;
if (p == NULL || LIST_EMPTY(&p->subdisks))
return (ENXIO);
- gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL);
+ gv_raid5_offset(p, boff, bcount, &real_off, &real_len, NULL, NULL, 1);
/* Find the right subdisk. */
broken = NULL;
@@ -210,6 +282,8 @@ gv_rebuild_raid5(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
G_VINUM_DEBUG(1, "sd %s is reviving", broken->name);
gv_set_sd_state(broken, GV_SD_REVIVING, GV_SETSTATE_FORCE);
+ /* Set this bit now, but should be set at end. */
+ broken->flags |= GV_SD_CANGOUP;
break;
case GV_SD_REVIVING:
@@ -232,19 +306,16 @@ gv_rebuild_raid5(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
if (s == broken)
continue;
- cbp = g_clone_bio(bp);
+ /* Skip growing subdisks. */
+ if (s->flags & GV_SD_GROW)
+ continue;
+
+ cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
if (cbp == NULL)
return (ENOMEM);
cbp->bio_cmd = BIO_READ;
- cbp->bio_data = g_malloc(real_len, M_WAITOK);
- cbp->bio_cflags |= GV_BIO_MALLOC;
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = s->consumer;
- cbp->bio_driver1 = wp;
- GV_ENQUEUE(bp, cbp, pbp);
+ bioq_insert_tail(p->bqueue, cbp);
bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
@@ -252,34 +323,28 @@ gv_rebuild_raid5(struct gv_plex *p, struct gv_raid5_packet *wp, struct bio *bp,
}
/* Write the parity data. */
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, broken, wp, NULL, 1);
if (cbp == NULL)
return (ENOMEM);
- cbp->bio_data = g_malloc(real_len, M_WAITOK | M_ZERO);
- cbp->bio_cflags |= GV_BIO_MALLOC;
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = broken->consumer;
- cbp->bio_driver1 = wp;
cbp->bio_cflags |= GV_BIO_REBUILD;
wp->parity = cbp;
p->synced = boff;
+ /* Post notification that we're finished. */
return (0);
}
/* Build a request group to perform (part of) a RAID5 request. */
-int
-gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
- struct bio *bp, caddr_t addr, off_t boff, off_t bcount)
+static int
+gv_raid5_request(struct gv_plex *p, struct gv_raid5_packet *wp,
+ struct bio *bp, caddr_t addr, off_t boff, off_t bcount, int *delay)
{
struct g_geom *gp;
struct gv_sd *broken, *original, *parity, *s;
struct gv_bioq *bq;
- struct bio *cbp, *pbp;
- int i, psdno, sdno, type;
+ struct bio *cbp;
+ int i, psdno, sdno, type, grow;
off_t real_len, real_off;
gp = bp->bio_to->geom;
@@ -295,7 +360,24 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
type = REQ_TYPE_NORMAL;
original = parity = broken = NULL;
- gv_raid5_offset(p, boff, bcount, &real_off, &real_len, &sdno, &psdno);
+ /* XXX: The resize won't crash with rebuild or sync, but we should still
+ * be aware of it. Also this should perhaps be done on rebuild/check as
+ * well?
+ */
+ /* If we start at or beyond the synced offset, we must use the old layout. */
+ if (boff >= p->synced) {
+ grow = 1;
+ /* Or if we end at or before the synced offset, we use all drives. */
+ } else if (boff + bcount <= p->synced) {
+ grow = 0;
+ /* Else the request straddles the synced offset, so requeue it and delay. */
+ } else {
+ bioq_disksort(p->rqueue, bp);
+ *delay = 1;
+ return (0);
+ }
+ gv_raid5_offset(p, boff, bcount, &real_off, &real_len,
+ &sdno, &psdno, grow);
/* Find the right subdisks. */
i = 0;
@@ -315,8 +397,13 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
/* Our data stripe is missing. */
if (original->state != GV_SD_UP)
type = REQ_TYPE_DEGRADED;
+
+ /* If synchronizing request, just write it if disks are stale. */
+ if (original->state == GV_SD_STALE && parity->state == GV_SD_STALE &&
+ bp->bio_cflags & GV_BIO_SYNCREQ && bp->bio_cmd == BIO_WRITE) {
+ type = REQ_TYPE_NORMAL;
/* Our parity stripe is missing. */
- if (parity->state != GV_SD_UP) {
+ } else if (parity->state != GV_SD_UP) {
/* We cannot take another failure if we're already degraded. */
if (type != REQ_TYPE_NORMAL)
return (ENXIO);
@@ -330,9 +417,15 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
KASSERT(wp->length >= 0, ("gv_build_raid5_request: wp->length < 0"));
- if ((p->flags & GV_PLEX_SYNCING) && (boff + real_len < p->synced))
+ if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len < p->synced))
type = REQ_TYPE_NORMAL;
+ if ((p->flags & GV_PLEX_REBUILDING) && (boff + real_len >= p->synced)) {
+ bioq_disksort(p->rqueue, bp);
+ *delay = 1;
+ return (0);
+ }
+
switch (bp->bio_cmd) {
case BIO_READ:
/*
@@ -346,18 +439,14 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
/* Skip the broken subdisk. */
if (s == broken)
continue;
- cbp = g_clone_bio(bp);
+ /* Skip growing if within offset. */
+ if (grow && s->flags & GV_SD_GROW)
+ continue;
+ cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
if (cbp == NULL)
return (ENOMEM);
- cbp->bio_data = g_malloc(real_len, M_WAITOK);
- cbp->bio_cflags |= GV_BIO_MALLOC;
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = s->consumer;
- cbp->bio_driver1 = wp;
- GV_ENQUEUE(bp, cbp, pbp);
+ bioq_insert_tail(p->bqueue, cbp);
bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
@@ -366,16 +455,11 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
/* A normal read can be fulfilled with the original subdisk. */
} else {
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, original, wp, addr, 0);
if (cbp == NULL)
return (ENOMEM);
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_data = addr;
- cbp->bio_done = g_std_done;
- cbp->bio_caller2 = original->consumer;
- GV_ENQUEUE(bp, cbp, pbp);
+ bioq_insert_tail(p->bqueue, cbp);
}
wp->lockbase = -1;
@@ -394,20 +478,16 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
/* Skip the broken and the parity subdisk. */
if ((s == broken) || (s == parity))
continue;
+ /* Skip growing if within offset. */
+ if (grow && s->flags & GV_SD_GROW)
+ continue;
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, s, wp, NULL, 1);
if (cbp == NULL)
return (ENOMEM);
cbp->bio_cmd = BIO_READ;
- cbp->bio_data = g_malloc(real_len, M_WAITOK);
- cbp->bio_cflags |= GV_BIO_MALLOC;
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = s->consumer;
- cbp->bio_driver1 = wp;
- GV_ENQUEUE(bp, cbp, pbp);
+ bioq_insert_tail(p->bqueue, cbp);
bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
@@ -415,34 +495,21 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
}
/* Write the parity data. */
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
if (cbp == NULL)
return (ENOMEM);
- cbp->bio_data = g_malloc(real_len, M_WAITOK);
- cbp->bio_cflags |= GV_BIO_MALLOC;
- bcopy(addr, cbp->bio_data, real_len);
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = parity->consumer;
- cbp->bio_driver1 = wp;
+ bcopy(addr, cbp->bio_data, wp->length);
wp->parity = cbp;
/*
* When the parity stripe is missing we just write out the data.
*/
} else if (type == REQ_TYPE_NOPARITY) {
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
if (cbp == NULL)
return (ENOMEM);
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_data = addr;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = original->consumer;
- cbp->bio_driver1 = wp;
- GV_ENQUEUE(bp, cbp, pbp);
+ bioq_insert_tail(p->bqueue, cbp);
bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
@@ -455,54 +522,33 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
*/
} else {
/* Read old parity. */
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
if (cbp == NULL)
return (ENOMEM);
cbp->bio_cmd = BIO_READ;
- cbp->bio_data = g_malloc(real_len, M_WAITOK);
- cbp->bio_cflags |= GV_BIO_MALLOC;
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = parity->consumer;
- cbp->bio_driver1 = wp;
- GV_ENQUEUE(bp, cbp, pbp);
+ bioq_insert_tail(p->bqueue, cbp);
bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
/* Read old data. */
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, original, wp, NULL, 1);
if (cbp == NULL)
return (ENOMEM);
cbp->bio_cmd = BIO_READ;
- cbp->bio_data = g_malloc(real_len, M_WAITOK);
- cbp->bio_cflags |= GV_BIO_MALLOC;
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = original->consumer;
- cbp->bio_driver1 = wp;
- GV_ENQUEUE(bp, cbp, pbp);
+ bioq_insert_tail(p->bqueue, cbp);
bq = g_malloc(sizeof(*bq), M_WAITOK | M_ZERO);
bq->bp = cbp;
TAILQ_INSERT_TAIL(&wp->bits, bq, queue);
/* Write new data. */
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, original, wp, addr, 1);
if (cbp == NULL)
return (ENOMEM);
- cbp->bio_data = addr;
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = original->consumer;
-
- cbp->bio_driver1 = wp;
/*
* We must not write the new data until the old data
@@ -512,16 +558,9 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
wp->waiting = cbp;
/* The final bio for the parity. */
- cbp = g_clone_bio(bp);
+ cbp = gv_raid5_clone_bio(bp, parity, wp, NULL, 1);
if (cbp == NULL)
return (ENOMEM);
- cbp->bio_data = g_malloc(real_len, M_WAITOK | M_ZERO);
- cbp->bio_cflags |= GV_BIO_MALLOC;
- cbp->bio_offset = real_off;
- cbp->bio_length = real_len;
- cbp->bio_done = gv_plex_done;
- cbp->bio_caller2 = parity->consumer;
- cbp->bio_driver1 = wp;
/* Remember that this is the BIO for the parity data. */
wp->parity = cbp;
@@ -535,21 +574,36 @@ gv_build_raid5_req(struct gv_plex *p, struct gv_raid5_packet *wp,
return (0);
}
-/* Calculate the offsets in the various subdisks for a RAID5 request. */
-int
+/*
+ * Calculate the offsets in the various subdisks for a RAID5 request. Also take
+ * care of new subdisks in an expanded RAID5 array.
+ * XXX: This assumes that the new subdisks are inserted after the others (which
+ * is okay as long as plex_offset is larger). If subdisks are inserted into the
+ * plexlist before, we get problems.
+ */
+static int
gv_raid5_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
- off_t *real_len, int *sdno, int *psdno)
+ off_t *real_len, int *sdno, int *psdno, int growing)
{
- int sd, psd;
+ struct gv_sd *s;
+ int sd, psd, sdcount;
off_t len_left, stripeend, stripeoff, stripestart;
+ sdcount = p->sdcount;
+ if (growing) {
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (s->flags & GV_SD_GROW)
+ sdcount--;
+ }
+ }
+
/* The number of the subdisk containing the parity stripe. */
- psd = p->sdcount - 1 - ( boff / (p->stripesize * (p->sdcount - 1))) %
- p->sdcount;
+ psd = sdcount - 1 - ( boff / (p->stripesize * (sdcount - 1))) %
+ sdcount;
KASSERT(psdno >= 0, ("gv_raid5_offset: psdno < 0"));
/* Offset of the start address from the start of the stripe. */
- stripeoff = boff % (p->stripesize * (p->sdcount - 1));
+ stripeoff = boff % (p->stripesize * (sdcount - 1));
KASSERT(stripeoff >= 0, ("gv_raid5_offset: stripeoff < 0"));
/* The number of the subdisk where the stripe resides. */
@@ -561,7 +615,7 @@ gv_raid5_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
sd++;
/* The offset of the stripe on this subdisk. */
- stripestart = (boff - stripeoff) / (p->sdcount - 1);
+ stripestart = (boff - stripeoff) / (sdcount - 1);
KASSERT(stripestart >= 0, ("gv_raid5_offset: stripestart < 0"));
stripeoff %= p->stripesize;
@@ -582,3 +636,27 @@ gv_raid5_offset(struct gv_plex *p, off_t boff, off_t bcount, off_t *real_off,
return (0);
}
+
+static struct bio *
+gv_raid5_clone_bio(struct bio *bp, struct gv_sd *s, struct gv_raid5_packet *wp,
+ caddr_t addr, int use_wp)
+{
+ struct bio *cbp;
+
+ cbp = g_clone_bio(bp);
+ if (cbp == NULL)
+ return (NULL);
+ if (addr == NULL) {
+ cbp->bio_data = g_malloc(wp->length, M_WAITOK | M_ZERO);
+ cbp->bio_cflags |= GV_BIO_MALLOC;
+ } else
+ cbp->bio_data = addr;
+ cbp->bio_offset = wp->lockbase + s->drive_offset;
+ cbp->bio_length = wp->length;
+ cbp->bio_done = gv_done;
+ cbp->bio_caller1 = s;
+ if (use_wp)
+ cbp->bio_caller2 = wp;
+
+ return (cbp);
+}
diff --git a/sys/geom/vinum/geom_vinum_raid5.h b/sys/geom/vinum/geom_vinum_raid5.h
index 804920e..d7d55b2 100644
--- a/sys/geom/vinum/geom_vinum_raid5.h
+++ b/sys/geom/vinum/geom_vinum_raid5.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -35,26 +35,10 @@
* transaction (read or write).
*/
-#define GV_ENQUEUE(bp, cbp, pbp) \
- do { \
- if (bp->bio_driver1 == NULL) { \
- bp->bio_driver1 = cbp; \
- } else { \
- pbp = bp->bio_driver1; \
- while (pbp->bio_caller1 != NULL) \
- pbp = pbp->bio_caller1; \
- pbp->bio_caller1 = cbp; \
- } \
- } while (0)
-
struct gv_raid5_packet {
caddr_t data; /* Data buffer of this sub-request- */
off_t length; /* Size of data buffer. */
off_t lockbase; /* Deny access to our plex offset. */
- off_t offset; /* The drive offset of the subdisk. */
- int bufmalloc; /* Flag if data buffer was malloced. */
- int active; /* Count of active subrequests. */
- int rqcount; /* Count of subrequests. */
struct bio *bio; /* Pointer to the original bio. */
struct bio *parity; /* The bio containing the parity data. */
@@ -64,14 +48,8 @@ struct gv_raid5_packet {
TAILQ_ENTRY(gv_raid5_packet) list; /* Entry in plex's packet list. */
};
+struct gv_raid5_packet * gv_raid5_start(struct gv_plex *, struct bio *,
+ caddr_t, off_t, off_t);
int gv_stripe_active(struct gv_plex *, struct bio *);
-int gv_build_raid5_req(struct gv_plex *, struct gv_raid5_packet *,
- struct bio *, caddr_t, off_t, off_t);
-int gv_check_raid5(struct gv_plex *, struct gv_raid5_packet *,
- struct bio *, caddr_t, off_t, off_t);
-int gv_rebuild_raid5(struct gv_plex *, struct gv_raid5_packet *,
- struct bio *, caddr_t, off_t, off_t);
-void gv_raid5_worker(void *);
-void gv_plex_done(struct bio *);
#endif /* !_GEOM_VINUM_RAID5_H_ */
diff --git a/sys/geom/vinum/geom_vinum_rename.c b/sys/geom/vinum/geom_vinum_rename.c
index ee5fc9c..53c173f 100644
--- a/sys/geom/vinum/geom_vinum_rename.c
+++ b/sys/geom/vinum/geom_vinum_rename.c
@@ -34,22 +34,11 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/libkern.h>
-#include <sys/kernel.h>
#include <sys/malloc.h>
#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
-#include <geom/vinum/geom_vinum_share.h>
-
-static int gv_rename_drive(struct gv_softc *, struct gctl_req *,
- struct gv_drive *, char *, int);
-static int gv_rename_plex(struct gv_softc *, struct gctl_req *,
- struct gv_plex *, char *, int);
-static int gv_rename_sd(struct gv_softc *, struct gctl_req *,
- struct gv_sd *, char *, int);
-static int gv_rename_vol(struct gv_softc *, struct gctl_req *,
- struct gv_volume *, char *, int);
void
gv_rename(struct g_geom *gp, struct gctl_req *req)
@@ -59,8 +48,8 @@ gv_rename(struct g_geom *gp, struct gctl_req *req)
struct gv_plex *p;
struct gv_sd *s;
struct gv_drive *d;
- char *newname, *object;
- int err, *flags, type;
+ char *newname, *object, *name;
+ int *flags, type;
sc = gp->softc;
@@ -90,9 +79,9 @@ gv_rename(struct g_geom *gp, struct gctl_req *req)
gctl_error(req, "unknown volume '%s'", object);
return;
}
- err = gv_rename_vol(sc, req, v, newname, *flags);
- if (err)
- return;
+ name = g_malloc(GV_MAXVOLNAME, M_WAITOK | M_ZERO);
+ strlcpy(name, newname, GV_MAXVOLNAME);
+ gv_post_event(sc, GV_EVENT_RENAME_VOL, v, name, *flags, 0);
break;
case GV_TYPE_PLEX:
p = gv_find_plex(sc, object);
@@ -100,9 +89,9 @@ gv_rename(struct g_geom *gp, struct gctl_req *req)
gctl_error(req, "unknown plex '%s'", object);
return;
}
- err = gv_rename_plex(sc, req, p, newname, *flags);
- if (err)
- return;
+ name = g_malloc(GV_MAXPLEXNAME, M_WAITOK | M_ZERO);
+ strlcpy(name, newname, GV_MAXPLEXNAME);
+ gv_post_event(sc, GV_EVENT_RENAME_PLEX, p, name, *flags, 0);
break;
case GV_TYPE_SD:
s = gv_find_sd(sc, object);
@@ -110,9 +99,9 @@ gv_rename(struct g_geom *gp, struct gctl_req *req)
gctl_error(req, "unknown subdisk '%s'", object);
return;
}
- err = gv_rename_sd(sc, req, s, newname, *flags);
- if (err)
- return;
+ name = g_malloc(GV_MAXSDNAME, M_WAITOK | M_ZERO);
+ strlcpy(name, newname, GV_MAXSDNAME);
+ gv_post_event(sc, GV_EVENT_RENAME_SD, s, name, *flags, 0);
break;
case GV_TYPE_DRIVE:
d = gv_find_drive(sc, object);
@@ -120,122 +109,88 @@ gv_rename(struct g_geom *gp, struct gctl_req *req)
gctl_error(req, "unknown drive '%s'", object);
return;
}
- err = gv_rename_drive(sc, req, d, newname, *flags);
- if (err)
- return;
+ name = g_malloc(GV_MAXDRIVENAME, M_WAITOK | M_ZERO);
+ strlcpy(name, newname, GV_MAXDRIVENAME);
+ gv_post_event(sc, GV_EVENT_RENAME_DRIVE, d, name, *flags, 0);
break;
default:
gctl_error(req, "unknown object '%s'", object);
return;
}
-
- gv_save_config_all(sc);
}
-static int
-gv_rename_drive(struct gv_softc *sc, struct gctl_req *req, struct gv_drive *d, char *newname, int flags)
+int
+gv_rename_drive(struct gv_softc *sc, struct gv_drive *d, char *newname,
+ int flags)
{
struct gv_sd *s;
g_topology_assert();
KASSERT(d != NULL, ("gv_rename_drive: NULL d"));
- if (gv_object_type(sc, newname) != -1) {
- gctl_error(req, "drive name '%s' already in use", newname);
- return (-1);
+ if (gv_object_type(sc, newname) != GV_ERR_NOTFOUND) {
+ G_VINUM_DEBUG(1, "drive name '%s' already in use", newname);
+ return (GV_ERR_NAMETAKEN);
}
- strncpy(d->name, newname, GV_MAXDRIVENAME);
- strncpy(d->hdr->label.name, newname, GV_MAXDRIVENAME);
-
- /* XXX can we rename providers here? */
+ strlcpy(d->name, newname, sizeof(d->name));
+ if (d->hdr != NULL)
+ strlcpy(d->hdr->label.name, newname, sizeof(d->hdr->label.name));
LIST_FOREACH(s, &d->subdisks, from_drive)
- strncpy(s->drive, d->name, GV_MAXDRIVENAME);
+ strlcpy(s->drive, d->name, sizeof(s->drive));
return (0);
}
-static int
-gv_rename_plex(struct gv_softc *sc, struct gctl_req *req, struct gv_plex *p, char *newname, int flags)
+int
+gv_rename_plex(struct gv_softc *sc, struct gv_plex *p, char *newname, int flags)
{
+ char newsd[GV_MAXSDNAME];
struct gv_sd *s;
- char *plexnum, *plexnump, *oldplex, *oldplexp;
- char *newsd, *oldsd, *oldsdp;
+ char *ptr;
int err;
g_topology_assert();
KASSERT(p != NULL, ("gv_rename_plex: NULL p"));
- err = 0;
-
- if (gv_object_type(sc, newname) != -1) {
- gctl_error(req, "plex name '%s' already in use", newname);
- return (-1);
+ if (gv_object_type(sc, newname) != GV_ERR_NOTFOUND) {
+ G_VINUM_DEBUG(1, "plex name '%s' already in use", newname);
+ return (GV_ERR_NAMETAKEN);
}
- /* Needed for sanity checking. */
- plexnum = g_malloc(GV_MAXPLEXNAME, M_WAITOK | M_ZERO);
- strncpy(plexnum, newname, GV_MAXPLEXNAME);
- plexnump = plexnum;
-
- oldplex = g_malloc(GV_MAXPLEXNAME, M_WAITOK | M_ZERO);
- strncpy(oldplex, p->name, GV_MAXPLEXNAME);
- oldplexp = oldplex;
-
/*
* Locate the plex number part of the plex names.
- *
- * XXX: can we be sure that the current plex name has the format
- * 'foo.pX'?
+ * XXX: might be a good idea to sanitize input a bit more
*/
- strsep(&oldplexp, ".");
- strsep(&plexnump, ".");
- if (plexnump == NULL || *plexnump == '\0') {
- gctl_error(req, "proposed plex name '%s' is not a valid plex "
+ ptr = strrchr(newname, '.');
+ if (ptr == NULL) {
+ G_VINUM_DEBUG(0, "proposed plex name '%s' is not a valid plex "
"name", newname);
- err = -1;
- goto failure;
+ return (GV_ERR_INVNAME);
}
- if (strcmp(oldplexp, plexnump)) {
- gctl_error(req, "current and proposed plex numbers (%s, %s) "
- "do not match", plexnump, oldplexp);
- err = -1;
- goto failure;
- }
-
- strncpy(p->name, newname, GV_MAXPLEXNAME);
- /* XXX can we rename providers here? */
+ strlcpy(p->name, newname, sizeof(p->name));
/* Fix up references and potentially rename subdisks. */
LIST_FOREACH(s, &p->subdisks, in_plex) {
- strncpy(s->plex, p->name, GV_MAXPLEXNAME);
+ strlcpy(s->plex, p->name, sizeof(s->plex));
if (flags && GV_FLAG_R) {
- newsd = g_malloc(GV_MAXSDNAME, M_WAITOK | M_ZERO);
- oldsd = g_malloc(GV_MAXSDNAME, M_WAITOK | M_ZERO);
- oldsdp = oldsd;
- strncpy(oldsd, s->name, GV_MAXSDNAME);
/*
- * XXX: can we be sure that the current sd name has the
- * format 'foo.pX.sY'?
+ * Look for the two last dots in the string, and assume
+ * that the old value was ok.
*/
- strsep(&oldsdp, ".");
- strsep(&oldsdp, ".");
- snprintf(newsd, GV_MAXSDNAME, "%s.%s", p->name, oldsdp);
- err = gv_rename_sd(sc, req, s, newsd, flags);
- g_free(newsd);
- g_free(oldsd);
+ ptr = strrchr(s->name, '.');
+ if (ptr == NULL)
+ return (GV_ERR_INVNAME);
+ ptr++;
+ snprintf(newsd, sizeof(newsd), "%s.%s", p->name, ptr);
+ err = gv_rename_sd(sc, s, newsd, flags);
if (err)
- goto failure;
+ return (err);
}
}
-
-failure:
- g_free(plexnum);
- g_free(oldplex);
-
- return (err);
+ return (0);
}
/*
@@ -243,106 +198,64 @@ failure:
* since there are no structures below a subdisk. Similarly, we don't have to
* clean up any references elsewhere to the subdisk's name.
*/
-static int
-gv_rename_sd(struct gv_softc *sc, struct gctl_req *req, struct gv_sd *s, char * newname, int flags)
+int
+gv_rename_sd(struct gv_softc *sc, struct gv_sd *s, char *newname, int flags)
{
- char *new, *newp, *old, *oldp;
- int err;
+ char *dot1, *dot2;
g_topology_assert();
KASSERT(s != NULL, ("gv_rename_sd: NULL s"));
- err = 0;
-
- if (gv_object_type(sc, newname) != -1) {
- gctl_error(req, "subdisk name %s already in use", newname);
- return (-1);
+ if (gv_object_type(sc, newname) != GV_ERR_NOTFOUND) {
+ G_VINUM_DEBUG(1, "subdisk name %s already in use", newname);
+ return (GV_ERR_NAMETAKEN);
}
- /* Needed for sanity checking. */
- new = g_malloc(GV_MAXSDNAME, M_WAITOK | M_ZERO);
- strncpy(new, newname, GV_MAXSDNAME);
- newp = new;
-
- old = g_malloc(GV_MAXSDNAME, M_WAITOK | M_ZERO);
- strncpy(old, s->name, GV_MAXSDNAME);
- oldp = old;
-
- /*
- * Locate the sd number part of the sd names.
- *
- * XXX: can we be sure that the current sd name has the format
- * 'foo.pX.sY'?
- */
- strsep(&oldp, ".");
- strsep(&oldp, ".");
- strsep(&newp, ".");
- if (newp == NULL || *newp == '\0') {
- gctl_error(req, "proposed sd name '%s' is not a valid sd name",
+ /* Locate the sd number part of the sd names. */
+ dot1 = strchr(newname, '.');
+ if (dot1 == NULL || (dot2 = strchr(dot1 + 1, '.')) == NULL) {
+ G_VINUM_DEBUG(0, "proposed sd name '%s' is not a valid sd name",
newname);
- err = -1;
- goto fail;
+ return (GV_ERR_INVNAME);
}
- strsep(&newp, ".");
- if (newp == NULL || *newp == '\0') {
- gctl_error(req, "proposed sd name '%s' is not a valid sd name",
- newname);
- err = -1;
- goto fail;
- }
- if (strcmp(newp, oldp)) {
- gctl_error(req, "current and proposed sd numbers (%s, %s) do "
- "not match", oldp, newp);
- err = -1;
- goto fail;
- }
-
- strncpy(s->name, newname, GV_MAXSDNAME);
-
- /* XXX: can we rename providers here? */
-
-fail:
- g_free(new);
- g_free(old);
-
- return (err);
+ strlcpy(s->name, newname, sizeof(s->name));
+ return (0);
}
-static int
-gv_rename_vol(struct gv_softc *sc, struct gctl_req *req, struct gv_volume *v, char *newname, int flags)
+int
+gv_rename_vol(struct gv_softc *sc, struct gv_volume *v, char *newname,
+ int flags)
{
+ struct g_provider *pp;
struct gv_plex *p;
- char *new, *old, *oldp;
+ char newplex[GV_MAXPLEXNAME], *ptr;
int err;
g_topology_assert();
KASSERT(v != NULL, ("gv_rename_vol: NULL v"));
+ pp = v->provider;
+ KASSERT(pp != NULL, ("gv_rename_vol: NULL pp"));
- if (gv_object_type(sc, newname) != -1) {
- gctl_error(req, "volume name %s already in use", newname);
- return (-1);
+ if (gv_object_type(sc, newname) != GV_ERR_NOTFOUND) {
+ G_VINUM_DEBUG(1, "volume name %s already in use", newname);
+ return (GV_ERR_NAMETAKEN);
}
/* Rename the volume. */
- strncpy(v->name, newname, GV_MAXVOLNAME);
+ strlcpy(v->name, newname, sizeof(v->name));
/* Fix up references and potentially rename plexes. */
LIST_FOREACH(p, &v->plexes, in_volume) {
- strncpy(p->volume, v->name, GV_MAXVOLNAME);
+ strlcpy(p->volume, v->name, sizeof(p->volume));
if (flags && GV_FLAG_R) {
- new = g_malloc(GV_MAXPLEXNAME, M_WAITOK | M_ZERO);
- old = g_malloc(GV_MAXPLEXNAME, M_WAITOK | M_ZERO);
- oldp = old;
- strncpy(old, p->name, GV_MAXPLEXNAME);
/*
- * XXX: can we be sure that the current plex name has
- * the format 'foo.pX'?
+ * Look for the last dot in the string, and assume that
+ * the old value was ok.
*/
- strsep(&oldp, ".");
- snprintf(new, GV_MAXPLEXNAME, "%s.%s", v->name, oldp);
- err = gv_rename_plex(sc, req, p, new, flags);
- g_free(new);
- g_free(old);
+ ptr = strrchr(p->name, '.');
+ ptr++;
+ snprintf(newplex, sizeof(newplex), "%s.%s", v->name, ptr);
+ err = gv_rename_plex(sc, p, newplex, flags);
if (err)
return (err);
}
diff --git a/sys/geom/vinum/geom_vinum_rm.c b/sys/geom/vinum/geom_vinum_rm.c
index d7748da..2b1d36b 100644
--- a/sys/geom/vinum/geom_vinum_rm.c
+++ b/sys/geom/vinum/geom_vinum_rm.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -30,20 +30,11 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/libkern.h>
-#include <sys/kernel.h>
#include <sys/malloc.h>
#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
-#include <geom/vinum/geom_vinum_share.h>
-
-static int gv_rm_drive(struct gv_softc *, struct gctl_req *,
- struct gv_drive *, int);
-static int gv_rm_plex(struct gv_softc *, struct gctl_req *,
- struct gv_plex *, int);
-static int gv_rm_vol(struct gv_softc *, struct gctl_req *,
- struct gv_volume *, int);
/* General 'remove' routine. */
void
@@ -56,7 +47,7 @@ gv_remove(struct g_geom *gp, struct gctl_req *req)
struct gv_drive *d;
int *argc, *flags;
char *argv, buf[20];
- int i, type, err;
+ int i, type;
argc = gctl_get_paraml(req, "argc", sizeof(*argc));
@@ -73,6 +64,8 @@ gv_remove(struct g_geom *gp, struct gctl_req *req)
sc = gp->softc;
+ /* XXX config locking */
+
for (i = 0; i < *argc; i++) {
snprintf(buf, sizeof(buf), "argv%d", i);
argv = gctl_get_param(req, buf, NULL);
@@ -82,184 +75,173 @@ gv_remove(struct g_geom *gp, struct gctl_req *req)
switch (type) {
case GV_TYPE_VOL:
v = gv_find_vol(sc, argv);
- if (v == NULL) {
- gctl_error(req, "unknown volume '%s'", argv);
+
+ /*
+ * If this volume has plexes, we want a recursive
+ * removal.
+ */
+ if (!LIST_EMPTY(&v->plexes) && !(*flags & GV_FLAG_R)) {
+ gctl_error(req, "volume '%s' has attached "
+ "plexes - need recursive removal", v->name);
return;
}
- err = gv_rm_vol(sc, req, v, *flags);
- if (err)
- return;
+
+ gv_post_event(sc, GV_EVENT_RM_VOLUME, v, NULL, 0, 0);
break;
+
case GV_TYPE_PLEX:
p = gv_find_plex(sc, argv);
- if (p == NULL) {
- gctl_error(req, "unknown plex '%s'", argv);
+
+ /*
+ * If this plex has subdisks, we want a recursive
+ * removal.
+ */
+ if (!LIST_EMPTY(&p->subdisks) &&
+ !(*flags & GV_FLAG_R)) {
+ gctl_error(req, "plex '%s' has attached "
+ "subdisks - need recursive removal",
+ p->name);
return;
}
- err = gv_rm_plex(sc, req, p, *flags);
- if (err)
+
+ /* Don't allow removal of the only plex of a volume. */
+ if (p->vol_sc != NULL && p->vol_sc->plexcount == 1) {
+ gctl_error(req, "plex '%s' is still attached "
+ "to volume '%s'", p->name, p->volume);
return;
+ }
+
+ gv_post_event(sc, GV_EVENT_RM_PLEX, p, NULL, 0, 0);
break;
+
case GV_TYPE_SD:
s = gv_find_sd(sc, argv);
- if (s == NULL) {
- gctl_error(req, "unknown subdisk '%s'", argv);
+
+ /* Don't allow removal if attached to a plex. */
+ if (s->plex_sc != NULL) {
+ gctl_error(req, "subdisk '%s' is still attached"
+ " to plex '%s'", s->name, s->plex_sc->name);
return;
}
- err = gv_rm_sd(sc, req, s, *flags);
- if (err)
- return;
+
+ gv_post_event(sc, GV_EVENT_RM_SD, s, NULL, 0, 0);
break;
+
case GV_TYPE_DRIVE:
d = gv_find_drive(sc, argv);
- if (d == NULL) {
- gctl_error(req, "unknown drive '%s'", argv);
+ /* We don't allow removing open drives unless GV_FLAG_F is set. */
+ if (gv_consumer_is_open(d->consumer) &&
+ !(*flags & GV_FLAG_F)) {
+ gctl_error(req, "drive '%s' is open", d->name);
return;
}
- err = gv_rm_drive(sc, req, d, *flags);
- if (err)
+
+ /* A drive with subdisks needs a recursive removal. */
+/* if (!LIST_EMPTY(&d->subdisks) &&
+ !(*flags & GV_FLAG_R)) {
+ gctl_error(req, "drive '%s' still has subdisks"
+ " - need recursive removal", d->name);
return;
+ }*/
+
+ gv_post_event(sc, GV_EVENT_RM_DRIVE, d, NULL, *flags,
+ 0);
break;
+
default:
gctl_error(req, "unknown object '%s'", argv);
return;
}
}
- gv_save_config_all(sc);
+ gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
}
/* Resets configuration */
int
-gv_resetconfig(struct g_geom *gp, struct gctl_req *req)
+gv_resetconfig(struct gv_softc *sc)
{
- struct gv_softc *sc;
struct gv_drive *d, *d2;
struct gv_volume *v, *v2;
struct gv_plex *p, *p2;
struct gv_sd *s, *s2;
- int flags;
-
- d = NULL;
- d2 = NULL;
- p = NULL;
- p2 = NULL;
- s = NULL;
- s2 = NULL;
- flags = GV_FLAG_R;
- sc = gp->softc;
- /* First loop through to make sure no volumes are up */
- LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
- if (gv_is_open(v->geom)) {
- gctl_error(req, "volume '%s' is busy", v->name);
- return (-1);
+
+ /* First make sure nothing is open. */
+ LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
+ if (gv_consumer_is_open(d->consumer)) {
+ return (GV_ERR_ISBUSY);
}
}
/* Then if not, we remove everything. */
- LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2)
- gv_rm_vol(sc, req, v, flags);
- LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2)
- gv_rm_plex(sc, req, p, flags);
LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2)
- gv_rm_sd(sc, req, s, flags);
+ gv_rm_sd(sc, s);
LIST_FOREACH_SAFE(d, &sc->drives, drive, d2)
- gv_rm_drive(sc, req, d, flags);
- gv_save_config_all(sc);
+ gv_rm_drive(sc, d, 0);
+ LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2)
+ gv_rm_plex(sc, p);
+ LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2)
+ gv_rm_vol(sc, v);
+
+ gv_post_event(sc, GV_EVENT_SAVE_CONFIG, sc, NULL, 0, 0);
+
return (0);
}
/* Remove a volume. */
-static int
-gv_rm_vol(struct gv_softc *sc, struct gctl_req *req, struct gv_volume *v, int flags)
+void
+gv_rm_vol(struct gv_softc *sc, struct gv_volume *v)
{
- struct g_geom *gp;
+ struct g_provider *pp;
struct gv_plex *p, *p2;
- int err;
- g_topology_assert();
KASSERT(v != NULL, ("gv_rm_vol: NULL v"));
-
- /* If this volume has plexes, we want a recursive removal. */
- if (!LIST_EMPTY(&v->plexes) && !(flags & GV_FLAG_R)) {
- gctl_error(req, "volume '%s' has attached plexes", v->name);
- return (-1);
- }
-
- gp = v->geom;
+ pp = v->provider;
+ KASSERT(pp != NULL, ("gv_rm_vol: NULL pp"));
/* Check if any of our consumers is open. */
- if (gp != NULL && gv_is_open(gp)) {
- gctl_error(req, "volume '%s' is busy", v->name);
- return (-1);
+ if (gv_provider_is_open(pp)) {
+ G_VINUM_DEBUG(0, "Unable to remove %s: volume still in use",
+ v->name);
+ return;
}
/* Remove the plexes our volume has. */
- LIST_FOREACH_SAFE(p, &v->plexes, in_volume, p2) {
- v->plexcount--;
- LIST_REMOVE(p, in_volume);
- p->vol_sc = NULL;
-
- err = gv_rm_plex(sc, req, p, flags);
- if (err)
- return (err);
- }
+ LIST_FOREACH_SAFE(p, &v->plexes, in_volume, p2)
+ gv_rm_plex(sc, p);
- /* Clean up and let our geom fade away. */
+ /* Clean up. */
LIST_REMOVE(v, volume);
- gv_kill_vol_thread(v);
g_free(v);
- if (gp != NULL) {
- gp->softc = NULL;
- g_wither_geom(gp, ENXIO);
- }
- return (0);
+ /* Get rid of the volume's provider. */
+ if (pp != NULL) {
+ g_topology_lock();
+ pp->flags |= G_PF_WITHER;
+ g_orphan_provider(pp, ENXIO);
+ g_topology_unlock();
+ }
}
/* Remove a plex. */
-static int
-gv_rm_plex(struct gv_softc *sc, struct gctl_req *req, struct gv_plex *p, int flags)
+void
+gv_rm_plex(struct gv_softc *sc, struct gv_plex *p)
{
- struct g_geom *gp;
struct gv_volume *v;
struct gv_sd *s, *s2;
- int err;
-
- g_topology_assert();
KASSERT(p != NULL, ("gv_rm_plex: NULL p"));
-
- /* If this plex has subdisks, we want a recursive removal. */
- if (!LIST_EMPTY(&p->subdisks) && !(flags & GV_FLAG_R)) {
- gctl_error(req, "plex '%s' has attached subdisks", p->name);
- return (-1);
- }
-
- if (p->vol_sc != NULL && p->vol_sc->plexcount == 1) {
- gctl_error(req, "plex '%s' is still attached to volume '%s'",
- p->name, p->volume);
- return (-1);
- }
-
- gp = p->geom;
+ v = p->vol_sc;
/* Check if any of our consumers is open. */
- if (gp != NULL && gv_is_open(gp)) {
- gctl_error(req, "plex '%s' is busy", p->name);
- return (-1);
+ if (v != NULL && gv_provider_is_open(v->provider) && v->plexcount < 2) {
+ G_VINUM_DEBUG(0, "Unable to remove %s: volume still in use",
+ p->name);
+ return;
}
/* Remove the subdisks our plex has. */
- LIST_FOREACH_SAFE(s, &p->subdisks, in_plex, s2) {
-#if 0
- LIST_REMOVE(s, in_plex);
- s->plex_sc = NULL;
-#endif
-
- err = gv_rm_sd(sc, req, s, flags);
- if (err)
- return (err);
- }
+ LIST_FOREACH_SAFE(s, &p->subdisks, in_plex, s2)
+ gv_rm_sd(sc, s);
v = p->vol_sc;
/* Clean up and let our geom fade away. */
@@ -272,35 +254,25 @@ gv_rm_plex(struct gv_softc *sc, struct gctl_req *req, struct gv_plex *p, int fla
gv_update_vol_size(v, gv_vol_size(v));
}
- gv_kill_plex_thread(p);
g_free(p);
-
- if (gp != NULL) {
- gp->softc = NULL;
- g_wither_geom(gp, ENXIO);
- }
-
- return (0);
}
/* Remove a subdisk. */
-int
-gv_rm_sd(struct gv_softc *sc, struct gctl_req *req, struct gv_sd *s, int flags)
+void
+gv_rm_sd(struct gv_softc *sc, struct gv_sd *s)
{
- struct g_provider *pp;
struct gv_plex *p;
struct gv_volume *v;
KASSERT(s != NULL, ("gv_rm_sd: NULL s"));
- pp = s->provider;
p = s->plex_sc;
v = NULL;
/* Clean up. */
if (p != NULL) {
LIST_REMOVE(s, in_plex);
-
+ s->plex_sc = NULL;
p->sdcount--;
/* Update the plexsize. */
p->size = gv_plex_size(p);
@@ -310,77 +282,64 @@ gv_rm_sd(struct gv_softc *sc, struct gctl_req *req, struct gv_sd *s, int flags)
gv_update_vol_size(v, gv_vol_size(v));
}
}
- if (s->drive_sc)
+ if (s->drive_sc && !(s->drive_sc->flags & GV_DRIVE_REFERENCED))
LIST_REMOVE(s, from_drive);
LIST_REMOVE(s, sd);
gv_free_sd(s);
g_free(s);
-
- /* If the subdisk has a provider we need to clean up this one too. */
- if (pp != NULL) {
- pp->flags |= G_PF_WITHER;
- g_orphan_provider(pp, ENXIO);
- }
-
- return (0);
}
/* Remove a drive. */
-static int
-gv_rm_drive(struct gv_softc *sc, struct gctl_req *req, struct gv_drive *d, int flags)
+void
+gv_rm_drive(struct gv_softc *sc, struct gv_drive *d, int flags)
{
- struct g_geom *gp;
struct g_consumer *cp;
struct gv_freelist *fl, *fl2;
struct gv_plex *p;
struct gv_sd *s, *s2;
struct gv_volume *v;
+ struct gv_drive *d2;
int err;
KASSERT(d != NULL, ("gv_rm_drive: NULL d"));
- gp = d->geom;
- KASSERT(gp != NULL, ("gv_rm_drive: NULL gp"));
- /* We don't allow to remove open drives. */
- if (gv_is_open(gp)) {
- gctl_error(req, "drive '%s' is open", d->name);
- return (-1);
- }
+ cp = d->consumer;
- /* A drive with subdisks needs a recursive removal. */
- if (!LIST_EMPTY(&d->subdisks) && !(flags & GV_FLAG_R)) {
- gctl_error(req, "drive '%s' still has subdisks", d->name);
- return (-1);
- }
+ if (cp != NULL) {
+ g_topology_lock();
+ err = g_access(cp, 0, 1, 0);
+ g_topology_unlock();
- cp = LIST_FIRST(&gp->consumer);
- err = g_access(cp, 0, 1, 0);
- if (err) {
- G_VINUM_DEBUG(0, "%s: unable to access '%s', errno: "
- "%d", __func__, cp->provider->name, err);
- return (err);
- }
+ if (err) {
+ G_VINUM_DEBUG(0, "%s: couldn't access '%s', "
+ "errno: %d", __func__, cp->provider->name, err);
+ return;
+ }
- /* Clear the Vinum Magic. */
- d->hdr->magic = GV_NOMAGIC;
- g_topology_unlock();
- err = gv_write_header(cp, d->hdr);
- if (err) {
- G_VINUM_DEBUG(0, "%s: unable to write header to '%s'"
- ", errno: %d", __func__, cp->provider->name, err);
- d->hdr->magic = GV_MAGIC;
+ /* Clear the Vinum Magic. */
+ d->hdr->magic = GV_NOMAGIC;
+ err = gv_write_header(cp, d->hdr);
+ if (err)
+ G_VINUM_DEBUG(0, "gv_rm_drive: couldn't write header to"
+ " '%s', errno: %d", cp->provider->name, err);
+
+ g_topology_lock();
+ g_access(cp, -cp->acr, -cp->acw, -cp->ace);
+ g_detach(cp);
+ g_destroy_consumer(cp);
+ g_topology_unlock();
}
- g_topology_lock();
- g_access(cp, 0, -1, 0);
/* Remove all associated subdisks, plexes, volumes. */
- if (!LIST_EMPTY(&d->subdisks)) {
- LIST_FOREACH_SAFE(s, &d->subdisks, from_drive, s2) {
- p = s->plex_sc;
- if (p != NULL) {
- v = p->vol_sc;
- if (v != NULL)
- gv_rm_vol(sc, req, v, flags);
+ if (flags & GV_FLAG_R) {
+ if (!LIST_EMPTY(&d->subdisks)) {
+ LIST_FOREACH_SAFE(s, &d->subdisks, from_drive, s2) {
+ p = s->plex_sc;
+ if (p != NULL) {
+ v = p->vol_sc;
+ if (v != NULL)
+ gv_rm_vol(sc, v);
+ }
}
}
}
@@ -390,15 +349,33 @@ gv_rm_drive(struct gv_softc *sc, struct gctl_req *req, struct gv_drive *d, int f
LIST_REMOVE(fl, freelist);
g_free(fl);
}
- LIST_REMOVE(d, drive);
- gv_kill_drive_thread(d);
- gp = d->geom;
- d->geom = NULL;
+ LIST_REMOVE(d, drive);
g_free(d->hdr);
+
+ /* Put ourself into referenced state if we have subdisks. */
+ if (d->sdcount > 0) {
+ d->consumer = NULL;
+ d->hdr = NULL;
+ d->flags |= GV_DRIVE_REFERENCED;
+ snprintf(d->device, sizeof(d->device), "???");
+ d->size = 0;
+ d->avail = 0;
+ d->freelist_entries = 0;
+ LIST_FOREACH(s, &d->subdisks, from_drive) {
+ s->flags |= GV_SD_TASTED;
+ gv_set_sd_state(s, GV_SD_DOWN, GV_SETSTATE_FORCE);
+ }
+ /* Shuffle around so we keep gv_is_newer happy. */
+ LIST_REMOVE(d, drive);
+ d2 = LIST_FIRST(&sc->drives);
+ if (d2 == NULL)
+ LIST_INSERT_HEAD(&sc->drives, d, drive);
+ else
+ LIST_INSERT_AFTER(d2, d, drive);
+ return;
+ }
g_free(d);
- gv_save_config_all(sc);
- g_wither_geom(gp, ENXIO);
- return (err);
+ gv_save_config(sc);
}
diff --git a/sys/geom/vinum/geom_vinum_share.c b/sys/geom/vinum/geom_vinum_share.c
index ec97c5e..c3ddd4c 100644
--- a/sys/geom/vinum/geom_vinum_share.c
+++ b/sys/geom/vinum/geom_vinum_share.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
* Copyright (c) 1997, 1998, 1999
* Nan Yang Computer Services Limited. All rights reserved.
*
@@ -45,10 +45,6 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#ifdef _KERNEL
-#include <sys/bio.h>
-#include <sys/conf.h>
-#include <sys/kernel.h>
-#include <sys/kthread.h>
#include <sys/malloc.h>
#include <sys/systm.h>
@@ -63,7 +59,6 @@ __FBSDID("$FreeBSD$");
#define g_free free
#endif /* _KERNEL */
-#include <sys/lock.h>
#include <sys/mutex.h>
#include <sys/queue.h>
@@ -237,6 +232,8 @@ gv_sdstatei(char *buf)
return (GV_SD_UP);
else if (!strcmp(buf, "reviving"))
return (GV_SD_REVIVING);
+ else if (!strcmp(buf, "initializing"))
+ return (GV_SD_INITIALIZING);
else if (!strcmp(buf, "stale"))
return (GV_SD_STALE);
else
@@ -273,6 +270,8 @@ gv_plexstatei(char *buf)
return (GV_PLEX_INITIALIZING);
else if (!strcmp(buf, "degraded"))
return (GV_PLEX_DEGRADED);
+ else if (!strcmp(buf, "growable"))
+ return (GV_PLEX_GROWABLE);
else
return (GV_PLEX_DOWN);
}
@@ -288,6 +287,8 @@ gv_plexstate(int state)
return "initializing";
case GV_PLEX_DEGRADED:
return "degraded";
+ case GV_PLEX_GROWABLE:
+ return "growable";
case GV_PLEX_UP:
return "up";
default:
@@ -378,14 +379,13 @@ gv_new_drive(int max, char *token[])
return (NULL);
#ifdef _KERNEL
- d = g_malloc(sizeof(struct gv_drive), M_WAITOK | M_ZERO);
-
+ d = g_malloc(sizeof(struct gv_drive), M_NOWAIT);
#else
d = malloc(sizeof(struct gv_drive));
+#endif
if (d == NULL)
return (NULL);
bzero(d, sizeof(struct gv_drive));
-#endif
errors = 0;
for (j = 1; j < max; j++) {
@@ -406,10 +406,10 @@ gv_new_drive(int max, char *token[])
if (strncmp(ptr, "/dev/", 5) == 0)
ptr += 5;
- strncpy(d->device, ptr, GV_MAXDRIVENAME);
+ strlcpy(d->device, ptr, sizeof(d->device));
} else {
/* We assume this is the drive name. */
- strncpy(d->name, token[j], GV_MAXDRIVENAME);
+ strlcpy(d->name, token[j], sizeof(d->name));
}
}
@@ -435,14 +435,13 @@ gv_new_volume(int max, char *token[])
return (NULL);
#ifdef _KERNEL
- v = g_malloc(sizeof(struct gv_volume), M_WAITOK | M_ZERO);
-
+ v = g_malloc(sizeof(struct gv_volume), M_NOWAIT);
#else
v = malloc(sizeof(struct gv_volume));
+#endif
if (v == NULL)
return (NULL);
bzero(v, sizeof(struct gv_volume));
-#endif
errors = 0;
for (j = 1; j < max; j++) {
@@ -455,7 +454,7 @@ gv_new_volume(int max, char *token[])
v->state = gv_volstatei(token[j]);
} else {
/* We assume this is the volume name. */
- strncpy(v->name, token[j], GV_MAXVOLNAME);
+ strlcpy(v->name, token[j], sizeof(v->name));
}
}
@@ -481,13 +480,13 @@ gv_new_plex(int max, char *token[])
return (NULL);
#ifdef _KERNEL
- p = g_malloc(sizeof(struct gv_plex), M_WAITOK | M_ZERO);
+ p = g_malloc(sizeof(struct gv_plex), M_NOWAIT);
#else
p = malloc(sizeof(struct gv_plex));
+#endif
if (p == NULL)
return (NULL);
bzero(p, sizeof(struct gv_plex));
-#endif
errors = 0;
for (j = 1; j < max; j++) {
@@ -497,7 +496,7 @@ gv_new_plex(int max, char *token[])
errors++;
break;
}
- strncpy(p->name, token[j], GV_MAXPLEXNAME);
+ strlcpy(p->name, token[j], sizeof(p->name));
} else if (!strcmp(token[j], "org")) {
j++;
if (j >= max) {
@@ -532,7 +531,7 @@ gv_new_plex(int max, char *token[])
errors++;
break;
}
- strncpy(p->volume, token[j], GV_MAXVOLNAME);
+ strlcpy(p->volume, token[j], sizeof(p->volume));
} else {
errors++;
break;
@@ -555,16 +554,16 @@ gv_new_sd(int max, char *token[])
int j, errors;
if (token[1] == NULL || *token[1] == '\0')
- return NULL;
+ return (NULL);
#ifdef _KERNEL
- s = g_malloc(sizeof(struct gv_sd), M_WAITOK | M_ZERO);
+ s = g_malloc(sizeof(struct gv_sd), M_NOWAIT);
#else
s = malloc(sizeof(struct gv_sd));
+#endif
if (s == NULL)
- return NULL;
+ return (NULL);
bzero(s, sizeof(struct gv_sd));
-#endif
s->plex_offset = -1;
s->size = -1;
@@ -577,21 +576,21 @@ gv_new_sd(int max, char *token[])
errors++;
break;
}
- strncpy(s->name, token[j], GV_MAXSDNAME);
+ strlcpy(s->name, token[j], sizeof(s->name));
} else if (!strcmp(token[j], "drive")) {
j++;
if (j >= max) {
errors++;
break;
}
- strncpy(s->drive, token[j], GV_MAXDRIVENAME);
+ strlcpy(s->drive, token[j], sizeof(s->drive));
} else if (!strcmp(token[j], "plex")) {
j++;
if (j >= max) {
errors++;
break;
}
- strncpy(s->plex, token[j], GV_MAXPLEXNAME);
+ strlcpy(s->plex, token[j], sizeof(s->plex));
} else if (!strcmp(token[j], "state")) {
j++;
if (j >= max) {
diff --git a/sys/geom/vinum/geom_vinum_share.h b/sys/geom/vinum/geom_vinum_share.h
index f15f45d..9a93968 100644
--- a/sys/geom/vinum/geom_vinum_share.h
+++ b/sys/geom/vinum/geom_vinum_share.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
diff --git a/sys/geom/vinum/geom_vinum_state.c b/sys/geom/vinum/geom_vinum_state.c
index db40390..568c784 100644
--- a/sys/geom/vinum/geom_vinum_state.c
+++ b/sys/geom/vinum/geom_vinum_state.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -27,8 +27,6 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
-#include <sys/param.h>
-#include <sys/kernel.h>
#include <sys/libkern.h>
#include <sys/malloc.h>
@@ -43,8 +41,10 @@ gv_setstate(struct g_geom *gp, struct gctl_req *req)
struct gv_softc *sc;
struct gv_sd *s;
struct gv_drive *d;
+ struct gv_volume *v;
+ struct gv_plex *p;
char *obj, *state;
- int err, f, *flags, newstate, type;
+ int f, *flags, type;
f = 0;
obj = gctl_get_param(req, "object", NULL);
@@ -72,43 +72,52 @@ gv_setstate(struct g_geom *gp, struct gctl_req *req)
type = gv_object_type(sc, obj);
switch (type) {
case GV_TYPE_VOL:
+ if (gv_volstatei(state) < 0) {
+ gctl_error(req, "invalid volume state '%s'", state);
+ break;
+ }
+ v = gv_find_vol(sc, obj);
+ gv_post_event(sc, GV_EVENT_SET_VOL_STATE, v, NULL,
+ gv_volstatei(state), f);
+ break;
+
case GV_TYPE_PLEX:
- gctl_error(req, "volume or plex state cannot be set currently");
+ if (gv_plexstatei(state) < 0) {
+ gctl_error(req, "invalid plex state '%s'", state);
+ break;
+ }
+ p = gv_find_plex(sc, obj);
+ gv_post_event(sc, GV_EVENT_SET_PLEX_STATE, p, NULL,
+ gv_plexstatei(state), f);
break;
case GV_TYPE_SD:
- newstate = gv_sdstatei(state);
- if (newstate < 0) {
+ if (gv_sdstatei(state) < 0) {
gctl_error(req, "invalid subdisk state '%s'", state);
break;
}
s = gv_find_sd(sc, obj);
- err = gv_set_sd_state(s, newstate, f);
- if (err)
- gctl_error(req, "cannot set subdisk state");
+ gv_post_event(sc, GV_EVENT_SET_SD_STATE, s, NULL,
+ gv_sdstatei(state), f);
break;
case GV_TYPE_DRIVE:
- newstate = gv_drivestatei(state);
- if (newstate < 0) {
+ if (gv_drivestatei(state) < 0) {
gctl_error(req, "invalid drive state '%s'", state);
break;
}
d = gv_find_drive(sc, obj);
- err = gv_set_drive_state(d, newstate, f);
- if (err)
- gctl_error(req, "cannot set drive state");
+ gv_post_event(sc, GV_EVENT_SET_DRIVE_STATE, d, NULL,
+ gv_drivestatei(state), f);
break;
default:
gctl_error(req, "unknown object '%s'", obj);
break;
}
-
- return;
}
-/* Update drive state; return 0 if the state changes, otherwise -1. */
+/* Update drive state; return 0 if the state changes, otherwise error. */
int
gv_set_drive_state(struct gv_drive *d, int newstate, int flags)
{
@@ -123,9 +132,9 @@ gv_set_drive_state(struct gv_drive *d, int newstate, int flags)
return (0);
/* We allow to take down an open drive only with force. */
- if ((newstate == GV_DRIVE_DOWN) && gv_is_open(d->geom) &&
+ if ((newstate == GV_DRIVE_DOWN) && gv_consumer_is_open(d->consumer) &&
(!(flags & GV_SETSTATE_FORCE)))
- return (-1);
+ return (GV_ERR_ISBUSY);
d->state = newstate;
@@ -136,7 +145,7 @@ gv_set_drive_state(struct gv_drive *d, int newstate, int flags)
/* Save the config back to disk. */
if (flags & GV_SETSTATE_CONFIG)
- gv_save_config_all(d->vinumconf);
+ gv_save_config(d->vinumconf);
return (0);
}
@@ -165,14 +174,24 @@ gv_set_sd_state(struct gv_sd *s, int newstate, int flags)
* force.
*/
if ((s->plex_sc != NULL) && !(flags & GV_SETSTATE_FORCE))
- return (-1);
+ return (GV_ERR_ISATTACHED);
+ break;
+
+ case GV_SD_REVIVING:
+ case GV_SD_INITIALIZING:
+ /*
+ * Only do this if we're forced, since it usually is done
+ * internally, and then we do use the force flag.
+ */
+ if (!(flags & GV_SETSTATE_FORCE))
+ return (GV_ERR_SETSTATE);
break;
case GV_SD_UP:
/* We can't bring the subdisk up if our drive is dead. */
d = s->drive_sc;
if ((d == NULL) || (d->state != GV_DRIVE_UP))
- return (-1);
+ return (GV_ERR_SETSTATE);
/* Check from where we want to be brought up. */
switch (s->state) {
@@ -201,12 +220,15 @@ gv_set_sd_state(struct gv_sd *s, int newstate, int flags)
if (p->org != GV_PLEX_RAID5)
break;
- else if (flags & GV_SETSTATE_FORCE)
+ else if (s->flags & GV_SD_CANGOUP) {
+ s->flags &= ~GV_SD_CANGOUP;
+ break;
+ } else if (flags & GV_SETSTATE_FORCE)
break;
else
s->state = GV_SD_STALE;
- status = -1;
+ status = GV_ERR_SETSTATE;
break;
case GV_SD_STALE:
@@ -221,21 +243,24 @@ gv_set_sd_state(struct gv_sd *s, int newstate, int flags)
if (p == NULL || flags & GV_SETSTATE_FORCE)
break;
- if ((p->org != GV_PLEX_RAID5) &&
- (p->vol_sc->plexcount == 1))
+ if ((p->org != GV_PLEX_RAID5 &&
+ p->vol_sc->plexcount == 1) ||
+ (p->flags & GV_PLEX_SYNCING &&
+ p->synced > 0 &&
+ p->org == GV_PLEX_RAID5))
break;
else
- return (-1);
+ return (GV_ERR_SETSTATE);
default:
- return (-1);
+ return (GV_ERR_INVSTATE);
}
break;
/* Other state transitions are only possible with force. */
default:
if (!(flags & GV_SETSTATE_FORCE))
- return (-1);
+ return (GV_ERR_SETSTATE);
}
/* We can change the state and do it. */
@@ -248,11 +273,102 @@ gv_set_sd_state(struct gv_sd *s, int newstate, int flags)
/* Save the config back to disk. */
if (flags & GV_SETSTATE_CONFIG)
- gv_save_config_all(s->vinumconf);
+ gv_save_config(s->vinumconf);
return (status);
}
+int
+gv_set_plex_state(struct gv_plex *p, int newstate, int flags)
+{
+ struct gv_volume *v;
+ int oldstate, plexdown;
+
+ KASSERT(p != NULL, ("gv_set_plex_state: NULL p"));
+
+ oldstate = p->state;
+ v = p->vol_sc;
+ plexdown = 0;
+
+ if (newstate == oldstate)
+ return (0);
+
+ switch (newstate) {
+ case GV_PLEX_UP:
+ /* Let update_plex handle if the plex can come up */
+ gv_update_plex_state(p);
+ if (p->state != GV_PLEX_UP && !(flags & GV_SETSTATE_FORCE))
+ return (GV_ERR_SETSTATE);
+ p->state = newstate;
+ break;
+ case GV_PLEX_DOWN:
+ /*
+ * Set state to GV_PLEX_DOWN only if no-one is using the plex,
+ * or if the state is forced.
+ */
+ if (v != NULL) {
+ /* If this is the only plex that is up, force is needed. */
+ plexdown = gv_plexdown(v);
+ if ((v->plexcount == 1 ||
+ (v->plexcount - plexdown == 1)) &&
+ ((flags & GV_SETSTATE_FORCE) == 0))
+ return (GV_ERR_SETSTATE);
+ }
+ p->state = newstate;
+ break;
+ case GV_PLEX_DEGRADED:
+ /* Only used internally, so we have to be forced. */
+ if (flags & GV_SETSTATE_FORCE)
+ p->state = newstate;
+ break;
+ }
+
+ /* Update our volume if we have one. */
+ if (v != NULL)
+ gv_update_vol_state(v);
+
+ /* Save config. */
+ if (flags & GV_SETSTATE_CONFIG)
+ gv_save_config(p->vinumconf);
+ return (0);
+}
+
+/*
+ * Set a volume's state (only UP and DOWN are acted upon).  Returns 0, or
+ * a GV_ERR_* code when the transition is refused without the force flag.
+ */
+int
+gv_set_vol_state(struct gv_volume *v, int newstate, int flags)
+{
+	KASSERT(v != NULL, ("gv_set_vol_state: NULL v"));
+
+	if (newstate == v->state)
+		return (0);
+
+	switch (newstate) {
+	case GV_VOL_UP:
+		/* Let update handle if the volume can come up. */
+		gv_update_vol_state(v);
+		if (v->state != GV_VOL_UP && !(flags & GV_SETSTATE_FORCE))
+			return (GV_ERR_SETSTATE);
+		v->state = newstate;
+		break;
+	case GV_VOL_DOWN:
+		/*
+		 * Set state to GV_VOL_DOWN only if no-one is using the volume,
+		 * or if the state should be forced.
+		 */
+		if (gv_provider_is_open(v->provider) &&
+		    !(flags & GV_SETSTATE_FORCE))
+			return (GV_ERR_ISBUSY);
+		v->state = newstate;
+		break;
+	}
+	/* Save config */
+	if (flags & GV_SETSTATE_CONFIG)
+		gv_save_config(v->vinumconf);
+	return (0);
+}
/* Update the state of a subdisk based on its environment. */
void
@@ -268,15 +384,19 @@ gv_update_sd_state(struct gv_sd *s)
oldstate = s->state;
/* If our drive isn't up we cannot be up either. */
- if (d->state != GV_DRIVE_UP)
+ if (d->state != GV_DRIVE_UP) {
s->state = GV_SD_DOWN;
/* If this subdisk was just created, we assume it is good.*/
- else if (s->flags & GV_SD_NEWBORN) {
+ } else if (s->flags & GV_SD_NEWBORN) {
s->state = GV_SD_UP;
s->flags &= ~GV_SD_NEWBORN;
- } else if (s->state != GV_SD_UP)
- s->state = GV_SD_STALE;
- else
+ } else if (s->state != GV_SD_UP) {
+ if (s->flags & GV_SD_CANGOUP) {
+ s->state = GV_SD_UP;
+ s->flags &= ~GV_SD_CANGOUP;
+ } else
+ s->state = GV_SD_STALE;
+ } else
s->state = GV_SD_UP;
if (s->state != oldstate)
@@ -292,6 +412,7 @@ gv_update_sd_state(struct gv_sd *s)
void
gv_update_plex_state(struct gv_plex *p)
{
+ struct gv_sd *s;
int sdstates;
int oldstate;
@@ -316,13 +437,24 @@ gv_update_plex_state(struct gv_plex *p)
/* Some of our subdisks are initializing. */
} else if (sdstates & GV_SD_INITSTATE) {
- if (p->flags & GV_PLEX_SYNCING)
+
+ if (p->flags & GV_PLEX_SYNCING ||
+ p->flags & GV_PLEX_REBUILDING)
p->state = GV_PLEX_DEGRADED;
else
p->state = GV_PLEX_DOWN;
} else
p->state = GV_PLEX_DOWN;
+ if (p->state == GV_PLEX_UP) {
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (s->flags & GV_SD_GROW) {
+ p->state = GV_PLEX_GROWABLE;
+ break;
+ }
+ }
+ }
+
if (p->state != oldstate)
G_VINUM_DEBUG(1, "plex %s state change: %s -> %s", p->name,
gv_plexstate(oldstate), gv_plexstate(p->state));
diff --git a/sys/geom/vinum/geom_vinum_subr.c b/sys/geom/vinum/geom_vinum_subr.c
index 848fbc7..70c7f3f 100644
--- a/sys/geom/vinum/geom_vinum_subr.c
+++ b/sys/geom/vinum/geom_vinum_subr.c
@@ -1,5 +1,6 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
+ * Copyright (c) 2007, 2009 Ulf Lilleengen
* Copyright (c) 1997, 1998, 1999
* Nan Yang Computer Services Limited. All rights reserved.
*
@@ -42,59 +43,28 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <sys/conf.h>
-#include <sys/kernel.h>
-#include <sys/libkern.h>
#include <sys/malloc.h>
#include <sys/systm.h>
#include <geom/geom.h>
-#include <geom/geom_int.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
#include <geom/vinum/geom_vinum_share.h>
-static off_t gv_plex_smallest_sd(struct gv_plex *, off_t);
+int gv_drive_is_newer(struct gv_softc *, struct gv_drive *);
+static off_t gv_plex_smallest_sd(struct gv_plex *);
-/* Find the VINUM class and it's associated geom. */
-struct g_geom *
-find_vinum_geom(void)
-{
- struct g_class *mp;
- struct g_geom *gp;
-
- g_topology_assert();
-
- gp = NULL;
-
- LIST_FOREACH(mp, &g_classes, class) {
- if (!strcmp(mp->name, "VINUM")) {
- gp = LIST_FIRST(&mp->geom);
- break;
- }
- }
-
- return (gp);
-}
-
-/*
- * Parse the vinum config provided in *buf and store it in *gp's softc.
- * If parameter 'merge' is non-zero, then the given config is merged into
- * *gp.
- */
void
-gv_parse_config(struct gv_softc *sc, u_char *buf, int merge)
+gv_parse_config(struct gv_softc *sc, char *buf, struct gv_drive *d)
{
char *aptr, *bptr, *cptr;
struct gv_volume *v, *v2;
struct gv_plex *p, *p2;
struct gv_sd *s, *s2;
- int tokens;
+ int error, is_newer, tokens;
char *token[GV_MAXARGS];
- g_topology_assert();
-
- KASSERT(sc != NULL, ("gv_parse_config: NULL softc"));
+ is_newer = gv_drive_is_newer(sc, d);
/* Until the end of the string *buf. */
for (aptr = buf; *aptr != '\0'; aptr = bptr) {
@@ -109,64 +79,95 @@ gv_parse_config(struct gv_softc *sc, u_char *buf, int merge)
tokens = gv_tokenize(cptr, token, GV_MAXARGS);
- if (tokens > 0) {
- if (!strcmp(token[0], "volume")) {
- v = gv_new_volume(tokens, token);
- if (v == NULL) {
- G_VINUM_DEBUG(0, "failed volume");
- break;
- }
+ if (tokens <= 0)
+ continue;
- if (merge) {
- v2 = gv_find_vol(sc, v->name);
- if (v2 != NULL) {
- g_free(v);
- continue;
- }
+ if (!strcmp(token[0], "volume")) {
+ v = gv_new_volume(tokens, token);
+ if (v == NULL) {
+ G_VINUM_DEBUG(0, "config parse failed volume");
+ break;
+ }
+
+ v2 = gv_find_vol(sc, v->name);
+ if (v2 != NULL) {
+ if (is_newer) {
+ v2->state = v->state;
+ G_VINUM_DEBUG(2, "newer volume found!");
}
+ g_free(v);
+ continue;
+ }
- v->vinumconf = sc;
- LIST_INIT(&v->plexes);
- LIST_INSERT_HEAD(&sc->volumes, v, volume);
+ gv_create_volume(sc, v);
- } else if (!strcmp(token[0], "plex")) {
- p = gv_new_plex(tokens, token);
- if (p == NULL) {
- G_VINUM_DEBUG(0, "failed plex");
- break;
- }
+ } else if (!strcmp(token[0], "plex")) {
+ p = gv_new_plex(tokens, token);
+ if (p == NULL) {
+ G_VINUM_DEBUG(0, "config parse failed plex");
+ break;
+ }
- if (merge) {
- p2 = gv_find_plex(sc, p->name);
- if (p2 != NULL) {
- g_free(p);
- continue;
- }
+ p2 = gv_find_plex(sc, p->name);
+ if (p2 != NULL) {
+ /* XXX */
+ if (is_newer) {
+ p2->state = p->state;
+ G_VINUM_DEBUG(2, "newer plex found!");
}
+ g_free(p);
+ continue;
+ }
- p->vinumconf = sc;
- LIST_INIT(&p->subdisks);
- LIST_INSERT_HEAD(&sc->plexes, p, plex);
+ error = gv_create_plex(sc, p);
+ if (error)
+ continue;
+ /*
+ * These flags were set in gv_create_plex() and are not
+ * needed here (on-disk config parsing).
+ */
+ p->flags &= ~GV_PLEX_ADDED;
+ p->flags &= ~GV_PLEX_NEWBORN;
- } else if (!strcmp(token[0], "sd")) {
- s = gv_new_sd(tokens, token);
+ } else if (!strcmp(token[0], "sd")) {
+ s = gv_new_sd(tokens, token);
- if (s == NULL) {
- G_VINUM_DEBUG(0, "failed subdisk");
- break;
- }
+ if (s == NULL) {
+ G_VINUM_DEBUG(0, "config parse failed subdisk");
+ break;
+ }
- if (merge) {
- s2 = gv_find_sd(sc, s->name);
- if (s2 != NULL) {
- g_free(s);
- continue;
- }
+ s2 = gv_find_sd(sc, s->name);
+ if (s2 != NULL) {
+ /* XXX */
+ if (is_newer) {
+ s2->state = s->state;
+ G_VINUM_DEBUG(2, "newer subdisk found!");
}
-
- s->vinumconf = sc;
- LIST_INSERT_HEAD(&sc->subdisks, s, sd);
+ g_free(s);
+ continue;
}
+
+ /*
+ * Signal that this subdisk was tasted, and could
+ * possibly reference a drive that isn't in our config
+ * yet.
+ */
+ s->flags |= GV_SD_TASTED;
+
+ if (s->state == GV_SD_UP)
+ s->flags |= GV_SD_CANGOUP;
+
+ error = gv_create_sd(sc, s);
+ if (error)
+ continue;
+
+ /*
+ * This flag was set in gv_create_sd() and is not
+ * needed here (on-disk config parsing).
+ */
+ s->flags &= ~GV_SD_NEWBORN;
+ s->flags &= ~GV_SD_GROW;
}
}
}
@@ -183,8 +184,6 @@ gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
struct gv_plex *p;
struct gv_volume *v;
- g_topology_assert();
-
/*
* We don't need the drive configuration if we're not writing the
* config to disk.
@@ -233,17 +232,20 @@ gv_format_config(struct gv_softc *sc, struct sbuf *sb, int ondisk, char *prefix)
sbuf_printf(sb, " state %s", gv_sdstate(s->state));
sbuf_printf(sb, "\n");
}
-
- return;
}
static off_t
-gv_plex_smallest_sd(struct gv_plex *p, off_t smallest)
+gv_plex_smallest_sd(struct gv_plex *p)
{
struct gv_sd *s;
+ off_t smallest;
KASSERT(p != NULL, ("gv_plex_smallest_sd: NULL p"));
+ s = LIST_FIRST(&p->subdisks);
+ if (s == NULL)
+ return (-1);
+ smallest = s->size;
LIST_FOREACH(s, &p->subdisks, in_plex) {
if (s->size < smallest)
smallest = s->size;
@@ -251,12 +253,29 @@ gv_plex_smallest_sd(struct gv_plex *p, off_t smallest)
return (smallest);
}
+/* Walk over the plexes attached to a volume and count how many are down. */
int
-gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
+gv_plexdown(struct gv_volume *v)
{
- struct gv_sd *s2;
+ int plexdown;
+ struct gv_plex *p;
- g_topology_assert();
+ KASSERT(v != NULL, ("gv_plexdown: NULL v"));
+
+ plexdown = 0;
+
+ LIST_FOREACH(p, &v->plexes, in_volume) {
+ if (p->state == GV_PLEX_DOWN)
+ plexdown++;
+ }
+ return (plexdown);
+}
+
+int
+gv_sd_to_plex(struct gv_sd *s, struct gv_plex *p)
+{
+ struct gv_sd *s2;
+ off_t psizeorig, remainder, smallest;
/* If this subdisk was already given to this plex, do nothing. */
if (s->plex_sc == p)
@@ -264,15 +283,56 @@ gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
/* Check correct size of this subdisk. */
s2 = LIST_FIRST(&p->subdisks);
- if (s2 != NULL && gv_is_striped(p) && (s2->size != s->size)) {
- G_VINUM_DEBUG(0, "need equal sized subdisks for "
- "this plex organisation - %s (%jd) <-> %s (%jd)",
- s2->name, s2->size, s->name, s->size);
- return (-1);
+ /* Adjust the subdisk-size if necessary. */
+ if (s2 != NULL && gv_is_striped(p)) {
+ /* First adjust to the stripesize. */
+ remainder = s->size % p->stripesize;
+
+ if (remainder) {
+ G_VINUM_DEBUG(1, "size of sd %s is not a "
+ "multiple of plex stripesize, taking off "
+ "%jd bytes", s->name,
+ (intmax_t)remainder);
+ gv_adjust_freespace(s, remainder);
+ }
+
+ smallest = gv_plex_smallest_sd(p);
+ /* Then take off extra if other subdisks are smaller. */
+ remainder = s->size - smallest;
+
+ /*
+ * Don't allow a remainder below zero for running plexes, it's too
+ * painful, and if someone were to accidentally do this, the
+ * resulting array might be smaller than the original... not good
+ */
+ if (remainder < 0) {
+ if (!(p->flags & GV_PLEX_NEWBORN)) {
+ G_VINUM_DEBUG(0, "sd %s too small for plex %s!",
+ s->name, p->name);
+ return (GV_ERR_BADSIZE);
+ }
+ /* Adjust other subdisks. */
+ LIST_FOREACH(s2, &p->subdisks, in_plex) {
+ G_VINUM_DEBUG(1, "size of sd %s is to big, "
+ "taking off %jd bytes", s2->name,
+ (intmax_t)(remainder * -1));
+ gv_adjust_freespace(s2, (remainder * -1));
+ }
+ } else if (remainder > 0) {
+ G_VINUM_DEBUG(1, "size of sd %s is to big, "
+ "taking off %jd bytes", s->name,
+ (intmax_t)remainder);
+ gv_adjust_freespace(s, remainder);
+ }
}
/* Find the correct plex offset for this subdisk, if needed. */
if (s->plex_offset == -1) {
+ /*
+ * First set it to 0 to catch the case where we had a detached
+ * subdisk that didn't get any good offset.
+ */
+ s->plex_offset = 0;
if (p->sdcount) {
LIST_FOREACH(s2, &p->subdisks, in_plex) {
if (gv_is_striped(p))
@@ -282,25 +342,7 @@ gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
s->plex_offset = s2->plex_offset +
s2->size;
}
- } else
- s->plex_offset = 0;
- }
-
- p->sdcount++;
-
- /* Adjust the size of our plex. */
- switch (p->org) {
- case GV_PLEX_CONCAT:
- case GV_PLEX_STRIPED:
- p->size += s->size;
- break;
-
- case GV_PLEX_RAID5:
- p->size = (p->sdcount - 1) * gv_plex_smallest_sd(p, s->size);
- break;
-
- default:
- break;
+ }
}
/* There are no subdisks for this plex yet, just insert it. */
@@ -321,6 +363,29 @@ gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
}
s->plex_sc = p;
+ /* Adjust the size of our plex. We check if the plex misses a subdisk,
+ * so we don't make the plex smaller than it actually should be.
+ */
+ psizeorig = p->size;
+ p->size = gv_plex_size(p);
+ /* Make sure the size is not changed. */
+ if (p->sddetached > 0) {
+ if (p->size < psizeorig) {
+ p->size = psizeorig;
+ /* We make sure we need another subdisk. */
+ if (p->sddetached == 1)
+ p->sddetached++;
+ }
+ p->sddetached--;
+ } else {
+ if ((p->org == GV_PLEX_RAID5 ||
+ p->org == GV_PLEX_STRIPED) &&
+ !(p->flags & GV_PLEX_NEWBORN) &&
+ p->state >= GV_PLEX_DEGRADED) {
+ s->flags |= GV_SD_GROW;
+ }
+ p->sdcount++;
+ }
return (0);
}
@@ -328,21 +393,32 @@ gv_sd_to_plex(struct gv_plex *p, struct gv_sd *s, int check)
void
gv_update_vol_size(struct gv_volume *v, off_t size)
{
- struct g_geom *gp;
- struct g_provider *pp;
-
if (v == NULL)
return;
+ if (v->provider != NULL) {
+ g_topology_lock();
+ v->provider->mediasize = size;
+ g_topology_unlock();
+ }
+ v->size = size;
+}
- gp = v->geom;
- if (gp == NULL)
- return;
+/* Return the number of subdisks that constitute the original plex. */
+int
+gv_sdcount(struct gv_plex *p, int growing)
+{
+ struct gv_sd *s;
+ int sdcount;
- LIST_FOREACH(pp, &gp->provider, provider) {
- pp->mediasize = size;
+ sdcount = p->sdcount;
+ if (growing) {
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (s->flags & GV_SD_GROW)
+ sdcount--;
+ }
}
- v->size = size;
+ return (sdcount);
}
/* Calculates the plex size. */
@@ -351,14 +427,13 @@ gv_plex_size(struct gv_plex *p)
{
struct gv_sd *s;
off_t size;
+ int sdcount;
KASSERT(p != NULL, ("gv_plex_size: NULL p"));
- if (p->sdcount == 0)
- return (0);
-
/* Adjust the size of our plex. */
size = 0;
+ sdcount = gv_sdcount(p, 1);
switch (p->org) {
case GV_PLEX_CONCAT:
LIST_FOREACH(s, &p->subdisks, in_plex)
@@ -366,11 +441,11 @@ gv_plex_size(struct gv_plex *p)
break;
case GV_PLEX_STRIPED:
s = LIST_FIRST(&p->subdisks);
- size = p->sdcount * s->size;
+ size = ((s != NULL) ? (sdcount * s->size) : 0);
break;
case GV_PLEX_RAID5:
s = LIST_FIRST(&p->subdisks);
- size = (p->sdcount - 1) * s->size;
+ size = ((s != NULL) ? ((sdcount - 1) * s->size) : 0);
break;
}
@@ -391,7 +466,7 @@ gv_vol_size(struct gv_volume *v)
return (0);
minplexsize = p->size;
- LIST_FOREACH(p, &v->plexes, plex) {
+ LIST_FOREACH(p, &v->plexes, in_volume) {
if (p->size < minplexsize) {
minplexsize = p->size;
}
@@ -408,12 +483,9 @@ gv_update_plex_config(struct gv_plex *p)
KASSERT(p != NULL, ("gv_update_plex_config: NULL p"));
- /* This is what we want the plex to be. */
- state = GV_PLEX_UP;
-
/* The plex was added to an already running volume. */
if (p->flags & GV_PLEX_ADDED)
- state = GV_PLEX_DOWN;
+ gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
switch (p->org) {
case GV_PLEX_STRIPED:
@@ -430,7 +502,7 @@ gv_update_plex_config(struct gv_plex *p)
if (required_sds) {
if (p->sdcount < required_sds) {
- state = GV_PLEX_DOWN;
+ gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
}
/*
@@ -442,12 +514,13 @@ gv_update_plex_config(struct gv_plex *p)
G_VINUM_DEBUG(0, "subdisk size mismatch %s"
"(%jd) <> %s (%jd)", s->name, s->size,
s2->name, s2->size);
- state = GV_PLEX_DOWN;
+ gv_set_plex_state(p, GV_PLEX_DOWN,
+ GV_SETSTATE_FORCE);
}
}
- /* Trim subdisk sizes so that they match the stripe size. */
LIST_FOREACH(s, &p->subdisks, in_plex) {
+ /* Trim subdisk sizes to match the stripe size. */
remainder = s->size % p->stripesize;
if (remainder) {
G_VINUM_DEBUG(1, "size of sd %s is not a "
@@ -458,41 +531,32 @@ gv_update_plex_config(struct gv_plex *p)
}
}
- /* Adjust the size of our plex. */
- if (p->sdcount > 0) {
- p->size = 0;
- switch (p->org) {
- case GV_PLEX_CONCAT:
- LIST_FOREACH(s, &p->subdisks, in_plex)
- p->size += s->size;
- break;
-
- case GV_PLEX_STRIPED:
- s = LIST_FIRST(&p->subdisks);
- p->size = p->sdcount * s->size;
- break;
-
- case GV_PLEX_RAID5:
- s = LIST_FIRST(&p->subdisks);
- p->size = (p->sdcount - 1) * s->size;
- break;
-
- default:
- break;
- }
- }
-
+ p->size = gv_plex_size(p);
if (p->sdcount == 0)
- state = GV_PLEX_DOWN;
- else if ((p->flags & GV_PLEX_ADDED) ||
- ((p->org == GV_PLEX_RAID5) && (p->flags & GV_PLEX_NEWBORN))) {
+ gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
+ else if (p->org == GV_PLEX_RAID5 && p->flags & GV_PLEX_NEWBORN) {
+ LIST_FOREACH(s, &p->subdisks, in_plex)
+ gv_set_sd_state(s, GV_SD_UP, GV_SETSTATE_FORCE);
+ /* If added to a volume, we want the plex to be down. */
+ state = (p->flags & GV_PLEX_ADDED) ? GV_PLEX_DOWN : GV_PLEX_UP;
+ gv_set_plex_state(p, state, GV_SETSTATE_FORCE);
+ p->flags &= ~GV_PLEX_ADDED;
+ } else if (p->flags & GV_PLEX_ADDED) {
LIST_FOREACH(s, &p->subdisks, in_plex)
- s->state = GV_SD_STALE;
+ gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
+ gv_set_plex_state(p, GV_PLEX_DOWN, GV_SETSTATE_FORCE);
p->flags &= ~GV_PLEX_ADDED;
- p->flags &= ~GV_PLEX_NEWBORN;
- state = GV_PLEX_DOWN;
+ } else if (p->state == GV_PLEX_UP) {
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (s->flags & GV_SD_GROW) {
+ gv_set_plex_state(p, GV_PLEX_GROWABLE,
+ GV_SETSTATE_FORCE);
+ break;
+ }
+ }
}
- p->state = state;
+ /* Our plex is grown up now. */
+ p->flags &= ~GV_PLEX_NEWBORN;
}
/*
@@ -500,76 +564,82 @@ gv_update_plex_config(struct gv_plex *p)
* freelist.
*/
int
-gv_sd_to_drive(struct gv_softc *sc, struct gv_drive *d, struct gv_sd *s,
- char *errstr, int errlen)
+gv_sd_to_drive(struct gv_sd *s, struct gv_drive *d)
{
struct gv_sd *s2;
struct gv_freelist *fl, *fl2;
off_t tmp;
int i;
- g_topology_assert();
-
fl2 = NULL;
- KASSERT(sc != NULL, ("gv_sd_to_drive: NULL softc"));
- KASSERT(d != NULL, ("gv_sd_to_drive: NULL drive"));
- KASSERT(s != NULL, ("gv_sd_to_drive: NULL subdisk"));
- KASSERT(errstr != NULL, ("gv_sd_to_drive: NULL errstr"));
- KASSERT(errlen >= ERRBUFSIZ, ("gv_sd_to_drive: short errlen (%d)",
- errlen));
+ /* Shortcut for "referenced" drives. */
+ if (d->flags & GV_DRIVE_REFERENCED) {
+ s->drive_sc = d;
+ return (0);
+ }
/* Check if this subdisk was already given to this drive. */
- if (s->drive_sc == d)
- return (0);
+ if (s->drive_sc != NULL) {
+ if (s->drive_sc == d) {
+ if (!(s->flags & GV_SD_TASTED)) {
+ return (0);
+ }
+ } else {
+ G_VINUM_DEBUG(0, "can't give sd '%s' to '%s' "
+ "(already on '%s')", s->name, d->name,
+ s->drive_sc->name);
+ return (GV_ERR_ISATTACHED);
+ }
+ }
/* Preliminary checks. */
- if (s->size > d->avail || d->freelist_entries == 0) {
- snprintf(errstr, errlen, "not enough space on '%s' for '%s'",
- d->name, s->name);
- return (-1);
+ if ((s->size > d->avail) || (d->freelist_entries == 0)) {
+ G_VINUM_DEBUG(0, "not enough space on '%s' for '%s'", d->name,
+ s->name);
+ return (GV_ERR_NOSPACE);
}
- /* No size given, autosize it. */
+ /* If no size was given for this subdisk, try to auto-size it... */
if (s->size == -1) {
/* Find the largest available slot. */
LIST_FOREACH(fl, &d->freelist, freelist) {
- if (fl->size >= s->size) {
- s->size = fl->size;
- s->drive_offset = fl->offset;
- fl2 = fl;
- }
+ if (fl->size < s->size)
+ continue;
+ s->size = fl->size;
+ s->drive_offset = fl->offset;
+ fl2 = fl;
}
/* No good slot found? */
if (s->size == -1) {
- snprintf(errstr, errlen, "could not autosize '%s' on "
- "'%s'", s->name, d->name);
- return (-1);
+ G_VINUM_DEBUG(0, "couldn't autosize '%s' on '%s'",
+ s->name, d->name);
+ return (GV_ERR_BADSIZE);
}
/*
- * Check if we have a free slot that's large enough for the given size.
+ * ... or check if we have a free slot that's large enough for the
+ * given size.
*/
} else {
i = 0;
LIST_FOREACH(fl, &d->freelist, freelist) {
- /* Yes, this subdisk fits. */
- if (fl->size >= s->size) {
- i++;
- /* Assign drive offset, if not given. */
- if (s->drive_offset == -1)
- s->drive_offset = fl->offset;
- fl2 = fl;
- break;
- }
+ if (fl->size < s->size)
+ continue;
+ /* Assign drive offset, if not given. */
+ if (s->drive_offset == -1)
+ s->drive_offset = fl->offset;
+ fl2 = fl;
+ i++;
+ break;
}
/* Couldn't find a good free slot. */
if (i == 0) {
- snprintf(errstr, errlen, "free slots to small for '%s' "
- "on '%s'", s->name, d->name);
- return (-1);
+ G_VINUM_DEBUG(0, "free slots to small for '%s' on '%s'",
+ s->name, d->name);
+ return (GV_ERR_NOSPACE);
}
}
@@ -604,9 +674,9 @@ gv_sd_to_drive(struct gv_softc *sc, struct gv_drive *d, struct gv_sd *s,
/* Couldn't find a good free slot. */
if (i == 0) {
- snprintf(errstr, errlen, "given drive_offset for '%s' "
- "won't fit on '%s'", s->name, d->name);
- return (-1);
+ G_VINUM_DEBUG(0, "given drive_offset for '%s' won't fit "
+ "on '%s'", s->name, d->name);
+ return (GV_ERR_NOSPACE);
}
}
@@ -617,49 +687,41 @@ gv_sd_to_drive(struct gv_softc *sc, struct gv_drive *d, struct gv_sd *s,
/* First, adjust the freelist. */
LIST_FOREACH(fl, &d->freelist, freelist) {
+ /* Look for the free slot that we have found before. */
+ if (fl != fl2)
+ continue;
+
+ /* The subdisk starts at the beginning of the free slot. */
+ if (fl->offset == s->drive_offset) {
+ fl->offset += s->size;
+ fl->size -= s->size;
+
+ /* The subdisk uses the whole slot, so remove it. */
+ if (fl->size == 0) {
+ d->freelist_entries--;
+ LIST_REMOVE(fl, freelist);
+ }
+ /*
+ * The subdisk does not start at the beginning of the free
+ * slot.
+ */
+ } else {
+ tmp = fl->offset + fl->size;
+ fl->size = s->drive_offset - fl->offset;
- /* This is the free slot that we have found before. */
- if (fl == fl2) {
-
- /*
- * The subdisk starts at the beginning of the free
- * slot.
- */
- if (fl->offset == s->drive_offset) {
- fl->offset += s->size;
- fl->size -= s->size;
-
- /*
- * The subdisk uses the whole slot, so remove
- * it.
- */
- if (fl->size == 0) {
- d->freelist_entries--;
- LIST_REMOVE(fl, freelist);
- }
/*
- * The subdisk does not start at the beginning of the
- * free slot.
+ * The subdisk didn't use the complete rest of the free
+ * slot, so we need to split it.
*/
- } else {
- tmp = fl->offset + fl->size;
- fl->size = s->drive_offset - fl->offset;
-
- /*
- * The subdisk didn't use the complete rest of
- * the free slot, so we need to split it.
- */
- if (s->drive_offset + s->size != tmp) {
- fl2 = g_malloc(sizeof(*fl2),
- M_WAITOK | M_ZERO);
- fl2->offset = s->drive_offset + s->size;
- fl2->size = tmp - fl2->offset;
- LIST_INSERT_AFTER(fl, fl2, freelist);
- d->freelist_entries++;
- }
+ if (s->drive_offset + s->size != tmp) {
+ fl2 = g_malloc(sizeof(*fl2), M_WAITOK | M_ZERO);
+ fl2->offset = s->drive_offset + s->size;
+ fl2->size = tmp - fl2->offset;
+ LIST_INSERT_AFTER(fl, fl2, freelist);
+ d->freelist_entries++;
}
- break;
}
+ break;
}
/*
@@ -685,6 +747,8 @@ gv_sd_to_drive(struct gv_softc *sc, struct gv_drive *d, struct gv_sd *s,
d->sdcount++;
d->avail -= s->size;
+ s->flags &= ~GV_SD_TASTED;
+
/* Link back from the subdisk to this drive. */
s->drive_sc = d;
@@ -869,17 +933,65 @@ gv_find_drive(struct gv_softc *sc, char *name)
return (NULL);
}
+/*
+ * Look up a drive by the name of its device.
+ *
+ * Walks the drive list of the vinum configuration and returns the first
+ * drive whose 'device' string equals the given one, or NULL if the list
+ * is exhausted without a match.
+ */
+struct gv_drive *
+gv_find_drive_device(struct gv_softc *sc, char *device)
+{
+	struct gv_drive *d;
+
+	for (d = LIST_FIRST(&sc->drives); d != NULL; d = LIST_NEXT(d, drive)) {
+		/* Stop at the first match; d is NULL if we run off the end. */
+		if (strcmp(d->device, device) == 0)
+			break;
+	}
+
+	return (d);
+}
+
/* Check if any consumer of the given geom is open. */
int
-gv_is_open(struct g_geom *gp)
+gv_consumer_is_open(struct g_consumer *cp)
{
- struct g_consumer *cp;
+ if (cp == NULL)
+ return (0);
- if (gp == NULL)
+ if (cp->acr || cp->acw || cp->ace)
+ return (1);
+
+ return (0);
+}
+
+int
+gv_provider_is_open(struct g_provider *pp)
+{
+ if (pp == NULL)
return (0);
- LIST_FOREACH(cp, &gp->consumer, consumer) {
- if (cp->acr || cp->acw || cp->ace)
+ if (pp->acr || pp->acw || pp->ace)
+ return (1);
+
+ return (0);
+}
+
+/*
+ * Compare the modification date of drive d with those of the other drives.
+ * Return 1 if d is newer than at least one other up drive, 0 otherwise.
+ */
+int
+gv_drive_is_newer(struct gv_softc *sc, struct gv_drive *d)
+{
+ struct gv_drive *d2;
+ struct timeval *a, *b;
+
+ KASSERT(!LIST_EMPTY(&sc->drives),
+ ("gv_is_drive_newer: empty drive list"));
+
+ a = &d->hdr->label.last_update;
+ LIST_FOREACH(d2, &sc->drives, drive) {
+ if ((d == d2) || (d2->state != GV_DRIVE_UP) ||
+ (d2->hdr == NULL))
+ continue;
+ b = &d2->hdr->label.last_update;
+ if (timevalcmp(a, b, >))
return (1);
}
@@ -915,58 +1027,255 @@ gv_object_type(struct gv_softc *sc, char *name)
return (GV_TYPE_DRIVE);
}
- return (-1);
+ return (GV_ERR_NOTFOUND);
}
void
-gv_kill_drive_thread(struct gv_drive *d)
+gv_setup_objects(struct gv_softc *sc)
{
- if (d->flags & GV_DRIVE_THREAD_ACTIVE) {
- d->flags |= GV_DRIVE_THREAD_DIE;
- wakeup(d);
- while (!(d->flags & GV_DRIVE_THREAD_DEAD))
- tsleep(d, PRIBIO, "gv_die", hz);
- d->flags &= ~GV_DRIVE_THREAD_ACTIVE;
- d->flags &= ~GV_DRIVE_THREAD_DIE;
- d->flags &= ~GV_DRIVE_THREAD_DEAD;
- g_free(d->bqueue);
- d->bqueue = NULL;
- mtx_destroy(&d->bqueue_mtx);
+ struct g_provider *pp;
+ struct gv_volume *v;
+ struct gv_plex *p;
+ struct gv_sd *s;
+ struct gv_drive *d;
+
+ LIST_FOREACH(s, &sc->subdisks, sd) {
+ d = gv_find_drive(sc, s->drive);
+ if (d != NULL)
+ gv_sd_to_drive(s, d);
+ p = gv_find_plex(sc, s->plex);
+ if (p != NULL)
+ gv_sd_to_plex(s, p);
+ gv_update_sd_state(s);
+ }
+
+ LIST_FOREACH(p, &sc->plexes, plex) {
+ gv_update_plex_config(p);
+ v = gv_find_vol(sc, p->volume);
+ if (v != NULL && p->vol_sc != v) {
+ p->vol_sc = v;
+ v->plexcount++;
+ LIST_INSERT_HEAD(&v->plexes, p, in_volume);
+ }
+ gv_update_plex_config(p);
+ }
+
+ LIST_FOREACH(v, &sc->volumes, volume) {
+ v->size = gv_vol_size(v);
+ if (v->provider == NULL) {
+ g_topology_lock();
+ pp = g_new_providerf(sc->geom, "gvinum/%s", v->name);
+ pp->mediasize = v->size;
+ pp->sectorsize = 512; /* XXX */
+ g_error_provider(pp, 0);
+ v->provider = pp;
+ pp->private = v;
+ g_topology_unlock();
+ } else if (v->provider->mediasize != v->size) {
+ g_topology_lock();
+ v->provider->mediasize = v->size;
+ g_topology_unlock();
+ }
+ v->flags &= ~GV_VOL_NEWBORN;
+ gv_update_vol_state(v);
}
}
void
-gv_kill_plex_thread(struct gv_plex *p)
+gv_cleanup(struct gv_softc *sc)
{
- if (p->flags & GV_PLEX_THREAD_ACTIVE) {
- p->flags |= GV_PLEX_THREAD_DIE;
- wakeup(p);
- while (!(p->flags & GV_PLEX_THREAD_DEAD))
- tsleep(p, PRIBIO, "gv_die", hz);
- p->flags &= ~GV_PLEX_THREAD_ACTIVE;
- p->flags &= ~GV_PLEX_THREAD_DIE;
- p->flags &= ~GV_PLEX_THREAD_DEAD;
+ struct gv_volume *v, *v2;
+ struct gv_plex *p, *p2;
+ struct gv_sd *s, *s2;
+ struct gv_drive *d, *d2;
+ struct gv_freelist *fl, *fl2;
+
+ mtx_lock(&sc->config_mtx);
+ LIST_FOREACH_SAFE(v, &sc->volumes, volume, v2) {
+ LIST_REMOVE(v, volume);
+ g_free(v->wqueue);
+ g_free(v);
+ }
+ LIST_FOREACH_SAFE(p, &sc->plexes, plex, p2) {
+ LIST_REMOVE(p, plex);
g_free(p->bqueue);
+ g_free(p->rqueue);
g_free(p->wqueue);
- p->bqueue = NULL;
- p->wqueue = NULL;
- mtx_destroy(&p->bqueue_mtx);
+ g_free(p);
+ }
+ LIST_FOREACH_SAFE(s, &sc->subdisks, sd, s2) {
+ LIST_REMOVE(s, sd);
+ g_free(s);
}
+ LIST_FOREACH_SAFE(d, &sc->drives, drive, d2) {
+ LIST_FOREACH_SAFE(fl, &d->freelist, freelist, fl2) {
+ LIST_REMOVE(fl, freelist);
+ g_free(fl);
+ }
+ LIST_REMOVE(d, drive);
+ g_free(d->hdr);
+ g_free(d);
+ }
+ mtx_destroy(&sc->config_mtx);
}
-void
-gv_kill_vol_thread(struct gv_volume *v)
+/*
+ * General 'attach' routine: attach plex p to volume v.
+ *
+ * Returns GV_ERR_ISATTACHED if p already belongs to a volume,
+ * GV_ERR_ISBUSY if the provider of v is open, and 0 on success.
+ * If rename is non-zero, p is renamed to "<volume>.p<N>", where N is the
+ * plex count of v after this plex has been counted.
+ * The function asserts the GEOM topology lock via g_topology_assert().
+ */
+int
+gv_attach_plex(struct gv_plex *p, struct gv_volume *v, int rename)
+{
+ struct gv_sd *s;
+ struct gv_softc *sc;
+
+ g_topology_assert();
+
+ sc = p->vinumconf;
+ KASSERT(sc != NULL, ("NULL sc"));
+
+ if (p->vol_sc != NULL) {
+ G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
+ p->name, p->volume);
+ return (GV_ERR_ISATTACHED);
+ }
+
+ /*
+ * Stale all subdisks of this plex.
+ * NOTE(review): this runs before the busy check below, so a
+ * GV_ERR_ISBUSY return still leaves the subdisks forced to stale --
+ * confirm that this is intended.
+ */
+ LIST_FOREACH(s, &p->subdisks, in_plex) {
+ if (s->state != GV_SD_STALE)
+ gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
+ }
+ /*
+ * Attach to volume. Make sure volume is not up and running; the test
+ * used for that is whether the volume's provider is currently open.
+ */
+ if (gv_provider_is_open(v->provider)) {
+ G_VINUM_DEBUG(1, "unable to attach %s: volume %s is busy",
+ p->name, v->name);
+ return (GV_ERR_ISBUSY);
+ }
+ p->vol_sc = v;
+ strlcpy(p->volume, v->name, sizeof(p->volume));
+ v->plexcount++;
+ if (rename) {
+ snprintf(p->name, sizeof(p->name), "%s.p%d", v->name,
+ v->plexcount);
+ }
+ LIST_INSERT_HEAD(&v->plexes, p, in_volume);
+
+ /*
+ * Get plex up again: recompute the volume size with the new plex in
+ * the list, request the UP state (without GV_SETSTATE_FORCE) and
+ * save the configuration.
+ */
+ gv_update_vol_size(v, gv_vol_size(v));
+ gv_set_plex_state(p, GV_PLEX_UP, 0);
+ gv_save_config(p->vinumconf);
+ return (0);
+}
+
+/*
+ * Attach subdisk s to plex p at the given plex offset.
+ *
+ * An offset of -1 is exempt from the alignment check.  Any other offset
+ * must be a multiple of the plex's stripe size (if the plex has one) and
+ * must not equal the plex_offset of a subdisk already in the plex.
+ * If rename is non-zero, the subdisk is renamed to "<plex>.s<N>", where N
+ * is p->sdcount after the subdisk has been handed to the plex.
+ *
+ * Returns 0 on success, GV_ERR_ISATTACHED if s already belongs to a plex,
+ * GV_ERR_BADOFFSET for an unusable offset, or the error returned by
+ * gv_sd_to_plex().  The function asserts the GEOM topology lock via
+ * g_topology_assert().
+ */
+int
+gv_attach_sd(struct gv_sd *s, struct gv_plex *p, off_t offset, int rename)
+{
+	struct gv_sd *s2;
+	int error;
+
+	g_topology_assert();
+
+	/* If subdisk is attached, don't do it. */
+	if (s->plex_sc != NULL) {
+		G_VINUM_DEBUG(1, "unable to attach %s: already attached to %s",
+		    s->name, s->plex);
+		return (GV_ERR_ISATTACHED);
+	}
+
+	gv_set_sd_state(s, GV_SD_STALE, GV_SETSTATE_FORCE);
+
+	/*
+	 * Check that this subdisk gets a usable offset: it has to be a
+	 * multiple of the stripe size, and no other subdisk of the plex
+	 * may start at the same offset.  A plex with a stripe size of 0
+	 * (presumably a concatenated plex -- confirm) has no alignment
+	 * constraint; testing for it first avoids a division by zero.
+	 */
+	if (offset != -1 && p->stripesize != 0 &&
+	    offset % p->stripesize != 0)
+		return (GV_ERR_BADOFFSET);
+	LIST_FOREACH(s2, &p->subdisks, in_plex) {
+		if (s2->plex_offset == offset)
+			return (GV_ERR_BADOFFSET);
+	}
+
+	/* Attach the subdisk to the plex at given offset. */
+	s->plex_offset = offset;
+	strlcpy(s->plex, p->name, sizeof(s->plex));
+
+	error = gv_sd_to_plex(s, p);
+	if (error)
+		return (error);
+	gv_update_plex_config(p);
+
+	if (rename) {
+		snprintf(s->name, sizeof(s->name), "%s.s%d", s->plex,
+		    p->sdcount);
+	}
+	if (p->vol_sc != NULL)
+		gv_update_vol_size(p->vol_sc, gv_vol_size(p->vol_sc));
+	gv_save_config(p->vinumconf);
+	/*
+	 * We don't update the subdisk state since the user might have to
+	 * initiate a rebuild/sync first.
+	 */
+	return (0);
+}
+
+/* Detach a plex from a volume. */
+int
+gv_detach_plex(struct gv_plex *p, int flags)
{
- if (v->flags & GV_VOL_THREAD_ACTIVE) {
- v->flags |= GV_VOL_THREAD_DIE;
- wakeup(v);
- while (!(v->flags & GV_VOL_THREAD_DEAD))
- tsleep(v, PRIBIO, "gv_die", hz);
- v->flags &= ~GV_VOL_THREAD_ACTIVE;
- v->flags &= ~GV_VOL_THREAD_DIE;
- v->flags &= ~GV_VOL_THREAD_DEAD;
- g_free(v->bqueue);
- v->bqueue = NULL;
- mtx_destroy(&v->bqueue_mtx);
+ struct gv_volume *v;
+
+ g_topology_assert();
+ v = p->vol_sc;
+
+ if (v == NULL) {
+ G_VINUM_DEBUG(1, "unable to detach %s: already detached",
+ p->name);
+ return (0); /* Not an error. */
+ }
+
+ /*
+ * Only proceed if forced or volume inactive.
+ */
+ if (!(flags & GV_FLAG_F) && (gv_provider_is_open(v->provider) ||
+ p->state == GV_PLEX_UP)) {
+ G_VINUM_DEBUG(1, "unable to detach %s: volume %s is busy",
+ p->name, p->volume);
+ return (GV_ERR_ISBUSY);
}
+ v->plexcount--;
+ /* Make sure nobody reads from this plex once it is gone. */
+ v->last_read_plex = NULL;
+ LIST_REMOVE(p, in_volume);
+ p->vol_sc = NULL;
+ memset(p->volume, 0, GV_MAXVOLNAME);
+ gv_update_vol_size(v, gv_vol_size(v));
+ gv_save_config(p->vinumconf);
+ return (0);
+}
+
+/*
+ * Detach a subdisk from a plex.
+ *
+ * Returns 0 if the subdisk was detached or was not attached in the first
+ * place, and GV_ERR_ISBUSY if the plex state forbids the detach and
+ * GV_FLAG_F is not set in flags.
+ * The function asserts the GEOM topology lock via g_topology_assert().
+ */
+int
+gv_detach_sd(struct gv_sd *s, int flags)
+{
+ struct gv_plex *p;
+
+ g_topology_assert();
+ p = s->plex_sc;
+
+ if (p == NULL) {
+ G_VINUM_DEBUG(1, "unable to detach %s: already detached",
+ s->name);
+ return (0); /* Not an error. */
+ }
+
+ /*
+ * Don't proceed if we're not forcing, and the plex is up, or degraded
+ * with this subdisk up.
+ * "Up" here means any plex state above GV_PLEX_DEGRADED.
+ */
+ if (!(flags & GV_FLAG_F) && ((p->state > GV_PLEX_DEGRADED) ||
+ ((p->state == GV_PLEX_DEGRADED) && (s->state == GV_SD_UP)))) {
+ G_VINUM_DEBUG(1, "unable to detach %s: plex %s is busy",
+ s->name, s->plex);
+ return (GV_ERR_ISBUSY);
+ }
+
+ LIST_REMOVE(s, in_plex);
+ s->plex_sc = NULL;
+ memset(s->plex, 0, GV_MAXPLEXNAME);
+ p->sddetached++; /* Only this counter changes; p->sdcount is left as is. */
+ gv_save_config(s->vinumconf);
+ return (0);
}
diff --git a/sys/geom/vinum/geom_vinum_var.h b/sys/geom/vinum/geom_vinum_var.h
index 5a86568..124944f 100644
--- a/sys/geom/vinum/geom_vinum_var.h
+++ b/sys/geom/vinum/geom_vinum_var.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2004, 2007 Lukas Ertl
* Copyright (c) 1997, 1998, 1999
* Nan Yang Computer Services Limited. All rights reserved.
*
@@ -112,11 +112,29 @@
#define GV_BIO_MALLOC 0x02
#define GV_BIO_ONHOLD 0x04
#define GV_BIO_SYNCREQ 0x08
-#define GV_BIO_SUCCEED 0x10
+#define GV_BIO_INIT 0x10
#define GV_BIO_REBUILD 0x20
#define GV_BIO_CHECK 0x40
#define GV_BIO_PARITY 0x80
#define GV_BIO_RETRY 0x100
+#define GV_BIO_INTERNAL \
+ (GV_BIO_SYNCREQ | GV_BIO_INIT | GV_BIO_REBUILD |GV_BIO_CHECK)
+
+/* Error codes to be used within gvinum. */
+#define GV_ERR_SETSTATE (-1) /* Error setting state. */
+#define GV_ERR_BADSIZE (-2) /* Object has wrong size. */
+#define GV_ERR_INVTYPE (-3) /* Invalid object type. */
+#define GV_ERR_CREATE (-4) /* Error creating gvinum object. */
+#define GV_ERR_ISBUSY (-5) /* Object is busy. */
+#define GV_ERR_ISATTACHED (-6) /* Object is attached to another. */
+#define GV_ERR_INVFLAG (-7) /* Invalid flag passed. */
+#define GV_ERR_INVSTATE (-8) /* Invalid state. */
+#define GV_ERR_NOTFOUND (-9) /* Object not found. */
+#define GV_ERR_NAMETAKEN (-10) /* Object name is taken. */
+#define GV_ERR_NOSPACE (-11) /* No space left on drive/subdisk. */
+#define GV_ERR_BADOFFSET (-12) /* Invalid offset specified. */
+#define GV_ERR_INVNAME (-13) /* Invalid object name. */
+#define GV_ERR_PLEXORG (-14) /* Invalid plex organization. */
/*
* hostname is 256 bytes long, but we don't need to shlep multiple copies in
@@ -162,16 +180,65 @@ struct gv_bioq {
TAILQ_ENTRY(gv_bioq) queue;
};
+#define GV_EVENT_DRIVE_TASTED 1
+#define GV_EVENT_DRIVE_LOST 2
+#define GV_EVENT_THREAD_EXIT 3
+#define GV_EVENT_CREATE_DRIVE 4
+#define GV_EVENT_CREATE_VOLUME 5
+#define GV_EVENT_CREATE_PLEX 6
+#define GV_EVENT_CREATE_SD 7
+#define GV_EVENT_SAVE_CONFIG 8
+#define GV_EVENT_RM_VOLUME 9
+#define GV_EVENT_RM_PLEX 10
+#define GV_EVENT_RM_SD 11
+#define GV_EVENT_RM_DRIVE 12
+#define GV_EVENT_SET_SD_STATE 13
+#define GV_EVENT_SET_DRIVE_STATE 14
+#define GV_EVENT_SET_VOL_STATE 15
+#define GV_EVENT_SET_PLEX_STATE 16
+#define GV_EVENT_RESET_CONFIG 17
+#define GV_EVENT_PARITY_REBUILD 18
+#define GV_EVENT_PARITY_CHECK 19
+#define GV_EVENT_START_PLEX 20
+#define GV_EVENT_START_VOLUME 21
+#define GV_EVENT_ATTACH_PLEX 22
+#define GV_EVENT_ATTACH_SD 23
+#define GV_EVENT_DETACH_PLEX 24
+#define GV_EVENT_DETACH_SD 25
+#define GV_EVENT_RENAME_VOL 26
+#define GV_EVENT_RENAME_PLEX 27
+#define GV_EVENT_RENAME_SD 28
+#define GV_EVENT_RENAME_DRIVE 29
+#define GV_EVENT_MOVE_SD 30
+#define GV_EVENT_SETUP_OBJECTS 31
+
+#ifdef _KERNEL
+struct gv_event { /* One entry of the event queue (equeue) in struct gv_softc. */
+ int type; /* Presumably one of the GV_EVENT_* codes -- confirm. */
+ void *arg1; /* Pointer argument; meaning not visible here. */
+ void *arg2; /* Pointer argument; meaning not visible here. */
+ intmax_t arg3; /* Integer argument; meaning not visible here. */
+ intmax_t arg4; /* Integer argument; meaning not visible here. */
+ TAILQ_ENTRY(gv_event) events; /* Tail queue linkage. */
+};
+#endif
+
/* This struct contains the main vinum config. */
struct gv_softc {
- /*struct mtx config_mtx; XXX not yet */
-
/* Linked lists of all objects in our setup. */
LIST_HEAD(,gv_drive) drives; /* All drives. */
LIST_HEAD(,gv_plex) plexes; /* All plexes. */
LIST_HEAD(,gv_sd) subdisks; /* All subdisks. */
LIST_HEAD(,gv_volume) volumes; /* All volumes. */
+ TAILQ_HEAD(,gv_event) equeue; /* Event queue. */
+ struct mtx queue_mtx; /* Queue lock. */
+ struct mtx config_mtx; /* Configuration lock. */
+#ifdef _KERNEL
+ struct bio_queue_head *bqueue; /* BIO queue. */
+#else
+ char *padding;
+#endif
struct g_geom *geom; /* Pointer to our VINUM geom. */
};
@@ -188,26 +255,19 @@ struct gv_drive {
int sdcount; /* Number of subdisks. */
int flags;
-#define GV_DRIVE_THREAD_ACTIVE 0x01 /* Drive has an active worker thread. */
-#define GV_DRIVE_THREAD_DIE 0x02 /* Signal the worker thread to die. */
-#define GV_DRIVE_THREAD_DEAD 0x04 /* The worker thread has died. */
-#define GV_DRIVE_NEWBORN 0x08 /* The drive was just created. */
+#define GV_DRIVE_REFERENCED 0x01 /* The drive isn't really existing,
+ but was referenced by a subdisk
+ during taste. */
+
+ struct gv_hdr *hdr; /* The drive header. */
- struct gv_hdr *hdr; /* The drive header. */
+ struct g_consumer *consumer; /* Consumer attached to this drive. */
int freelist_entries; /* Count of freelist entries. */
LIST_HEAD(,gv_freelist) freelist; /* List of freelist entries. */
LIST_HEAD(,gv_sd) subdisks; /* Subdisks on this drive. */
LIST_ENTRY(gv_drive) drive; /* Entry in the vinum config. */
-#ifdef _KERNEL
- struct bio_queue_head *bqueue; /* BIO queue of this drive. */
-#else
- char *padding;
-#endif
- struct mtx bqueue_mtx; /* Mtx. to protect the queue. */
-
- struct g_geom *geom; /* The geom of this drive. */
struct gv_softc *vinumconf; /* Pointer to the vinum conf. */
};
@@ -230,8 +290,10 @@ struct gv_sd {
int init_error; /* Flag error on initialization. */
int flags;
-#define GV_SD_NEWBORN 0x01 /* Subdisk was just created. */
-#define GV_SD_INITCANCEL 0x02 /* Cancel initialization process. */
+#define GV_SD_NEWBORN 0x01 /* Subdisk is created by user. */
+#define GV_SD_TASTED 0x02 /* Subdisk is created during taste. */
+#define GV_SD_CANGOUP 0x04 /* Subdisk can go up immediately. */
+#define GV_SD_GROW 0x08 /* Subdisk is added to striped plex. */
char drive[GV_MAXDRIVENAME]; /* Name of underlying drive. */
char plex[GV_MAXPLEXNAME]; /* Name of associated plex. */
@@ -239,9 +301,6 @@ struct gv_sd {
struct gv_drive *drive_sc; /* Pointer to underlying drive. */
struct gv_plex *plex_sc; /* Pointer to associated plex. */
- struct g_provider *provider; /* The provider this sd represents. */
- struct g_consumer *consumer; /* Consumer attached to our provider. */
-
LIST_ENTRY(gv_sd) from_drive; /* Subdisk list of underlying drive. */
LIST_ENTRY(gv_sd) in_plex; /* Subdisk list of associated plex. */
LIST_ENTRY(gv_sd) sd; /* Entry in the vinum config. */
@@ -257,7 +316,8 @@ struct gv_plex {
#define GV_PLEX_DOWN 0
#define GV_PLEX_INITIALIZING 1
#define GV_PLEX_DEGRADED 2
-#define GV_PLEX_UP 3
+#define GV_PLEX_GROWABLE 3
+#define GV_PLEX_UP 4
int org; /* The plex organisation. */
#define GV_PLEX_DISORG 0
@@ -270,6 +330,7 @@ struct gv_plex {
char volume[GV_MAXVOLNAME]; /* Name of associated volume. */
struct gv_volume *vol_sc; /* Pointer to associated volume. */
+ int sddetached; /* Number of detached subdisks. */
int sdcount; /* Number of subdisks in this plex. */
int sddown; /* Number of subdisks that are down. */
int flags;
@@ -279,26 +340,25 @@ struct gv_plex {
#define GV_PLEX_THREAD_DIE 0x08 /* Signal the RAID5 thread to die. */
#define GV_PLEX_THREAD_DEAD 0x10 /* The RAID5 thread has died. */
#define GV_PLEX_NEWBORN 0x20 /* The plex was just created. */
+#define GV_PLEX_REBUILDING 0x40 /* The plex is rebuilding. */
+#define GV_PLEX_GROWING 0x80 /* The plex is growing. */
off_t synced; /* Count of synced bytes. */
- struct mtx bqueue_mtx; /* Lock for the BIO queue. */
-#ifdef _KERNEL
- struct bio_queue_head *bqueue; /* BIO queue. */
- struct bio_queue_head *wqueue; /* Waiting BIO queue. */
-#else
- char *bpad, *wpad;
-#endif
TAILQ_HEAD(,gv_raid5_packet) packets; /* RAID5 sub-requests. */
LIST_HEAD(,gv_sd) subdisks; /* List of attached subdisks. */
LIST_ENTRY(gv_plex) in_volume; /* Plex list of associated volume. */
LIST_ENTRY(gv_plex) plex; /* Entry in the vinum config. */
- struct g_provider *provider; /* The provider this plex represents. */
- struct g_consumer *consumer; /* Consumer attached to our provider. */
+#ifdef _KERNEL
+ struct bio_queue_head *bqueue; /* BIO queue. */
+ struct bio_queue_head *wqueue; /* Waiting BIO queue. */
+ struct bio_queue_head *rqueue; /* Rebuild waiting BIO queue. */
+#else
+ char *bpad, *wpad, *rpad; /* Padding for userland. */
+#endif
- struct g_geom *geom; /* The geom of this plex. */
struct gv_softc *vinumconf; /* Pointer to the vinum config. */
};
@@ -315,19 +375,20 @@ struct gv_volume {
#define GV_VOL_THREAD_ACTIVE 0x01 /* Volume has an active thread. */
#define GV_VOL_THREAD_DIE 0x02 /* Signal the thread to die. */
#define GV_VOL_THREAD_DEAD 0x04 /* The thread has died. */
+#define GV_VOL_NEWBORN 0x08 /* The volume was just created. */
- struct mtx bqueue_mtx; /* Lock for the BIO queue. */
-#ifdef _KERNEL
- struct bio_queue_head *bqueue; /* BIO queue. */
+ LIST_HEAD(,gv_plex) plexes; /* List of attached plexes. */
+ LIST_ENTRY(gv_volume) volume; /* Entry in vinum config. */
+
+ struct g_provider *provider; /* Provider of this volume. */
+
+#ifdef _KERNEL
+ struct bio_queue_head *wqueue; /* BIO delayed request queue. */
#else
- char *padding;
+ char *wpad; /* Padding for userland. */
#endif
- LIST_HEAD(,gv_plex) plexes; /* List of attached plexes. */
- LIST_ENTRY(gv_volume) volume; /* Entry in vinum config. */
-
struct gv_plex *last_read_plex;
- struct g_geom *geom; /* The geom of this volume. */
struct gv_softc *vinumconf; /* Pointer to the vinum config. */
};
diff --git a/sys/geom/vinum/geom_vinum_volume.c b/sys/geom/vinum/geom_vinum_volume.c
index 88face2..32b353b 100644
--- a/sys/geom/vinum/geom_vinum_volume.c
+++ b/sys/geom/vinum/geom_vinum_volume.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2004 Lukas Ertl
+ * Copyright (c) 2007 Lukas Ertl
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,416 +29,136 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bio.h>
-#include <sys/conf.h>
-#include <sys/kernel.h>
-#include <sys/kthread.h>
-#include <sys/libkern.h>
#include <sys/lock.h>
#include <sys/malloc.h>
-#include <sys/module.h>
-#include <sys/mutex.h>
#include <sys/systm.h>
#include <geom/geom.h>
#include <geom/vinum/geom_vinum_var.h>
#include <geom/vinum/geom_vinum.h>
-static void gv_vol_completed_request(struct gv_volume *, struct bio *);
-static void gv_vol_normal_request(struct gv_volume *, struct bio *);
-
-static void
-gv_volume_orphan(struct g_consumer *cp)
-{
- struct g_geom *gp;
- struct gv_volume *v;
- int error;
-
- g_topology_assert();
- gp = cp->geom;
- g_trace(G_T_TOPOLOGY, "gv_volume_orphan(%s)", gp->name);
- if (cp->acr != 0 || cp->acw != 0 || cp->ace != 0)
- g_access(cp, -cp->acr, -cp->acw, -cp->ace);
- error = cp->provider->error;
- if (error == 0)
- error = ENXIO;
- g_detach(cp);
- g_destroy_consumer(cp);
- if (!LIST_EMPTY(&gp->consumer))
- return;
- v = gp->softc;
- if (v != NULL) {
- gv_kill_vol_thread(v);
- v->geom = NULL;
- }
- gp->softc = NULL;
- g_wither_geom(gp, error);
-}
-
-/* We end up here after the requests to our plexes are done. */
-static void
-gv_volume_done(struct bio *bp)
-{
- struct gv_volume *v;
-
- v = bp->bio_from->geom->softc;
- bp->bio_cflags |= GV_BIO_DONE;
- mtx_lock(&v->bqueue_mtx);
- bioq_insert_tail(v->bqueue, bp);
- wakeup(v);
- mtx_unlock(&v->bqueue_mtx);
-}
-
-static void
-gv_volume_start(struct bio *bp)
-{
- struct gv_volume *v;
-
- switch(bp->bio_cmd) {
- case BIO_READ:
- case BIO_WRITE:
- case BIO_DELETE:
- break;
- case BIO_GETATTR:
- default:
- g_io_deliver(bp, EOPNOTSUPP);
- return;
- }
-
- v = bp->bio_to->geom->softc;
- if (v->state != GV_VOL_UP) {
- g_io_deliver(bp, ENXIO);
- return;
- }
-
- mtx_lock(&v->bqueue_mtx);
- bioq_disksort(v->bqueue, bp);
- wakeup(v);
- mtx_unlock(&v->bqueue_mtx);
-}
-
-static void
-gv_vol_worker(void *arg)
+void
+gv_volume_flush(struct gv_volume *v)
{
+ struct gv_softc *sc;
struct bio *bp;
- struct gv_volume *v;
- v = arg;
KASSERT(v != NULL, ("NULL v"));
- mtx_lock(&v->bqueue_mtx);
- for (;;) {
- /* We were signaled to exit. */
- if (v->flags & GV_VOL_THREAD_DIE)
- break;
-
- /* Take the first BIO from our queue. */
- bp = bioq_takefirst(v->bqueue);
- if (bp == NULL) {
- msleep(v, &v->bqueue_mtx, PRIBIO, "-", hz/10);
- continue;
- }
- mtx_unlock(&v->bqueue_mtx);
+ sc = v->vinumconf;
+ KASSERT(sc != NULL, ("NULL sc"));
- if (bp->bio_cflags & GV_BIO_DONE)
- gv_vol_completed_request(v, bp);
- else
- gv_vol_normal_request(v, bp);
-
- mtx_lock(&v->bqueue_mtx);
+ bp = bioq_takefirst(v->wqueue);
+ while (bp != NULL) {
+ gv_volume_start(sc, bp);
+ bp = bioq_takefirst(v->wqueue);
}
- mtx_unlock(&v->bqueue_mtx);
- v->flags |= GV_VOL_THREAD_DEAD;
- wakeup(v);
-
- kproc_exit(ENXIO);
}
-static void
-gv_vol_completed_request(struct gv_volume *v, struct bio *bp)
+void
+gv_volume_start(struct gv_softc *sc, struct bio *bp)
{
- struct bio *pbp;
struct g_geom *gp;
- struct g_consumer *cp, *cp2;
-
- pbp = bp->bio_parent;
-
- if (pbp->bio_error == 0)
- pbp->bio_error = bp->bio_error;
-
- switch (pbp->bio_cmd) {
- case BIO_READ:
- if (bp->bio_error == 0)
- break;
-
- if (pbp->bio_cflags & GV_BIO_RETRY)
- break;
-
- /* Check if we have another plex left. */
- cp = bp->bio_from;
- gp = cp->geom;
- cp2 = LIST_NEXT(cp, consumer);
- if (cp2 == NULL)
- break;
-
- if (LIST_NEXT(cp2, consumer) == NULL)
- pbp->bio_cflags |= GV_BIO_RETRY;
+ struct gv_volume *v;
+ struct gv_plex *p, *lp;
+ int numwrites;
- g_destroy_bio(bp);
- pbp->bio_children--;
- mtx_lock(&v->bqueue_mtx);
- bioq_disksort(v->bqueue, pbp);
- mtx_unlock(&v->bqueue_mtx);
+ gp = sc->geom;
+ v = bp->bio_to->private;
+ if (v == NULL || v->state != GV_VOL_UP) {
+ g_io_deliver(bp, ENXIO);
return;
-
- case BIO_WRITE:
- case BIO_DELETE:
- /* Remember if this write request succeeded. */
- if (bp->bio_error == 0)
- pbp->bio_cflags |= GV_BIO_SUCCEED;
- break;
}
- /* When the original request is finished, we deliver it. */
- pbp->bio_inbed++;
- if (pbp->bio_inbed == pbp->bio_children) {
- if (pbp->bio_cflags & GV_BIO_SUCCEED)
- pbp->bio_error = 0;
- pbp->bio_completed = bp->bio_length;
- g_io_deliver(pbp, pbp->bio_error);
- }
-
- g_destroy_bio(bp);
-}
-
-static void
-gv_vol_normal_request(struct gv_volume *v, struct bio *bp)
-{
- struct bio_queue_head queue;
- struct g_geom *gp;
- struct gv_plex *p, *lp;
- struct bio *cbp;
-
- gp = v->geom;
-
switch (bp->bio_cmd) {
case BIO_READ:
- cbp = g_clone_bio(bp);
- if (cbp == NULL) {
- g_io_deliver(bp, ENOMEM);
- return;
- }
- cbp->bio_done = gv_volume_done;
/*
- * Try to find a good plex where we can send the request to.
- * The plex either has to be up, or it's a degraded RAID5 plex.
+ * Try to find a good plex where we can send the request to,
+ * round-robin-style. The plex either has to be up, or it's a
+ * degraded RAID5 plex. Check if we have delayed requests. Put
+ * this request on the delayed queue if so. This makes sure that
+ * we don't read old values.
*/
+ if (bioq_first(v->wqueue) != NULL) {
+ bioq_insert_tail(v->wqueue, bp);
+ break;
+ }
lp = v->last_read_plex;
if (lp == NULL)
lp = LIST_FIRST(&v->plexes);
p = LIST_NEXT(lp, in_volume);
+ if (p == NULL)
+ p = LIST_FIRST(&v->plexes);
do {
- if (p == NULL)
- p = LIST_FIRST(&v->plexes);
+ if (p == NULL) {
+ p = lp;
+ break;
+ }
if ((p->state > GV_PLEX_DEGRADED) ||
(p->state >= GV_PLEX_DEGRADED &&
p->org == GV_PLEX_RAID5))
break;
p = LIST_NEXT(p, in_volume);
+ if (p == NULL)
+ p = LIST_FIRST(&v->plexes);
} while (p != lp);
- if (p == NULL ||
+ if ((p == NULL) ||
(p->org == GV_PLEX_RAID5 && p->state < GV_PLEX_DEGRADED) ||
(p->org != GV_PLEX_RAID5 && p->state <= GV_PLEX_DEGRADED)) {
- g_destroy_bio(cbp);
- bp->bio_children--;
g_io_deliver(bp, ENXIO);
return;
}
- g_io_request(cbp, p->consumer);
v->last_read_plex = p;
+ /* Hand it down to the plex logic. */
+ gv_plex_start(p, bp);
break;
case BIO_WRITE:
case BIO_DELETE:
- bioq_init(&queue);
+ /* Delay write-requests if any plex is synchronizing. */
LIST_FOREACH(p, &v->plexes, in_volume) {
- if (p->state < GV_PLEX_DEGRADED)
- continue;
- cbp = g_clone_bio(bp);
- if (cbp == NULL) {
- for (cbp = bioq_first(&queue); cbp != NULL;
- cbp = bioq_first(&queue)) {
- bioq_remove(&queue, cbp);
- g_destroy_bio(cbp);
- }
- if (bp->bio_error == 0)
- bp->bio_error = ENOMEM;
- g_io_deliver(bp, bp->bio_error);
+ if (p->flags & GV_PLEX_SYNCING) {
+ bioq_insert_tail(v->wqueue, bp);
return;
}
- bioq_insert_tail(&queue, cbp);
- cbp->bio_done = gv_volume_done;
- cbp->bio_caller1 = p->consumer;
- }
- /* Fire off all sub-requests. */
- for (cbp = bioq_first(&queue); cbp != NULL;
- cbp = bioq_first(&queue)) {
- bioq_remove(&queue, cbp);
- g_io_request(cbp, cbp->bio_caller1);
}
- break;
- }
-}
-
-static int
-gv_volume_access(struct g_provider *pp, int dr, int dw, int de)
-{
- struct g_geom *gp;
- struct g_consumer *cp, *cp2;
- int error;
- gp = pp->geom;
-
- error = ENXIO;
- LIST_FOREACH(cp, &gp->consumer, consumer) {
- error = g_access(cp, dr, dw, de);
- if (error) {
- LIST_FOREACH(cp2, &gp->consumer, consumer) {
- if (cp == cp2)
- break;
- g_access(cp2, -dr, -dw, -de);
- }
- return (error);
+ numwrites = 0;
+ /* Give the BIO to each plex of this volume. */
+ LIST_FOREACH(p, &v->plexes, in_volume) {
+ if (p->state < GV_PLEX_DEGRADED)
+ continue;
+ gv_plex_start(p, bp);
+ numwrites++;
}
+ if (numwrites == 0)
+ g_io_deliver(bp, ENXIO);
+ break;
}
- return (error);
}
-static struct g_geom *
-gv_volume_taste(struct g_class *mp, struct g_provider *pp, int flags __unused)
+void
+gv_bio_done(struct gv_softc *sc, struct bio *bp)
{
- struct g_geom *gp;
- struct g_provider *pp2;
- struct g_consumer *cp, *ocp;
- struct gv_softc *sc;
struct gv_volume *v;
struct gv_plex *p;
- int error, first;
-
- g_trace(G_T_TOPOLOGY, "gv_volume_taste(%s, %s)", mp->name, pp->name);
- g_topology_assert();
-
- /* First, find the VINUM class and its associated geom. */
- gp = find_vinum_geom();
- if (gp == NULL)
- return (NULL);
-
- sc = gp->softc;
- KASSERT(sc != NULL, ("gv_volume_taste: NULL sc"));
-
- gp = pp->geom;
-
- /* We only want to attach to plexes. */
- if (strcmp(gp->class->name, "VINUMPLEX"))
- return (NULL);
-
- first = 0;
- p = gp->softc;
-
- /* Let's see if the volume this plex wants is already configured. */
- v = gv_find_vol(sc, p->volume);
- if (v == NULL)
- return (NULL);
- if (v->geom == NULL) {
- gp = g_new_geomf(mp, "%s", p->volume);
- gp->start = gv_volume_start;
- gp->orphan = gv_volume_orphan;
- gp->access = gv_volume_access;
- gp->softc = v;
- first++;
- } else
- gp = v->geom;
-
- /* Create bio queue, queue mutex, and worker thread, if necessary. */
- if (v->bqueue == NULL) {
- v->bqueue = g_malloc(sizeof(struct bio_queue_head),
- M_WAITOK | M_ZERO);
- bioq_init(v->bqueue);
- }
- if (mtx_initialized(&v->bqueue_mtx) == 0)
- mtx_init(&v->bqueue_mtx, "gv_plex", NULL, MTX_DEF);
-
- if (!(v->flags & GV_VOL_THREAD_ACTIVE)) {
- kproc_create(gv_vol_worker, v, NULL, 0, 0, "gv_v %s",
- v->name);
- v->flags |= GV_VOL_THREAD_ACTIVE;
- }
-
- /*
- * Create a new consumer and attach it to the plex geom. Since this
- * volume might already have a plex attached, we need to adjust the
- * access counts of the new consumer.
- */
- ocp = LIST_FIRST(&gp->consumer);
- cp = g_new_consumer(gp);
- g_attach(cp, pp);
- if ((ocp != NULL) && (ocp->acr > 0 || ocp->acw > 0 || ocp->ace > 0)) {
- error = g_access(cp, ocp->acr, ocp->acw, ocp->ace);
- if (error) {
- G_VINUM_DEBUG(0, "failed g_access %s -> %s; "
- "errno %d", v->name, p->name, error);
- g_detach(cp);
- g_destroy_consumer(cp);
- if (first)
- g_destroy_geom(gp);
- return (NULL);
- }
- }
-
- p->consumer = cp;
-
- if (p->vol_sc != v) {
- p->vol_sc = v;
- v->plexcount++;
- LIST_INSERT_HEAD(&v->plexes, p, in_volume);
- }
-
- /* We need to setup a new VINUMVOLUME geom. */
- if (first) {
- pp2 = g_new_providerf(gp, "gvinum/%s", v->name);
- pp2->mediasize = pp->mediasize;
- pp2->sectorsize = pp->sectorsize;
- g_error_provider(pp2, 0);
- v->size = pp2->mediasize;
- v->geom = gp;
- return (gp);
+ struct gv_sd *s;
+
+ s = bp->bio_caller1;
+ KASSERT(s != NULL, ("gv_bio_done: NULL s"));
+ p = s->plex_sc;
+ KASSERT(p != NULL, ("gv_bio_done: NULL p"));
+ v = p->vol_sc;
+ KASSERT(v != NULL, ("gv_bio_done: NULL v"));
+
+ switch (p->org) {
+ case GV_PLEX_CONCAT:
+ case GV_PLEX_STRIPED:
+ gv_plex_normal_done(p, bp);
+ break;
+ case GV_PLEX_RAID5:
+ gv_plex_raid5_done(p, bp);
+ break;
}
-
- return (NULL);
}
-
-static int
-gv_volume_destroy_geom(struct gctl_req *req, struct g_class *mp,
- struct g_geom *gp)
-{
- struct gv_volume *v;
-
- g_trace(G_T_TOPOLOGY, "gv_volume_destroy_geom: %s", gp->name);
- g_topology_assert();
-
- v = gp->softc;
- gv_kill_vol_thread(v);
- g_wither_geom(gp, ENXIO);
- return (0);
-}
-
-#define VINUMVOLUME_CLASS_NAME "VINUMVOLUME"
-
-static struct g_class g_vinum_volume_class = {
- .name = VINUMVOLUME_CLASS_NAME,
- .version = G_VERSION,
- .taste = gv_volume_taste,
- .destroy_geom = gv_volume_destroy_geom,
-};
-
-DECLARE_GEOM_CLASS(g_vinum_volume_class, g_vinum_volume);
diff --git a/sys/modules/geom/geom_vinum/Makefile b/sys/modules/geom/geom_vinum/Makefile
index fe14515..48ac962 100644
--- a/sys/modules/geom/geom_vinum/Makefile
+++ b/sys/modules/geom/geom_vinum/Makefile
@@ -3,10 +3,10 @@
.PATH: ${.CURDIR}/../../../geom/vinum
KMOD= geom_vinum
-SRCS= geom_vinum.c geom_vinum_drive.c geom_vinum_plex.c \
+SRCS= geom_vinum.c geom_vinum_create.c geom_vinum_drive.c geom_vinum_plex.c \
geom_vinum_volume.c geom_vinum_subr.c geom_vinum_raid5.c \
geom_vinum_share.c geom_vinum_list.c geom_vinum_rm.c \
geom_vinum_init.c geom_vinum_state.c geom_vinum_rename.c \
- geom_vinum_move.c
+ geom_vinum_move.c geom_vinum_events.c
.include <bsd.kmod.mk>
OpenPOWER on IntegriCloud