diff options
-rw-r--r-- | sbin/geom/class/raid3/geom_raid3.c | 17 | ||||
-rw-r--r-- | sbin/geom/class/raid3/graid3.8 | 22 | ||||
-rw-r--r-- | sys/geom/raid3/g_raid3.c | 202 | ||||
-rw-r--r-- | sys/geom/raid3/g_raid3.h | 9 | ||||
-rw-r--r-- | sys/geom/raid3/g_raid3_ctl.c | 34 |
5 files changed, 173 insertions, 111 deletions
diff --git a/sbin/geom/class/raid3/geom_raid3.c b/sbin/geom/class/raid3/geom_raid3.c index 55b1056..c7120ed 100644 --- a/sbin/geom/class/raid3/geom_raid3.c +++ b/sbin/geom/class/raid3/geom_raid3.c @@ -58,6 +58,8 @@ struct g_command class_commands[] = { { 'd', "dynamic", NULL, G_TYPE_NONE }, { 'h', "hardcode", NULL, G_TYPE_NONE }, { 'n', "noautosync", NULL, G_TYPE_NONE }, + { 'r', "round_robin", NULL, G_TYPE_NONE }, + { 'R', "noround_robin", NULL, G_TYPE_NONE }, G_OPT_SENTINEL } }, @@ -73,6 +75,7 @@ struct g_command class_commands[] = { { { 'h', "hardcode", NULL, G_TYPE_NONE }, { 'n', "noautosync", NULL, G_TYPE_NONE }, + { 'r', "round_robin", NULL, G_TYPE_NONE }, G_OPT_SENTINEL } }, @@ -99,10 +102,10 @@ void usage(const char *comm) { fprintf(stderr, - "usage: %s label [-hnv] name prov prov prov [prov [...]]\n" + "usage: %s label [-hnrv] name prov prov prov [prov [...]]\n" " %s clear [-v] prov [prov [...]]\n" " %s dump prov [prov [...]]\n" - " %s configure [-adhnv] name\n" + " %s configure [-adhnrRv] name\n" " %s rebuild [-v] name prov\n" " %s insert [-hv] <-n number> name prov\n" " %s remove [-v] <-n number> name\n" @@ -141,7 +144,8 @@ raid3_label(struct gctl_req *req) u_char sector[512]; const char *str; char param[16]; - int *hardcode, *nargs, *noautosync, error, i; + int *hardcode, *nargs, *noautosync, *round_robin; + int error, i; unsigned sectorsize; off_t mediasize; @@ -184,6 +188,13 @@ raid3_label(struct gctl_req *req) } if (*noautosync) md.md_mflags |= G_RAID3_DEVICE_FLAG_NOAUTOSYNC; + round_robin = gctl_get_paraml(req, "round_robin", sizeof(*round_robin)); + if (round_robin == NULL) { + gctl_error(req, "No '%s' argument.", "round_robin"); + return; + } + if (*round_robin) + md.md_mflags |= G_RAID3_DEVICE_FLAG_ROUND_ROBIN; hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode)); if (hardcode == NULL) { gctl_error(req, "No '%s' argument.", "hardcode"); diff --git a/sbin/geom/class/raid3/graid3.8 b/sbin/geom/class/raid3/graid3.8 index f38732d..713f979 
100644 --- a/sbin/geom/class/raid3/graid3.8 +++ b/sbin/geom/class/raid3/graid3.8 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd Aug 18, 2004 +.Dd Aug 20, 2004 .Dt GRAID3 8 .Os .Sh NAME @@ -33,7 +33,7 @@ .Sh SYNOPSIS .Nm .Cm label -.Op Fl hnv +.Op Fl hnrv .Ar name .Ar prov .Ar prov @@ -46,7 +46,7 @@ .Op Ar prov Op Ar ... .Nm .Cm configure -.Op Fl adhnv +.Op Fl adhnrRv .Ar name .Nm .Cm rebuild @@ -106,6 +106,12 @@ Additional options include: Hardcode providers' names in metadata. .It Fl n Turn off autosynchronization of stale components. +.It Fl r +Use the parity component for reading in round-robin fashion. +Without this option the parity component is not used at all for read operations +when the device is in the complete state. +With this option specified, random I/O read operations can be up to 40% faster, +but sequential reads are slower. .El .It Cm clear Clear metadata on the given providers. @@ -122,6 +128,10 @@ Do not hardcode providers' names in metadata. Hardcode providers' names in metadata. .It Fl n Turn off autosynchronization of stale components. +.It Fl r +Turn on round-robin reading. +.It Fl R +Turn off round-robin reading. .El .It Cm rebuild Rebuild the given component forcibly. @@ -167,10 +177,10 @@ Additional options include: Be more verbose. .El .Sh EXAMPLES -Use 3 disks to setup a RAID3 array. Create a file system, mount it, then unmount -it and stop device: +Use 3 disks to set up a RAID3 array (using the round-robin reading feature). +Create a file system, mount it, then unmount it and stop the device: .Bd -literal -offset indent -graid3 label -v data da0 da1 da2 +graid3 label -v -r data da0 da1 da2 newfs /dev/raid3/data mount /dev/raid3/data /mnt [...] 
diff --git a/sys/geom/raid3/g_raid3.c b/sys/geom/raid3/g_raid3.c index 1878146..5c60e05 100644 --- a/sys/geom/raid3/g_raid3.c +++ b/sys/geom/raid3/g_raid3.c @@ -872,122 +872,104 @@ g_raid3_gather(struct bio *pbp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; - struct bio *bp, *cbp; + struct bio *xbp, *fbp, *cbp; off_t atom, cadd, padd, left; sc = pbp->bio_to->geom->softc; - if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) { + /* + * Find bio for which we have to calculate data. + * While going through this path, check if all requests + * succeeded, if not, deny whole request. + * If we're in COMPLETE mode, we allow one request to fail, + * so if we find one, we're sending it to the parity consumer. + * If there are more failed requests, we deny whole request. + */ + xbp = fbp = NULL; + G_RAID3_FOREACH_BIO(pbp, cbp) { + if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { + KASSERT(xbp == NULL, ("More than one parity bio.")); + xbp = cbp; + } + if (cbp->bio_error == 0) + continue; /* - * Find bio for which we should calculate data. - * While going through this path, check if all requests - * succeeded, if not, deny whole request. + * Found failed request. */ - bp = NULL; - G_RAID3_FOREACH_BIO(pbp, cbp) { - if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { - KASSERT(bp == NULL, - ("More than one parity bio.")); - bp = cbp; - } - if (cbp->bio_error == 0) - continue; + G_RAID3_LOGREQ(0, cbp, "Request failed."); + disk = cbp->bio_caller2; + if (disk != NULL) { /* - * Found failed request. + * Actually this is pointless to bump syncid, + * because whole device is fucked up. 
*/ - if (pbp->bio_error == 0) - pbp->bio_error = cbp->bio_error; - disk = cbp->bio_caller2; - if (disk != NULL) { + sc->sc_bump_syncid = G_RAID3_BUMP_IMMEDIATELY; + g_raid3_event_send(disk, + G_RAID3_DISK_STATE_DISCONNECTED, + G_RAID3_EVENT_DONTWAIT); + } + if (fbp == NULL) { + if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) { /* - * Actually this is pointless to bump syncid, - * because whole device is fucked up. + * We are already in degraded mode, so we can't + * accept any failures. */ - sc->sc_bump_syncid = G_RAID3_BUMP_IMMEDIATELY; - g_raid3_event_send(disk, - G_RAID3_DISK_STATE_DISCONNECTED, - G_RAID3_EVENT_DONTWAIT); + if (pbp->bio_error == 0) + pbp->bio_error = cbp->bio_error; + } else { + fbp = cbp; } - } - KASSERT(bp != NULL, ("NULL parity bio.")); - if (pbp->bio_error != 0) { + } else { /* - * Deny whole request. + * Next failed request, that's too many. */ - goto finish; + if (pbp->bio_error == 0) + pbp->bio_error = fbp->bio_error; } + } + if (pbp->bio_error != 0) + goto finish; + if (fbp != NULL) { + struct g_consumer *cp; + /* - * Calculate parity. + * One request failed, so send the same request to + * the parity consumer. 
*/ - G_RAID3_FOREACH_BIO(pbp, cbp) { - if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) - continue; - g_raid3_xor(cbp->bio_data, bp->bio_data, bp->bio_data, - bp->bio_length); + disk = pbp->bio_driver2; + if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { + pbp->bio_error = fbp->bio_error; + goto finish; } - bp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY; - } else { + pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; + pbp->bio_inbed--; + fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR); + if (disk->d_no == sc->sc_ndisks - 1) + fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; + fbp->bio_error = 0; + fbp->bio_completed = 0; + fbp->bio_children = 0; + fbp->bio_inbed = 0; + cp = disk->d_consumer; + fbp->bio_caller2 = disk; + fbp->bio_to = cp->provider; + G_RAID3_LOGREQ(3, fbp, "Sending request (recover)."); + KASSERT(cp->acr > 0 && cp->ace > 0, + ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, + cp->acr, cp->acw, cp->ace)); + g_io_request(fbp, cp); + return; + } + if (xbp != NULL) { /* - * If we're in COMPLETE mode, we allow one request to fail, - * so if we find one, we're sending it to the parity consumer. - * If there are more failed requests, we deny whole request. + * Calculate parity. */ - bp = NULL; G_RAID3_FOREACH_BIO(pbp, cbp) { - if (cbp->bio_error == 0) + if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) continue; - /* - * Found failed request. - */ - G_RAID3_LOGREQ(0, cbp, "Request failed."); - disk = cbp->bio_caller2; - if (disk != NULL) { - sc->sc_bump_syncid = G_RAID3_BUMP_IMMEDIATELY; - g_raid3_event_send(disk, - G_RAID3_DISK_STATE_DISCONNECTED, - G_RAID3_EVENT_DONTWAIT); - } - if (bp == NULL) - bp = cbp; - else { - /* - * Next failed request, that's too many. - */ - if (pbp->bio_error == 0) - pbp->bio_error = bp->bio_error; - } - } - if (pbp->bio_error != 0) - goto finish; - if (bp != NULL) { - struct g_consumer *cp; - - /* - * One request failed, so send the same request to - * the parity consumer. 
- */ - disk = &sc->sc_disks[sc->sc_ndisks - 1]; - if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { - pbp->bio_error = bp->bio_error; - goto finish; - } - pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; - pbp->bio_inbed--; - bp->bio_flags &= ~(BIO_DONE | BIO_ERROR); - bp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; - bp->bio_error = 0; - bp->bio_completed = 0; - bp->bio_children = 0; - bp->bio_inbed = 0; - cp = disk->d_consumer; - bp->bio_caller2 = disk; - bp->bio_to = cp->provider; - G_RAID3_LOGREQ(3, bp, "Sending request (parity)."); - KASSERT(cp->acr > 0 && cp->ace > 0, - ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, - cp->acr, cp->acw, cp->ace)); - g_io_request(bp, cp); - return; + g_raid3_xor(cbp->bio_data, xbp->bio_data, xbp->bio_data, + xbp->bio_length); } + xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY; } atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); cadd = padd = 0; @@ -1304,6 +1286,7 @@ g_raid3_register_request(struct bio *pbp) struct bio *cbp; off_t offset, length; u_int n, ndisks; + int round_robin; ndisks = 0; sc = pbp->bio_to->geom->softc; @@ -1318,12 +1301,19 @@ g_raid3_register_request(struct bio *pbp) switch (pbp->bio_cmd) { case BIO_READ: ndisks = sc->sc_ndisks - 1; + pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1]; break; case BIO_WRITE: case BIO_DELETE: ndisks = sc->sc_ndisks; break; } + if (sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE && + (sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { + round_robin = 1; + } else { + round_robin = 0; + } for (n = 0; n < ndisks; n++) { disk = &sc->sc_disks[n]; cbp = g_raid3_clone_bio(sc, pbp); @@ -1345,6 +1335,17 @@ g_raid3_register_request(struct bio *pbp) disk = &sc->sc_disks[sc->sc_ndisks - 1]; cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; + } else if (round_robin && + disk->d_no == sc->sc_round_robin) { + /* + * In round-robin mode skip one data component + * and use parity component when reading. 
+ */ + pbp->bio_driver2 = disk; + disk = &sc->sc_disks[sc->sc_ndisks - 1]; + cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; + sc->sc_round_robin++; + round_robin = 0; } break; case BIO_WRITE: @@ -1382,6 +1383,14 @@ g_raid3_register_request(struct bio *pbp) } switch (pbp->bio_cmd) { case BIO_READ: + if (round_robin) { + /* + * If we are in round-robin mode and 'round_robin' is + * still 1, it means, that we skipped parity component + * for this read and must reset sc_round_robin field. + */ + sc->sc_round_robin = 0; + } G_RAID3_FOREACH_BIO(pbp, cbp) { disk = cbp->bio_caller2; cp = disk->d_consumer; @@ -2474,6 +2483,7 @@ g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md) sc->sc_mediasize = md->md_mediasize; sc->sc_sectorsize = md->md_sectorsize; sc->sc_ndisks = md->md_all; + sc->sc_round_robin = 0; sc->sc_flags = md->md_mflags; sc->sc_bump_syncid = 0; for (n = 0; n < sc->sc_ndisks; n++) @@ -2752,6 +2762,8 @@ g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, } \ } while (0) ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); + ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN, + "ROUND-ROBIN"); #undef ADD_FLAG } sbuf_printf(sb, "</Flags>\n"); diff --git a/sys/geom/raid3/g_raid3.h b/sys/geom/raid3/g_raid3.h index 2e1a595..8084751 100644 --- a/sys/geom/raid3/g_raid3.h +++ b/sys/geom/raid3/g_raid3.h @@ -35,7 +35,7 @@ #define G_RAID3_CLASS_NAME "RAID3" #define G_RAID3_MAGIC "GEOM::RAID3" -#define G_RAID3_VERSION 0 +#define G_RAID3_VERSION 1 #define G_RAID3_DISK_FLAG_DIRTY 0x0000000000000001ULL #define G_RAID3_DISK_FLAG_SYNCHRONIZING 0x0000000000000002ULL @@ -46,7 +46,9 @@ G_RAID3_DISK_FLAG_FORCE_SYNC) #define G_RAID3_DEVICE_FLAG_NOAUTOSYNC 0x0000000000000001ULL -#define G_RAID3_DEVICE_FLAG_MASK (G_RAID3_DEVICE_FLAG_NOAUTOSYNC) +#define G_RAID3_DEVICE_FLAG_ROUND_ROBIN 0x0000000000000002ULL +#define G_RAID3_DEVICE_FLAG_MASK (G_RAID3_DEVICE_FLAG_NOAUTOSYNC | \ + G_RAID3_DEVICE_FLAG_ROUND_ROBIN) #ifdef _KERNEL extern u_int g_raid3_debug; @@ 
-162,6 +164,7 @@ struct g_raid3_softc { struct g_raid3_disk *sc_disks; u_int sc_ndisks; /* Number of disks. */ + u_int sc_round_robin; struct g_raid3_disk *sc_syncdisk; uma_zone_t sc_zone_64k; @@ -281,6 +284,8 @@ raid3_metadata_dump(const struct g_raid3_metadata *md) else { if ((md->md_mflags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0) printf(" NOAUTOSYNC"); + if ((md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) + printf(" ROUND-ROBIN"); } printf("\n"); printf(" dflags:"); diff --git a/sys/geom/raid3/g_raid3_ctl.c b/sys/geom/raid3/g_raid3_ctl.c index bb9bf21..7692ce8 100644 --- a/sys/geom/raid3/g_raid3_ctl.c +++ b/sys/geom/raid3/g_raid3_ctl.c @@ -93,7 +93,8 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp) struct g_raid3_softc *sc; struct g_raid3_disk *disk; const char *name; - int *nargs, *autosync, *noautosync, do_sync = 0; + int *nargs, do_sync = 0; + int *autosync, *noautosync, *round_robin, *noround_robin; u_int n; g_topology_assert(); @@ -122,15 +123,31 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp) gctl_error(req, "No '%s' argument.", "noautosync"); return; } - if (!*autosync && !*noautosync) { - gctl_error(req, "Nothing has changed."); - return; - } if (*autosync && *noautosync) { gctl_error(req, "'%s' and '%s' specified.", "autosync", "noautosync"); return; } + round_robin = gctl_get_paraml(req, "round_robin", sizeof(*round_robin)); + if (round_robin == NULL) { + gctl_error(req, "No '%s' argument.", "round_robin"); + return; + } + noround_robin = gctl_get_paraml(req, "noround_robin", + sizeof(*noround_robin)); + if (noround_robin == NULL) { + gctl_error(req, "No '%s' argument.", "noround_robin"); + return; + } + if (*round_robin && *noround_robin) { + gctl_error(req, "'%s' and '%s' specified.", "round_robin", + "noround_robin"); + return; + } + if (!*autosync && !*noautosync && !*round_robin && !*noround_robin) { + gctl_error(req, "Nothing has changed."); + return; + } if ((sc->sc_flags & 
G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0) { if (*autosync) { sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_NOAUTOSYNC; @@ -140,6 +157,13 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp) if (*noautosync) sc->sc_flags |= G_RAID3_DEVICE_FLAG_NOAUTOSYNC; } + if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { + if (*noround_robin) + sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_ROUND_ROBIN; + } else { + if (*round_robin) + sc->sc_flags |= G_RAID3_DEVICE_FLAG_ROUND_ROBIN; + } for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (do_sync) { |