diff options
author | pjd <pjd@FreeBSD.org> | 2004-08-21 18:11:46 +0000 |
---|---|---|
committer | pjd <pjd@FreeBSD.org> | 2004-08-21 18:11:46 +0000 |
commit | c3c6740d1ae2ca36dd507650dbed6962cc1b206d (patch) | |
tree | c7a13feeb4a36d3ad1e0855d38be72d1ee949ab6 /sys/geom/raid3 | |
parent | fbfced34e485ed42ca65816dbf7029b29af13d53 (diff) | |
download | FreeBSD-src-c3c6740d1ae2ca36dd507650dbed6962cc1b206d.zip FreeBSD-src-c3c6740d1ae2ca36dd507650dbed6962cc1b206d.tar.gz |
Implement a new reading algorithm, which uses the parity component for reading
as well, even if the device is in the complete state.
I observe a 40% speed-up with this option for random read operations,
but a slowdown for sequential reads.
Basically, without this option, reading from a RAID3 device built from 5
components (c0-c3 hold data, c4 holds parity) looks like this:
Request no. Used components
1 c0+c1+c2+c3
2 c0+c1+c2+c3
3 c0+c1+c2+c3
With the new feature:
Request no. Used components
1 c0+c1+c2+c3
2 (c1^c2^c3^c4)+c1+c2+c3
3 c0+(c0^c2^c3^c4)+c2+c3
4 c0+c1+(c0^c1^c3^c4)+c3
5 c0+c1+c2+(c0^c1^c2^c4)
6 c0+c1+c2+c3
[...]
Diffstat (limited to 'sys/geom/raid3')
-rw-r--r-- | sys/geom/raid3/g_raid3.c | 202 | ||||
-rw-r--r-- | sys/geom/raid3/g_raid3.h | 9 | ||||
-rw-r--r-- | sys/geom/raid3/g_raid3_ctl.c | 34 |
3 files changed, 143 insertions, 102 deletions
diff --git a/sys/geom/raid3/g_raid3.c b/sys/geom/raid3/g_raid3.c index 1878146..5c60e05 100644 --- a/sys/geom/raid3/g_raid3.c +++ b/sys/geom/raid3/g_raid3.c @@ -872,122 +872,104 @@ g_raid3_gather(struct bio *pbp) { struct g_raid3_softc *sc; struct g_raid3_disk *disk; - struct bio *bp, *cbp; + struct bio *xbp, *fbp, *cbp; off_t atom, cadd, padd, left; sc = pbp->bio_to->geom->softc; - if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) { + /* + * Find bio for which we have to calculate data. + * While going through this path, check if all requests + * succeeded, if not, deny whole request. + * If we're in COMPLETE mode, we allow one request to fail, + * so if we find one, we're sending it to the parity consumer. + * If there are more failed requests, we deny whole request. + */ + xbp = fbp = NULL; + G_RAID3_FOREACH_BIO(pbp, cbp) { + if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { + KASSERT(xbp == NULL, ("More than one parity bio.")); + xbp = cbp; + } + if (cbp->bio_error == 0) + continue; /* - * Find bio for which we should calculate data. - * While going through this path, check if all requests - * succeeded, if not, deny whole request. + * Found failed request. */ - bp = NULL; - G_RAID3_FOREACH_BIO(pbp, cbp) { - if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) { - KASSERT(bp == NULL, - ("More than one parity bio.")); - bp = cbp; - } - if (cbp->bio_error == 0) - continue; + G_RAID3_LOGREQ(0, cbp, "Request failed."); + disk = cbp->bio_caller2; + if (disk != NULL) { /* - * Found failed request. + * Actually this is pointless to bump syncid, + * because whole device is fucked up. 
*/ - if (pbp->bio_error == 0) - pbp->bio_error = cbp->bio_error; - disk = cbp->bio_caller2; - if (disk != NULL) { + sc->sc_bump_syncid = G_RAID3_BUMP_IMMEDIATELY; + g_raid3_event_send(disk, + G_RAID3_DISK_STATE_DISCONNECTED, + G_RAID3_EVENT_DONTWAIT); + } + if (fbp == NULL) { + if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_DEGRADED) != 0) { /* - * Actually this is pointless to bump syncid, - * because whole device is fucked up. + * We are already in degraded mode, so we can't + * accept any failures. */ - sc->sc_bump_syncid = G_RAID3_BUMP_IMMEDIATELY; - g_raid3_event_send(disk, - G_RAID3_DISK_STATE_DISCONNECTED, - G_RAID3_EVENT_DONTWAIT); + if (pbp->bio_error == 0) + pbp->bio_error = fbp->bio_error; + } else { + fbp = cbp; } - } - KASSERT(bp != NULL, ("NULL parity bio.")); - if (pbp->bio_error != 0) { + } else { /* - * Deny whole request. + * Next failed request, that's too many. */ - goto finish; + if (pbp->bio_error == 0) + pbp->bio_error = fbp->bio_error; } + } + if (pbp->bio_error != 0) + goto finish; + if (fbp != NULL) { + struct g_consumer *cp; + /* - * Calculate parity. + * One request failed, so send the same request to + * the parity consumer. 
*/ - G_RAID3_FOREACH_BIO(pbp, cbp) { - if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) - continue; - g_raid3_xor(cbp->bio_data, bp->bio_data, bp->bio_data, - bp->bio_length); + disk = pbp->bio_driver2; + if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { + pbp->bio_error = fbp->bio_error; + goto finish; } - bp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY; - } else { + pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; + pbp->bio_inbed--; + fbp->bio_flags &= ~(BIO_DONE | BIO_ERROR); + if (disk->d_no == sc->sc_ndisks - 1) + fbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; + fbp->bio_error = 0; + fbp->bio_completed = 0; + fbp->bio_children = 0; + fbp->bio_inbed = 0; + cp = disk->d_consumer; + fbp->bio_caller2 = disk; + fbp->bio_to = cp->provider; + G_RAID3_LOGREQ(3, fbp, "Sending request (recover)."); + KASSERT(cp->acr > 0 && cp->ace > 0, + ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, + cp->acr, cp->acw, cp->ace)); + g_io_request(fbp, cp); + return; + } + if (xbp != NULL) { /* - * If we're in COMPLETE mode, we allow one request to fail, - * so if we find one, we're sending it to the parity consumer. - * If there are more failed requests, we deny whole request. + * Calculate parity. */ - bp = NULL; G_RAID3_FOREACH_BIO(pbp, cbp) { - if (cbp->bio_error == 0) + if ((cbp->bio_cflags & G_RAID3_BIO_CFLAG_PARITY) != 0) continue; - /* - * Found failed request. - */ - G_RAID3_LOGREQ(0, cbp, "Request failed."); - disk = cbp->bio_caller2; - if (disk != NULL) { - sc->sc_bump_syncid = G_RAID3_BUMP_IMMEDIATELY; - g_raid3_event_send(disk, - G_RAID3_DISK_STATE_DISCONNECTED, - G_RAID3_EVENT_DONTWAIT); - } - if (bp == NULL) - bp = cbp; - else { - /* - * Next failed request, that's too many. - */ - if (pbp->bio_error == 0) - pbp->bio_error = bp->bio_error; - } - } - if (pbp->bio_error != 0) - goto finish; - if (bp != NULL) { - struct g_consumer *cp; - - /* - * One request failed, so send the same request to - * the parity consumer. 
- */ - disk = &sc->sc_disks[sc->sc_ndisks - 1]; - if (disk->d_state != G_RAID3_DISK_STATE_ACTIVE) { - pbp->bio_error = bp->bio_error; - goto finish; - } - pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; - pbp->bio_inbed--; - bp->bio_flags &= ~(BIO_DONE | BIO_ERROR); - bp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; - bp->bio_error = 0; - bp->bio_completed = 0; - bp->bio_children = 0; - bp->bio_inbed = 0; - cp = disk->d_consumer; - bp->bio_caller2 = disk; - bp->bio_to = cp->provider; - G_RAID3_LOGREQ(3, bp, "Sending request (parity)."); - KASSERT(cp->acr > 0 && cp->ace > 0, - ("Consumer %s not opened (r%dw%de%d).", cp->provider->name, - cp->acr, cp->acw, cp->ace)); - g_io_request(bp, cp); - return; + g_raid3_xor(cbp->bio_data, xbp->bio_data, xbp->bio_data, + xbp->bio_length); } + xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY; } atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); cadd = padd = 0; @@ -1304,6 +1286,7 @@ g_raid3_register_request(struct bio *pbp) struct bio *cbp; off_t offset, length; u_int n, ndisks; + int round_robin; ndisks = 0; sc = pbp->bio_to->geom->softc; @@ -1318,12 +1301,19 @@ g_raid3_register_request(struct bio *pbp) switch (pbp->bio_cmd) { case BIO_READ: ndisks = sc->sc_ndisks - 1; + pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1]; break; case BIO_WRITE: case BIO_DELETE: ndisks = sc->sc_ndisks; break; } + if (sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE && + (sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { + round_robin = 1; + } else { + round_robin = 0; + } for (n = 0; n < ndisks; n++) { disk = &sc->sc_disks[n]; cbp = g_raid3_clone_bio(sc, pbp); @@ -1345,6 +1335,17 @@ g_raid3_register_request(struct bio *pbp) disk = &sc->sc_disks[sc->sc_ndisks - 1]; cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; pbp->bio_pflags |= G_RAID3_BIO_PFLAG_DEGRADED; + } else if (round_robin && + disk->d_no == sc->sc_round_robin) { + /* + * In round-robin mode skip one data component + * and use parity component when reading. 
+ */ + pbp->bio_driver2 = disk; + disk = &sc->sc_disks[sc->sc_ndisks - 1]; + cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; + sc->sc_round_robin++; + round_robin = 0; } break; case BIO_WRITE: @@ -1382,6 +1383,14 @@ g_raid3_register_request(struct bio *pbp) } switch (pbp->bio_cmd) { case BIO_READ: + if (round_robin) { + /* + * If we are in round-robin mode and 'round_robin' is + * still 1, it means, that we skipped parity component + * for this read and must reset sc_round_robin field. + */ + sc->sc_round_robin = 0; + } G_RAID3_FOREACH_BIO(pbp, cbp) { disk = cbp->bio_caller2; cp = disk->d_consumer; @@ -2474,6 +2483,7 @@ g_raid3_create(struct g_class *mp, const struct g_raid3_metadata *md) sc->sc_mediasize = md->md_mediasize; sc->sc_sectorsize = md->md_sectorsize; sc->sc_ndisks = md->md_all; + sc->sc_round_robin = 0; sc->sc_flags = md->md_mflags; sc->sc_bump_syncid = 0; for (n = 0; n < sc->sc_ndisks; n++) @@ -2752,6 +2762,8 @@ g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, } \ } while (0) ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); + ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN, + "ROUND-ROBIN"); #undef ADD_FLAG } sbuf_printf(sb, "</Flags>\n"); diff --git a/sys/geom/raid3/g_raid3.h b/sys/geom/raid3/g_raid3.h index 2e1a595..8084751 100644 --- a/sys/geom/raid3/g_raid3.h +++ b/sys/geom/raid3/g_raid3.h @@ -35,7 +35,7 @@ #define G_RAID3_CLASS_NAME "RAID3" #define G_RAID3_MAGIC "GEOM::RAID3" -#define G_RAID3_VERSION 0 +#define G_RAID3_VERSION 1 #define G_RAID3_DISK_FLAG_DIRTY 0x0000000000000001ULL #define G_RAID3_DISK_FLAG_SYNCHRONIZING 0x0000000000000002ULL @@ -46,7 +46,9 @@ G_RAID3_DISK_FLAG_FORCE_SYNC) #define G_RAID3_DEVICE_FLAG_NOAUTOSYNC 0x0000000000000001ULL -#define G_RAID3_DEVICE_FLAG_MASK (G_RAID3_DEVICE_FLAG_NOAUTOSYNC) +#define G_RAID3_DEVICE_FLAG_ROUND_ROBIN 0x0000000000000002ULL +#define G_RAID3_DEVICE_FLAG_MASK (G_RAID3_DEVICE_FLAG_NOAUTOSYNC | \ + G_RAID3_DEVICE_FLAG_ROUND_ROBIN) #ifdef _KERNEL extern u_int g_raid3_debug; @@ 
-162,6 +164,7 @@ struct g_raid3_softc { struct g_raid3_disk *sc_disks; u_int sc_ndisks; /* Number of disks. */ + u_int sc_round_robin; struct g_raid3_disk *sc_syncdisk; uma_zone_t sc_zone_64k; @@ -281,6 +284,8 @@ raid3_metadata_dump(const struct g_raid3_metadata *md) else { if ((md->md_mflags & G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0) printf(" NOAUTOSYNC"); + if ((md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) + printf(" ROUND-ROBIN"); } printf("\n"); printf(" dflags:"); diff --git a/sys/geom/raid3/g_raid3_ctl.c b/sys/geom/raid3/g_raid3_ctl.c index bb9bf21..7692ce8 100644 --- a/sys/geom/raid3/g_raid3_ctl.c +++ b/sys/geom/raid3/g_raid3_ctl.c @@ -93,7 +93,8 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp) struct g_raid3_softc *sc; struct g_raid3_disk *disk; const char *name; - int *nargs, *autosync, *noautosync, do_sync = 0; + int *nargs, do_sync = 0; + int *autosync, *noautosync, *round_robin, *noround_robin; u_int n; g_topology_assert(); @@ -122,15 +123,31 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp) gctl_error(req, "No '%s' argument.", "noautosync"); return; } - if (!*autosync && !*noautosync) { - gctl_error(req, "Nothing has changed."); - return; - } if (*autosync && *noautosync) { gctl_error(req, "'%s' and '%s' specified.", "autosync", "noautosync"); return; } + round_robin = gctl_get_paraml(req, "round_robin", sizeof(*round_robin)); + if (round_robin == NULL) { + gctl_error(req, "No '%s' argument.", "round_robin"); + return; + } + noround_robin = gctl_get_paraml(req, "noround_robin", + sizeof(*noround_robin)); + if (noround_robin == NULL) { + gctl_error(req, "No '%s' argument.", "noround_robin"); + return; + } + if (*round_robin && *noround_robin) { + gctl_error(req, "'%s' and '%s' specified.", "round_robin", + "noround_robin"); + return; + } + if (!*autosync && !*noautosync && !*round_robin && !*noround_robin) { + gctl_error(req, "Nothing has changed."); + return; + } if ((sc->sc_flags & 
G_RAID3_DEVICE_FLAG_NOAUTOSYNC) != 0) { if (*autosync) { sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_NOAUTOSYNC; @@ -140,6 +157,13 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp) if (*noautosync) sc->sc_flags |= G_RAID3_DEVICE_FLAG_NOAUTOSYNC; } + if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { + if (*noround_robin) + sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_ROUND_ROBIN; + } else { + if (*round_robin) + sc->sc_flags |= G_RAID3_DEVICE_FLAG_ROUND_ROBIN; + } for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (do_sync) { |