diff options
-rw-r--r-- | sbin/geom/class/raid3/geom_raid3.c | 20 | ||||
-rw-r--r-- | sbin/geom/class/raid3/graid3.8 | 25 | ||||
-rw-r--r-- | sys/geom/raid3/g_raid3.c | 127 | ||||
-rw-r--r-- | sys/geom/raid3/g_raid3.h | 18 | ||||
-rw-r--r-- | sys/geom/raid3/g_raid3_ctl.c | 36 |
5 files changed, 202 insertions, 24 deletions
diff --git a/sbin/geom/class/raid3/geom_raid3.c b/sbin/geom/class/raid3/geom_raid3.c index c7120ed..8e507ec 100644 --- a/sbin/geom/class/raid3/geom_raid3.c +++ b/sbin/geom/class/raid3/geom_raid3.c @@ -60,6 +60,8 @@ struct g_command class_commands[] = { { 'n', "noautosync", NULL, G_TYPE_NONE }, { 'r', "round_robin", NULL, G_TYPE_NONE }, { 'R', "noround_robin", NULL, G_TYPE_NONE }, + { 'w', "verify", NULL, G_TYPE_NONE }, + { 'W', "noverify", NULL, G_TYPE_NONE }, G_OPT_SENTINEL } }, @@ -76,6 +78,7 @@ struct g_command class_commands[] = { { 'h', "hardcode", NULL, G_TYPE_NONE }, { 'n', "noautosync", NULL, G_TYPE_NONE }, { 'r', "round_robin", NULL, G_TYPE_NONE }, + { 'w', "verify", NULL, G_TYPE_NONE }, G_OPT_SENTINEL } }, @@ -102,10 +105,10 @@ void usage(const char *comm) { fprintf(stderr, - "usage: %s label [-hnrv] name prov prov prov [prov [...]]\n" + "usage: %s label [-hnrvw] name prov prov prov [prov [...]]\n" " %s clear [-v] prov [prov [...]]\n" " %s dump prov [prov [...]]\n" - " %s configure [-adhnrRv] name\n" + " %s configure [-adhnrRvwW] name\n" " %s rebuild [-v] name prov\n" " %s insert [-hv] <-n number> name prov\n" " %s remove [-v] <-n number> name\n" @@ -144,7 +147,7 @@ raid3_label(struct gctl_req *req) u_char sector[512]; const char *str; char param[16]; - int *hardcode, *nargs, *noautosync, *round_robin; + int *hardcode, *nargs, *noautosync, *round_robin, *verify; int error, i; unsigned sectorsize; off_t mediasize; @@ -195,6 +198,17 @@ raid3_label(struct gctl_req *req) } if (*round_robin) md.md_mflags |= G_RAID3_DEVICE_FLAG_ROUND_ROBIN; + verify = gctl_get_paraml(req, "verify", sizeof(*verify)); + if (verify == NULL) { + gctl_error(req, "No '%s' argument.", "verify"); + return; + } + if (*verify) + md.md_mflags |= G_RAID3_DEVICE_FLAG_VERIFY; + if (*round_robin && *verify) { + gctl_error(req, "Both '%c' and '%c' options given.", 'r', 'w'); + return; + } hardcode = gctl_get_paraml(req, "hardcode", sizeof(*hardcode)); if (hardcode == NULL) { gctl_error(req, 
"No '%s' argument.", "hardcode"); diff --git a/sbin/geom/class/raid3/graid3.8 b/sbin/geom/class/raid3/graid3.8 index 713f979..47a30f4 100644 --- a/sbin/geom/class/raid3/graid3.8 +++ b/sbin/geom/class/raid3/graid3.8 @@ -24,7 +24,7 @@ .\" .\" $FreeBSD$ .\" -.Dd Aug 20, 2004 +.Dd Aug 22, 2004 .Dt GRAID3 8 .Os .Sh NAME @@ -33,7 +33,7 @@ .Sh SYNOPSIS .Nm .Cm label -.Op Fl hnrv +.Op Fl hnrvw .Ar name .Ar prov .Ar prov @@ -46,7 +46,7 @@ .Op Ar prov Op Ar ... .Nm .Cm configure -.Op Fl adhnrRv +.Op Fl adhnrRvwW .Ar name .Nm .Cm rebuild @@ -112,6 +112,21 @@ Without this option parity component is not used at all for reading operations when device is in complete state. With this option specified random I/O read operations are even 40% faster, but sequential reads are slower. +One cannot use this option if +.Fl w +option is also specified. +.It Fl w +Use verify reading feature. +When reading from device in complete state, read data also from parity component +and verify the data by comparing XORed regular data with parity data. +If verification fails, +.Er EIO +error is returned and value of sysctl +.Va kern.geom.raid3.stat.parity_mismatch +is increased. +One cannot use this option if +.Fl r +option is also specified. .El .It Cm clear Clear metadata on the given providers. @@ -132,6 +147,10 @@ Turn off autosynchronization of stale components. Turn on round-robin reading. .It Fl R Turn off round-robin reading. +.It Fl w +Turn on verify reading. +.It Fl W +Turn off verify reading. .El .It Cm rebuild Rebuild the given component forcibly. 
diff --git a/sys/geom/raid3/g_raid3.c b/sys/geom/raid3/g_raid3.c index 5c60e05..262727b 100644 --- a/sys/geom/raid3/g_raid3.c +++ b/sys/geom/raid3/g_raid3.c @@ -80,6 +80,9 @@ SYSCTL_UINT(_kern_geom_raid3, OID_AUTO, n4k, CTLFLAG_RD, &g_raid3_n4k, 0, SYSCTL_NODE(_kern_geom_raid3, OID_AUTO, stat, CTLFLAG_RW, 0, "GEOM_RAID3 statistics"); +static u_int g_raid3_parity_mismatch = 0; +SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, parity_mismatch, CTLFLAG_RD, + &g_raid3_parity_mismatch, 0, "Number of failures in VERIFY mode"); static u_int g_raid3_64k_requested = 0; SYSCTL_UINT(_kern_geom_raid3_stat, OID_AUTO, 64k_requested, CTLFLAG_RD, &g_raid3_64k_requested, 0, "Number of requested 64kB allocations"); @@ -214,6 +217,24 @@ _g_raid3_xor(uint64_t *src1, uint64_t *src2, uint64_t *dst, size_t size) } } +static int +g_raid3_is_zero(struct bio *bp) +{ + static const uint64_t zeros[] = { + 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0 + }; + u_char *addr; + ssize_t size; + + size = bp->bio_length; + addr = (u_char *)bp->bio_data; + for (; size > 0; size -= sizeof(zeros), addr += sizeof(zeros)) { + if (bcmp(addr, zeros, sizeof(zeros)) != 0) + return (0); + } + return (1); +} + /* * --- Events handling functions --- * Events in geom_raid3 are used to maintain disks and device status @@ -727,6 +748,46 @@ g_raid3_init_bio(struct bio *pbp) } static void +g_raid3_remove_bio(struct bio *cbp) +{ + struct bio *pbp, *bp; + + pbp = cbp->bio_parent; + if (G_RAID3_HEAD_BIO(pbp) == cbp) + G_RAID3_HEAD_BIO(pbp) = G_RAID3_NEXT_BIO(cbp); + else { + G_RAID3_FOREACH_BIO(pbp, bp) { + if (G_RAID3_NEXT_BIO(bp) == cbp) { + G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); + break; + } + } + } + G_RAID3_NEXT_BIO(cbp) = NULL; +} + +static void +g_raid3_replace_bio(struct bio *sbp, struct bio *dbp) +{ + struct bio *pbp, *bp; + + g_raid3_remove_bio(sbp); + pbp = dbp->bio_parent; + G_RAID3_NEXT_BIO(sbp) = G_RAID3_NEXT_BIO(dbp); + if (G_RAID3_HEAD_BIO(pbp) == dbp) + G_RAID3_HEAD_BIO(pbp) = sbp; + else { + 
G_RAID3_FOREACH_BIO(pbp, bp) { + if (G_RAID3_NEXT_BIO(bp) == dbp) { + G_RAID3_NEXT_BIO(bp) = sbp; + break; + } + } + } + G_RAID3_NEXT_BIO(dbp) = NULL; +} + +static void g_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp) { struct bio *bp, *pbp; @@ -751,10 +812,12 @@ g_raid3_destroy_bio(struct g_raid3_softc *sc, struct bio *cbp) if (G_RAID3_NEXT_BIO(bp) == cbp) break; } - KASSERT(bp != NULL, ("NULL bp")); - KASSERT(G_RAID3_NEXT_BIO(bp) != NULL, ("NULL bp->bio_driver1")); - G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); - G_RAID3_NEXT_BIO(cbp) = NULL; + if (bp != NULL) { + KASSERT(G_RAID3_NEXT_BIO(bp) != NULL, + ("NULL bp->bio_driver1")); + G_RAID3_NEXT_BIO(bp) = G_RAID3_NEXT_BIO(cbp); + G_RAID3_NEXT_BIO(cbp) = NULL; + } g_destroy_bio(cbp); } } @@ -928,7 +991,12 @@ g_raid3_gather(struct bio *pbp) } if (pbp->bio_error != 0) goto finish; - if (fbp != NULL) { + if (fbp != NULL && (pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { + pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_VERIFY; + if (xbp != fbp) + g_raid3_replace_bio(xbp, fbp); + g_raid3_destroy_bio(sc, fbp); + } else if (fbp != NULL) { struct g_consumer *cp; /* @@ -970,6 +1038,14 @@ g_raid3_gather(struct bio *pbp) xbp->bio_length); } xbp->bio_cflags &= ~G_RAID3_BIO_CFLAG_PARITY; + if ((pbp->bio_pflags & G_RAID3_BIO_PFLAG_VERIFY) != 0) { + if (!g_raid3_is_zero(xbp)) { + g_raid3_parity_mismatch++; + pbp->bio_error = EIO; + goto finish; + } + g_raid3_destroy_bio(sc, xbp); + } } atom = sc->sc_sectorsize / (sc->sc_ndisks - 1); cadd = padd = 0; @@ -986,7 +1062,7 @@ finish: G_RAID3_LOGREQ(3, pbp, "Request finished."); else G_RAID3_LOGREQ(0, pbp, "Request failed."); - pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_DEGRADED; + pbp->bio_pflags &= ~G_RAID3_BIO_PFLAG_MASK; g_io_deliver(pbp, pbp->bio_error); while ((cbp = G_RAID3_HEAD_BIO(pbp)) != NULL) g_raid3_destroy_bio(sc, cbp); @@ -1286,7 +1362,7 @@ g_raid3_register_request(struct bio *pbp) struct bio *cbp; off_t offset, length; u_int n, ndisks; - int round_robin; + int 
round_robin, verify; ndisks = 0; sc = pbp->bio_to->geom->softc; @@ -1298,9 +1374,26 @@ g_raid3_register_request(struct bio *pbp) g_raid3_init_bio(pbp); length = pbp->bio_length / (sc->sc_ndisks - 1); offset = pbp->bio_offset / (sc->sc_ndisks - 1); + round_robin = verify = 0; switch (pbp->bio_cmd) { case BIO_READ: - ndisks = sc->sc_ndisks - 1; + if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && + sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { + pbp->bio_pflags |= G_RAID3_BIO_PFLAG_VERIFY; + verify = 1; + ndisks = sc->sc_ndisks; + } else { + verify = 0; + ndisks = sc->sc_ndisks - 1; + } + if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0 && + sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE) { + round_robin = 1; + } else { + round_robin = 0; + } + KASSERT(!round_robin || !verify, + ("ROUND-ROBIN and VERIFY are mutually exclusive.")); pbp->bio_driver2 = &sc->sc_disks[sc->sc_ndisks - 1]; break; case BIO_WRITE: @@ -1308,12 +1401,6 @@ g_raid3_register_request(struct bio *pbp) ndisks = sc->sc_ndisks; break; } - if (sc->sc_state == G_RAID3_DEVICE_STATE_COMPLETE && - (sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { - round_robin = 1; - } else { - round_robin = 0; - } for (n = 0; n < ndisks; n++) { disk = &sc->sc_disks[n]; cbp = g_raid3_clone_bio(sc, pbp); @@ -1346,6 +1433,8 @@ g_raid3_register_request(struct bio *pbp) cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; sc->sc_round_robin++; round_robin = 0; + } else if (verify && disk->d_no == sc->sc_ndisks - 1) { + cbp->bio_cflags |= G_RAID3_BIO_CFLAG_PARITY; } break; case BIO_WRITE: @@ -2355,6 +2444,15 @@ g_raid3_check_metadata(struct g_raid3_softc *sc, struct g_provider *pp, pp->name, sc->sc_name); return (EINVAL); } + if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && + (md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { + /* + * VERIFY and ROUND-ROBIN options are mutally exclusive. 
+ */ + G_RAID3_DEBUG(1, "Both VERIFY and ROUND-ROBIN flags exist on " + "disk %s (device %s), skipping.", pp->name, sc->sc_name); + return (EINVAL); + } if ((md->md_dflags & ~G_RAID3_DISK_FLAG_MASK) != 0) { G_RAID3_DEBUG(1, "Invalid disk flags on disk %s (device %s), skipping.", @@ -2764,6 +2862,7 @@ g_raid3_dumpconf(struct sbuf *sb, const char *indent, struct g_geom *gp, ADD_FLAG(G_RAID3_DEVICE_FLAG_NOAUTOSYNC, "NOAUTOSYNC"); ADD_FLAG(G_RAID3_DEVICE_FLAG_ROUND_ROBIN, "ROUND-ROBIN"); + ADD_FLAG(G_RAID3_DEVICE_FLAG_VERIFY, "VERIFY"); #undef ADD_FLAG } sbuf_printf(sb, "</Flags>\n"); diff --git a/sys/geom/raid3/g_raid3.h b/sys/geom/raid3/g_raid3.h index 2f40ddd..c936710 100644 --- a/sys/geom/raid3/g_raid3.h +++ b/sys/geom/raid3/g_raid3.h @@ -39,8 +39,9 @@ * Version history: * 0 - Initial version number. * 1 - Added 'round-robin reading' algorithm. + * 2 - Added 'verify reading' algorithm. */ -#define G_RAID3_VERSION 1 +#define G_RAID3_VERSION 2 #define G_RAID3_DISK_FLAG_DIRTY 0x0000000000000001ULL #define G_RAID3_DISK_FLAG_SYNCHRONIZING 0x0000000000000002ULL @@ -52,8 +53,10 @@ #define G_RAID3_DEVICE_FLAG_NOAUTOSYNC 0x0000000000000001ULL #define G_RAID3_DEVICE_FLAG_ROUND_ROBIN 0x0000000000000002ULL +#define G_RAID3_DEVICE_FLAG_VERIFY 0x0000000000000004ULL #define G_RAID3_DEVICE_FLAG_MASK (G_RAID3_DEVICE_FLAG_NOAUTOSYNC | \ - G_RAID3_DEVICE_FLAG_ROUND_ROBIN) + G_RAID3_DEVICE_FLAG_ROUND_ROBIN | \ + G_RAID3_DEVICE_FLAG_VERIFY) #ifdef _KERNEL extern u_int g_raid3_debug; @@ -88,9 +91,18 @@ extern u_int g_raid3_debug; #define G_RAID3_BIO_CFLAG_PARITY 0x04 #define G_RAID3_BIO_CFLAG_NODISK 0x08 #define G_RAID3_BIO_CFLAG_REGSYNC 0x10 +#define G_RAID3_BIO_CFLAG_MASK (G_RAID3_BIO_CFLAG_REGULAR | \ + G_RAID3_BIO_CFLAG_SYNC | \ + G_RAID3_BIO_CFLAG_PARITY | \ + G_RAID3_BIO_CFLAG_NODISK | \ + G_RAID3_BIO_CFLAG_REGSYNC) #define G_RAID3_BIO_PFLAG_DEGRADED 0x01 #define G_RAID3_BIO_PFLAG_NOPARITY 0x02 +#define G_RAID3_BIO_PFLAG_VERIFY 0x04 +#define G_RAID3_BIO_PFLAG_MASK 
(G_RAID3_BIO_PFLAG_DEGRADED | \ + G_RAID3_BIO_PFLAG_NOPARITY | \ + G_RAID3_BIO_PFLAG_VERIFY) /* * Informations needed for synchronization. @@ -291,6 +303,8 @@ raid3_metadata_dump(const struct g_raid3_metadata *md) printf(" NOAUTOSYNC"); if ((md->md_mflags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) printf(" ROUND-ROBIN"); + if ((md->md_mflags & G_RAID3_DEVICE_FLAG_VERIFY) != 0) + printf(" VERIFY"); } printf("\n"); printf(" dflags:"); diff --git a/sys/geom/raid3/g_raid3_ctl.c b/sys/geom/raid3/g_raid3_ctl.c index 7692ce8..d4a20c0 100644 --- a/sys/geom/raid3/g_raid3_ctl.c +++ b/sys/geom/raid3/g_raid3_ctl.c @@ -94,7 +94,9 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp) struct g_raid3_disk *disk; const char *name; int *nargs, do_sync = 0; - int *autosync, *noautosync, *round_robin, *noround_robin; + int *autosync, *noautosync; + int *round_robin, *noround_robin; + int *verify, *noverify; u_int n; g_topology_assert(); @@ -144,7 +146,23 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp) "noround_robin"); return; } - if (!*autosync && !*noautosync && !*round_robin && !*noround_robin) { + verify = gctl_get_paraml(req, "verify", sizeof(*verify)); + if (verify == NULL) { + gctl_error(req, "No '%s' argument.", "verify"); + return; + } + noverify = gctl_get_paraml(req, "noverify", sizeof(*noverify)); + if (noverify == NULL) { + gctl_error(req, "No '%s' argument.", "noverify"); + return; + } + if (*verify && *noverify) { + gctl_error(req, "'%s' and '%s' specified.", "verify", + "noverify"); + return; + } + if (!*autosync && !*noautosync && !*round_robin && !*noround_robin && + !*verify && !*noverify) { gctl_error(req, "Nothing has changed."); return; } @@ -157,6 +175,13 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp) if (*noautosync) sc->sc_flags |= G_RAID3_DEVICE_FLAG_NOAUTOSYNC; } + if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0) { + if (*noverify) + sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_VERIFY; + } else { + if (*verify) 
+ sc->sc_flags |= G_RAID3_DEVICE_FLAG_VERIFY; + } if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { if (*noround_robin) sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_ROUND_ROBIN; @@ -164,6 +189,13 @@ g_raid3_ctl_configure(struct gctl_req *req, struct g_class *mp) if (*round_robin) sc->sc_flags |= G_RAID3_DEVICE_FLAG_ROUND_ROBIN; } + if ((sc->sc_flags & G_RAID3_DEVICE_FLAG_VERIFY) != 0 && + (sc->sc_flags & G_RAID3_DEVICE_FLAG_ROUND_ROBIN) != 0) { + /* + * VERIFY and ROUND-ROBIN options are mutally exclusive. + */ + sc->sc_flags &= ~G_RAID3_DEVICE_FLAG_ROUND_ROBIN; + } for (n = 0; n < sc->sc_ndisks; n++) { disk = &sc->sc_disks[n]; if (do_sync) { |