diff options
author | NeilBrown <neilb@suse.com> | 2016-11-18 16:16:11 +1100 |
---|---|---|
committer | Shaohua Li <shli@fb.com> | 2016-11-22 08:58:17 -0800 |
commit | 688834e6ae6b21e3d98b5cf2586aa4a9b515c3a0 (patch) | |
tree | 508a052e94aa8483545417395b5261b86c706587 /drivers/md/md.c | |
parent | 3bddb7f8f264ec58dc86e11ca97341c24f9d38f6 (diff) | |
download | op-kernel-dev-688834e6ae6b21e3d98b5cf2586aa4a9b515c3a0.zip op-kernel-dev-688834e6ae6b21e3d98b5cf2586aa4a9b515c3a0.tar.gz |
md/failfast: add failfast flag for md to be used by some personalities.
This patch just adds a 'failfast' per-device flag which can be stored
in v0.90 or v1.x metadata.
The flag is not used yet but the intent is that it can be used for
mirrored (raid1/raid10) arrays where low latency is more important
than keeping all devices on-line.
Setting the flag for a device effectively gives permission for that
device to be marked as Faulty and excluded from the array on the first
error. The underlying driver will be directed not to retry requests
that result in failures. There is a proviso that the device must not
be marked Faulty if that would cause the array as a whole to fail; it
may only be marked Faulty if the array remains functional, but is
degraded.
Failures on read requests will cause the device to be marked
as Faulty immediately so that further reads will avoid that
device. No attempt will be made to correct read errors by
over-writing with the correct data.
It is expected that if transient errors, such as cable unplug, are
possible, then something in user-space will revalidate failed
devices and re-add them when they appear to be working again.
Signed-off-by: NeilBrown <neilb@suse.com>
Signed-off-by: Shaohua Li <shli@fb.com>
Diffstat (limited to 'drivers/md/md.c')
-rw-r--r-- | drivers/md/md.c | 27 |
1 files changed, 27 insertions, 0 deletions
diff --git a/drivers/md/md.c b/drivers/md/md.c index d3cef77..2cf0e89 100644 --- a/drivers/md/md.c +++ b/drivers/md/md.c @@ -1164,6 +1164,8 @@ static int super_90_validate(struct mddev *mddev, struct md_rdev *rdev) } if (desc->state & (1<<MD_DISK_WRITEMOSTLY)) set_bit(WriteMostly, &rdev->flags); + if (desc->state & (1<<MD_DISK_FAILFAST)) + set_bit(FailFast, &rdev->flags); } else /* MULTIPATH are always insync */ set_bit(In_sync, &rdev->flags); return 0; @@ -1289,6 +1291,8 @@ static void super_90_sync(struct mddev *mddev, struct md_rdev *rdev) } if (test_bit(WriteMostly, &rdev2->flags)) d->state |= (1<<MD_DISK_WRITEMOSTLY); + if (test_bit(FailFast, &rdev2->flags)) + d->state |= (1<<MD_DISK_FAILFAST); } /* now set the "removed" and "faulty" bits on any missing devices */ for (i=0 ; i < mddev->raid_disks ; i++) { @@ -1673,6 +1677,8 @@ static int super_1_validate(struct mddev *mddev, struct md_rdev *rdev) } if (sb->devflags & WriteMostly1) set_bit(WriteMostly, &rdev->flags); + if (sb->devflags & FailFast1) + set_bit(FailFast, &rdev->flags); if (le32_to_cpu(sb->feature_map) & MD_FEATURE_REPLACEMENT) set_bit(Replacement, &rdev->flags); } else /* MULTIPATH are always insync */ @@ -1711,6 +1717,10 @@ static void super_1_sync(struct mddev *mddev, struct md_rdev *rdev) sb->chunksize = cpu_to_le32(mddev->chunk_sectors); sb->level = cpu_to_le32(mddev->level); sb->layout = cpu_to_le32(mddev->layout); + if (test_bit(FailFast, &rdev->flags)) + sb->devflags |= FailFast1; + else + sb->devflags &= ~FailFast1; if (test_bit(WriteMostly, &rdev->flags)) sb->devflags |= WriteMostly1; @@ -2557,6 +2567,8 @@ state_show(struct md_rdev *rdev, char *page) len += sprintf(page+len, "replacement%s", sep); if (test_bit(ExternalBbl, &flags)) len += sprintf(page+len, "external_bbl%s", sep); + if (test_bit(FailFast, &flags)) + len += sprintf(page+len, "failfast%s", sep); if (len) len -= strlen(sep); @@ -2579,6 +2591,7 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) * so that it gets 
rebuilt based on bitmap * write_error - sets WriteErrorSeen * -write_error - clears WriteErrorSeen + * {,-}failfast - set/clear FailFast */ int err = -EINVAL; if (cmd_match(buf, "faulty") && rdev->mddev->pers) { @@ -2637,6 +2650,12 @@ state_store(struct md_rdev *rdev, const char *buf, size_t len) } else if (cmd_match(buf, "insync") && rdev->raid_disk == -1) { set_bit(In_sync, &rdev->flags); err = 0; + } else if (cmd_match(buf, "failfast")) { + set_bit(FailFast, &rdev->flags); + err = 0; + } else if (cmd_match(buf, "-failfast")) { + clear_bit(FailFast, &rdev->flags); + err = 0; } else if (cmd_match(buf, "-insync") && rdev->raid_disk >= 0 && !test_bit(Journal, &rdev->flags)) { if (rdev->mddev->pers == NULL) { @@ -5942,6 +5961,8 @@ static int get_disk_info(struct mddev *mddev, void __user * arg) info.state |= (1<<MD_DISK_JOURNAL); if (test_bit(WriteMostly, &rdev->flags)) info.state |= (1<<MD_DISK_WRITEMOSTLY); + if (test_bit(FailFast, &rdev->flags)) + info.state |= (1<<MD_DISK_FAILFAST); } else { info.major = info.minor = 0; info.raid_disk = -1; @@ -6049,6 +6070,10 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) set_bit(WriteMostly, &rdev->flags); else clear_bit(WriteMostly, &rdev->flags); + if (info->state & (1<<MD_DISK_FAILFAST)) + set_bit(FailFast, &rdev->flags); + else + clear_bit(FailFast, &rdev->flags); if (info->state & (1<<MD_DISK_JOURNAL)) { struct md_rdev *rdev2; @@ -6138,6 +6163,8 @@ static int add_new_disk(struct mddev *mddev, mdu_disk_info_t *info) if (info->state & (1<<MD_DISK_WRITEMOSTLY)) set_bit(WriteMostly, &rdev->flags); + if (info->state & (1<<MD_DISK_FAILFAST)) + set_bit(FailFast, &rdev->flags); if (!mddev->persistent) { pr_debug("md: nonpersistent superblock ...\n"); |