diff options
author | mav <mav@FreeBSD.org> | 2012-05-11 13:20:17 +0000 |
---|---|---|
committer | mav <mav@FreeBSD.org> | 2012-05-11 13:20:17 +0000 |
commit | 7e5e00e55fff2185e28629da87dec0d8a84aebe0 (patch) | |
tree | e98c36747e1b79bee243138c1315e78339da990c /sys/geom/raid | |
parent | 60e2e3c08cd431d8e161da65e2b224909783f748 (diff) | |
download | FreeBSD-src-7e5e00e55fff2185e28629da87dec0d8a84aebe0.zip FreeBSD-src-7e5e00e55fff2185e28629da87dec0d8a84aebe0.tar.gz |
- Prevent error status leak if write to some of the RAID1/1E volume disks
failed while write to some other succeeded. Instead mark disk as failed.
- Make RAID1E less aggressive in failing disks to avoid volume breakage.
MFC after: 2 weeks
Diffstat (limited to 'sys/geom/raid')
-rw-r--r-- | sys/geom/raid/tr_raid1.c | 11 | ||||
-rw-r--r-- | sys/geom/raid/tr_raid1e.c | 22 |
2 files changed, 29 insertions, 4 deletions
diff --git a/sys/geom/raid/tr_raid1.c b/sys/geom/raid/tr_raid1.c index ccd3157..02527f2 100644 --- a/sys/geom/raid/tr_raid1.c +++ b/sys/geom/raid/tr_raid1.c @@ -894,7 +894,16 @@ rebuild_round_done: g_raid_unlock_range(sd->sd_volume, bp->bio_offset, bp->bio_length); } - error = bp->bio_error; + if (pbp->bio_cmd != BIO_READ) { + if (pbp->bio_inbed == 1 || pbp->bio_error != 0) + pbp->bio_error = bp->bio_error; + if (bp->bio_error != 0) { + G_RAID_LOGREQ(0, bp, "Write failed: failing subdisk."); + g_raid_tr_raid1_fail_disk(sd->sd_softc, sd, sd->sd_disk); + } + error = pbp->bio_error; + } else + error = bp->bio_error; g_destroy_bio(bp); if (pbp->bio_children == pbp->bio_inbed) { pbp->bio_completed = pbp->bio_length; diff --git a/sys/geom/raid/tr_raid1e.c b/sys/geom/raid/tr_raid1e.c index 141ff7a..981e205 100644 --- a/sys/geom/raid/tr_raid1e.c +++ b/sys/geom/raid/tr_raid1e.c @@ -338,6 +338,9 @@ static void g_raid_tr_raid1e_fail_disk(struct g_raid_softc *sc, struct g_raid_subdisk *sd, struct g_raid_disk *disk) { + struct g_raid_volume *vol; + + vol = sd->sd_volume; /* * We don't fail the last disk in the pack, since it still has decent * data on it and that's better than failing the disk if it is the root @@ -347,8 +350,12 @@ g_raid_tr_raid1e_fail_disk(struct g_raid_softc *sc, struct g_raid_subdisk *sd, * the volume that has / on it. I can't think of a case where we'd * want the volume to go away on this kind of event. */ - if (g_raid_nsubdisks(sd->sd_volume, G_RAID_SUBDISK_S_ACTIVE) == 1 && - g_raid_get_subdisk(sd->sd_volume, G_RAID_SUBDISK_S_ACTIVE) == sd) + if ((g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_ACTIVE) + + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_RESYNC) + + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_STALE) + + g_raid_nsubdisks(vol, G_RAID_SUBDISK_S_UNINITIALIZED) < + vol->v_disks_count) && + (sd->sd_state >= G_RAID_SUBDISK_S_UNINITIALIZED)) return; g_raid_fail_disk(sc, sd, disk); } @@ -1113,7 +1120,16 @@ rebuild_round_done: G_RAID_LOGREQ(2, bp, "REMAP done %d.", bp->bio_error); g_raid_unlock_range(sd->sd_volume, virtual, bp->bio_length); } - error = bp->bio_error; + if (pbp->bio_cmd != BIO_READ) { + if (pbp->bio_inbed == 1 || pbp->bio_error != 0) + pbp->bio_error = bp->bio_error; + if (bp->bio_error != 0) { + G_RAID_LOGREQ(0, bp, "Write failed: failing subdisk."); + g_raid_tr_raid1e_fail_disk(sd->sd_softc, sd, sd->sd_disk); + } + error = pbp->bio_error; + } else + error = bp->bio_error; g_destroy_bio(bp); if (pbp->bio_children == pbp->bio_inbed) { pbp->bio_completed = pbp->bio_length; |