summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--drivers/md/dm-raid1.c101
1 files changed, 90 insertions, 11 deletions
diff --git a/drivers/md/dm-raid1.c b/drivers/md/dm-raid1.c
index 9978b9f..ec6d675 100644
--- a/drivers/md/dm-raid1.c
+++ b/drivers/md/dm-raid1.c
@@ -146,6 +146,7 @@ struct mirror_set {
region_t nr_regions;
int in_sync;
int log_failure;
+ atomic_t suspend;
atomic_t default_mirror; /* Default mirror */
@@ -372,6 +373,16 @@ static void complete_resync_work(struct region *reg, int success)
struct region_hash *rh = reg->rh;
rh->log->type->set_region_sync(rh->log, reg->key, success);
+
+ /*
+ * Dispatch the bios before we call 'wake_up_all'.
+ * This is important because if we are suspending,
+ * we want to know that recovery is complete and
+ * the work queue is flushed. If we wake_up_all
+ * before we dispatch_bios (queue bios and call wake()),
+ * then we risk suspending before the work queue
+ * has been properly flushed.
+ */
dispatch_bios(rh->ms, &reg->delayed_bios);
if (atomic_dec_and_test(&rh->recovery_in_flight))
wake_up_all(&_kmirrord_recovery_stopped);
@@ -1069,11 +1080,13 @@ static void do_writes(struct mirror_set *ms, struct bio_list *writes)
/*
* Dispatch io.
*/
- if (unlikely(ms->log_failure))
+ if (unlikely(ms->log_failure)) {
+ spin_lock_irq(&ms->lock);
+ bio_list_merge(&ms->failures, &sync);
+ spin_unlock_irq(&ms->lock);
+ } else
while ((bio = bio_list_pop(&sync)))
- bio_endio(bio, -EIO);
- else while ((bio = bio_list_pop(&sync)))
- do_write(ms, bio);
+ do_write(ms, bio);
while ((bio = bio_list_pop(&recover)))
rh_delay(&ms->rh, bio);
@@ -1091,8 +1104,46 @@ static void do_failures(struct mirror_set *ms, struct bio_list *failures)
if (!failures->head)
return;
- while ((bio = bio_list_pop(failures)))
- __bio_mark_nosync(ms, bio, bio->bi_size, 0);
+ if (!ms->log_failure) {
+ while ((bio = bio_list_pop(failures)))
+ __bio_mark_nosync(ms, bio, bio->bi_size, 0);
+ return;
+ }
+
+ /*
+ * If the log has failed, unattempted writes are being
+ * put on the failures list. We can't issue those writes
+ * until a log has been marked, so we must store them.
+ *
+ * If a 'noflush' suspend is in progress, we can requeue
+ * the I/O's to the core. This give userspace a chance
+ * to reconfigure the mirror, at which point the core
+ * will reissue the writes. If the 'noflush' flag is
+ * not set, we have no choice but to return errors.
+ *
+ * Some writes on the failures list may have been
+ * submitted before the log failure and represent a
+ * failure to write to one of the devices. It is ok
+ * for us to treat them the same and requeue them
+ * as well.
+ */
+ if (dm_noflush_suspending(ms->ti)) {
+ while ((bio = bio_list_pop(failures)))
+ bio_endio(bio, DM_ENDIO_REQUEUE);
+ return;
+ }
+
+ if (atomic_read(&ms->suspend)) {
+ while ((bio = bio_list_pop(failures)))
+ bio_endio(bio, -EIO);
+ return;
+ }
+
+ spin_lock_irq(&ms->lock);
+ bio_list_merge(&ms->failures, failures);
+ spin_unlock_irq(&ms->lock);
+
+ wake(ms);
}
static void trigger_event(struct work_struct *work)
@@ -1176,6 +1227,8 @@ static struct mirror_set *alloc_context(unsigned int nr_mirrors,
ms->nr_mirrors = nr_mirrors;
ms->nr_regions = dm_sector_div_up(ti->len, region_size);
ms->in_sync = 0;
+ ms->log_failure = 0;
+ atomic_set(&ms->suspend, 0);
atomic_set(&ms->default_mirror, DEFAULT_MIRROR);
ms->io_client = dm_io_client_create(DM_IO_PAGES);
@@ -1511,26 +1564,51 @@ static int mirror_end_io(struct dm_target *ti, struct bio *bio,
return 0;
}
-static void mirror_postsuspend(struct dm_target *ti)
+static void mirror_presuspend(struct dm_target *ti)
{
struct mirror_set *ms = (struct mirror_set *) ti->private;
struct dirty_log *log = ms->rh.log;
+ atomic_set(&ms->suspend, 1);
+
+ /*
+ * We must finish up all the work that we've
+ * generated (i.e. recovery work).
+ */
rh_stop_recovery(&ms->rh);
- /* Wait for all I/O we generated to complete */
wait_event(_kmirrord_recovery_stopped,
!atomic_read(&ms->rh.recovery_in_flight));
+ if (log->type->presuspend && log->type->presuspend(log))
+ /* FIXME: need better error handling */
+ DMWARN("log presuspend failed");
+
+ /*
+ * Now that recovery is complete/stopped and the
+ * delayed bios are queued, we need to wait for
+ * the worker thread to complete. This way,
+ * we know that all of our I/O has been pushed.
+ */
+ flush_workqueue(ms->kmirrord_wq);
+}
+
+static void mirror_postsuspend(struct dm_target *ti)
+{
+ struct mirror_set *ms = ti->private;
+ struct dirty_log *log = ms->rh.log;
+
if (log->type->postsuspend && log->type->postsuspend(log))
/* FIXME: need better error handling */
- DMWARN("log suspend failed");
+ DMWARN("log postsuspend failed");
}
static void mirror_resume(struct dm_target *ti)
{
- struct mirror_set *ms = (struct mirror_set *) ti->private;
+ struct mirror_set *ms = ti->private;
struct dirty_log *log = ms->rh.log;
+
+ atomic_set(&ms->suspend, 0);
if (log->type->resume && log->type->resume(log))
/* FIXME: need better error handling */
DMWARN("log resume failed");
@@ -1564,7 +1642,7 @@ static int mirror_status(struct dm_target *ti, status_type_t type,
DMEMIT("%d", ms->nr_mirrors);
for (m = 0; m < ms->nr_mirrors; m++)
DMEMIT(" %s %llu", ms->mirror[m].dev->name,
- (unsigned long long)ms->mirror[m].offset);
+ (unsigned long long)ms->mirror[m].offset);
if (ms->features & DM_RAID1_HANDLE_ERRORS)
DMEMIT(" 1 handle_errors");
@@ -1581,6 +1659,7 @@ static struct target_type mirror_target = {
.dtr = mirror_dtr,
.map = mirror_map,
.end_io = mirror_end_io,
+ .presuspend = mirror_presuspend,
.postsuspend = mirror_postsuspend,
.resume = mirror_resume,
.status = mirror_status,
OpenPOWER on IntegriCloud