diff options
author | Tejun Heo <tj@kernel.org> | 2010-09-03 11:56:18 +0200 |
---|---|---|
committer | Jens Axboe <jaxboe@fusionio.com> | 2010-09-10 12:35:38 +0200 |
commit | e9c7469bb4f502dafc092166201bea1ad5fc0fbf (patch) | |
tree | 04202b0bb88623d3005c909eaafcb280778902da /drivers/md/raid5.c | |
parent | 7bc9fddab074d6bb630344e1969e28d20b140621 (diff) | |
download | op-kernel-dev-e9c7469bb4f502dafc092166201bea1ad5fc0fbf.zip op-kernel-dev-e9c7469bb4f502dafc092166201bea1ad5fc0fbf.tar.gz |
md: implment REQ_FLUSH/FUA support
This patch converts md to support REQ_FLUSH/FUA instead of now
deprecated REQ_HARDBARRIER. In the core part (md.c), the following
changes are notable.
* Unlike REQ_HARDBARRIER, REQ_FLUSH/FUA don't interfere with
processing of other requests and thus there is no reason to mark the
queue congested while FLUSH/FUA is in progress.
* REQ_FLUSH/FUA failures are final and its users don't need retry
logic. Retry logic is removed.
* Preflush needs to be issued to all member devices but FUA writes can
be handled the same way as other writes - their processing can be
deferred to request_queue of member devices. md_barrier_request()
is renamed to md_flush_request() and simplified accordingly.
For linear, raid0 and multipath, the core changes are enough. raid1,
5 and 10 need the following conversions.
* raid1: Handling of FLUSH/FUA bio's can simply be deferred to
request_queues of member devices. Barrier related logic removed.
* raid5: Queue draining logic dropped. FUA bit is propagated through
biodrain and stripe resconstruction such that all the updated parts
of the stripe are written out with FUA writes if any of the dirtying
writes was FUA. preread_active_stripes handling in make_request()
is updated as suggested by Neil Brown.
* raid10: FUA bit needs to be propagated to write clones.
linear, raid0, 1, 5 and 10 tested.
Signed-off-by: Tejun Heo <tj@kernel.org>
Reviewed-by: Neil Brown <neilb@suse.de>
Signed-off-by: Jens Axboe <jaxboe@fusionio.com>
Diffstat (limited to 'drivers/md/raid5.c')
-rw-r--r-- | drivers/md/raid5.c | 43 |
1 files changed, 21 insertions, 22 deletions
diff --git a/drivers/md/raid5.c b/drivers/md/raid5.c index 69b0a16..31140d1 100644 --- a/drivers/md/raid5.c +++ b/drivers/md/raid5.c @@ -506,9 +506,12 @@ static void ops_run_io(struct stripe_head *sh, struct stripe_head_state *s) int rw; struct bio *bi; mdk_rdev_t *rdev; - if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) - rw = WRITE; - else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) + if (test_and_clear_bit(R5_Wantwrite, &sh->dev[i].flags)) { + if (test_and_clear_bit(R5_WantFUA, &sh->dev[i].flags)) + rw = WRITE_FUA; + else + rw = WRITE; + } else if (test_and_clear_bit(R5_Wantread, &sh->dev[i].flags)) rw = READ; else continue; @@ -1031,6 +1034,8 @@ ops_run_biodrain(struct stripe_head *sh, struct dma_async_tx_descriptor *tx) while (wbi && wbi->bi_sector < dev->sector + STRIPE_SECTORS) { + if (wbi->bi_rw & REQ_FUA) + set_bit(R5_WantFUA, &dev->flags); tx = async_copy_data(1, wbi, dev->page, dev->sector, tx); wbi = r5_next_bio(wbi, dev->sector); @@ -1048,15 +1053,22 @@ static void ops_complete_reconstruct(void *stripe_head_ref) int pd_idx = sh->pd_idx; int qd_idx = sh->qd_idx; int i; + bool fua = false; pr_debug("%s: stripe %llu\n", __func__, (unsigned long long)sh->sector); + for (i = disks; i--; ) + fua |= test_bit(R5_WantFUA, &sh->dev[i].flags); + for (i = disks; i--; ) { struct r5dev *dev = &sh->dev[i]; - if (dev->written || i == pd_idx || i == qd_idx) + if (dev->written || i == pd_idx || i == qd_idx) { set_bit(R5_UPTODATE, &dev->flags); + if (fua) + set_bit(R5_WantFUA, &dev->flags); + } } if (sh->reconstruct_state == reconstruct_state_drain_run) @@ -3281,7 +3293,7 @@ static void handle_stripe5(struct stripe_head *sh) if (dec_preread_active) { /* We delay this until after ops_run_io so that if make_request - * is waiting on a barrier, it won't continue until the writes + * is waiting on a flush, it won't continue until the writes * have actually been submitted. */ atomic_dec(&conf->preread_active_stripes); @@ -3583,7 +3595,7 @@ static void handle_stripe6(struct stripe_head *sh) if (dec_preread_active) { /* We delay this until after ops_run_io so that if make_request - * is waiting on a barrier, it won't continue until the writes + * is waiting on a flush, it won't continue until the writes * have actually been submitted. */ atomic_dec(&conf->preread_active_stripes); @@ -3978,14 +3990,8 @@ static int make_request(mddev_t *mddev, struct bio * bi) const int rw = bio_data_dir(bi); int remaining; - if (unlikely(bi->bi_rw & REQ_HARDBARRIER)) { - /* Drain all pending writes. We only really need - * to ensure they have been submitted, but this is - * easier. - */ - mddev->pers->quiesce(mddev, 1); - mddev->pers->quiesce(mddev, 0); - md_barrier_request(mddev, bi); + if (unlikely(bi->bi_rw & REQ_FLUSH)) { + md_flush_request(mddev, bi); return 0; } @@ -4103,7 +4109,7 @@ static int make_request(mddev_t *mddev, struct bio * bi) finish_wait(&conf->wait_for_overlap, &w); set_bit(STRIPE_HANDLE, &sh->state); clear_bit(STRIPE_DELAYED, &sh->state); - if (mddev->barrier && + if ((bi->bi_rw & REQ_SYNC) && !test_and_set_bit(STRIPE_PREREAD_ACTIVE, &sh->state)) atomic_inc(&conf->preread_active_stripes); release_stripe(sh); @@ -4126,13 +4132,6 @@ static int make_request(mddev_t *mddev, struct bio * bi) bio_endio(bi, 0); } - if (mddev->barrier) { - /* We need to wait for the stripes to all be handled. - * So: wait for preread_active_stripes to drop to 0. - */ - wait_event(mddev->thread->wqueue, - atomic_read(&conf->preread_active_stripes) == 0); - } return 0; } |