From 9c47008d13add50ec4597a8b9eee200c515282c8 Mon Sep 17 00:00:00 2001 From: "Martin K. Petersen" Date: Thu, 9 Apr 2009 00:27:12 +0100 Subject: dm: add integrity support This patch provides support for data integrity passthrough in the device mapper. - If one or more component devices support integrity an integrity profile is preallocated for the DM device. - If all component devices have compatible profiles the DM device is flagged as capable. - Handle integrity metadata when splitting and cloning bios. Signed-off-by: Martin K. Petersen Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 15 +++++++++++++++ 1 file changed, 15 insertions(+) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 788ba96..25d86e2 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -700,6 +700,12 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, clone->bi_io_vec->bv_len = clone->bi_size; clone->bi_flags |= 1 << BIO_CLONED; + if (bio_integrity(bio)) { + bio_integrity_clone(clone, bio, GFP_NOIO); + bio_integrity_trim(clone, + bio_sector_offset(bio, idx, offset), len); + } + return clone; } @@ -721,6 +727,14 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone->bi_size = to_bytes(len); clone->bi_flags &= ~(1 << BIO_SEG_VALID); + if (bio_integrity(bio)) { + bio_integrity_clone(clone, bio, GFP_NOIO); + + if (idx != bio->bi_idx || clone->bi_size < bio->bi_size) + bio_integrity_trim(clone, + bio_sector_offset(bio, idx, 0), len); + } + return clone; } @@ -1193,6 +1207,7 @@ static void free_dev(struct mapped_device *md) mempool_destroy(md->tio_pool); mempool_destroy(md->io_pool); bioset_free(md->bs); + blk_integrity_unregister(md->disk); del_gendisk(md->disk); free_minor(minor); -- cgit v1.1 From 692d0eb9e02cf81fb387ff891f53840db2f3110a Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 9 Apr 2009 00:27:13 +0100 Subject: dm: remove limited barrier support Prepare for full barrier implementation: first remove the restricted support. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 15 ++++++++++----- 1 file changed, 10 insertions(+), 5 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 25d86e2..ab3b5d8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -851,11 +851,7 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) bio_io_error(bio); return; } - if (unlikely(bio_barrier(bio) && !dm_table_barrier_ok(ci.map))) { - dm_table_put(ci.map); - bio_endio(bio, -EOPNOTSUPP); - return; - } + ci.md = md; ci.bio = bio; ci.io = alloc_io(md); @@ -937,6 +933,15 @@ static int dm_request(struct request_queue *q, struct bio *bio) struct mapped_device *md = q->queuedata; int cpu; + /* + * There is no use in forwarding any barrier request since we can't + * guarantee it is (or can be) handled by the targets correctly. + */ + if (unlikely(bio_barrier(bio))) { + bio_endio(bio, -EOPNOTSUPP); + return 0; + } + down_read(&md->io_lock); cpu = part_stat_lock(); -- cgit v1.1 From df12ee996378a5917e9555169fe278ecca0612d4 Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 9 Apr 2009 00:27:13 +0100 Subject: dm: rearrange dm_wq_work Refactor dm_wq_work() to make later patch more readable. Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 17 +++++++++-------- 1 file changed, 9 insertions(+), 8 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index ab3b5d8..020a9e1 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1437,18 +1437,19 @@ static void dm_wq_work(struct work_struct *work) down_write(&md->io_lock); -next_bio: - spin_lock_irq(&md->deferred_lock); - c = bio_list_pop(&md->deferred); - spin_unlock_irq(&md->deferred_lock); + while (1) { + spin_lock_irq(&md->deferred_lock); + c = bio_list_pop(&md->deferred); + spin_unlock_irq(&md->deferred_lock); + + if (!c) { + clear_bit(DMF_BLOCK_IO, &md->flags); + break; + } - if (c) { __split_and_process_bio(md, c); - goto next_bio; } - clear_bit(DMF_BLOCK_IO, &md->flags); - up_write(&md->io_lock); } -- cgit v1.1 From 1eb787ec183d1267cac95aae632e92dee05ed50a Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 9 Apr 2009 00:27:14 +0100 Subject: dm: split DMF_BLOCK_IO flag into two Split the DMF_BLOCK_IO flag into two. DMF_BLOCK_IO_FOR_SUSPEND is set when I/O must be blocked while suspending a device. DMF_QUEUE_IO_TO_THREAD is set when I/O must be queued to a worker thread for later processing. Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 22 +++++++++++++--------- 1 file changed, 13 insertions(+), 9 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 020a9e1..7cac722 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -89,12 +89,13 @@ union map_info *dm_get_mapinfo(struct bio *bio) /* * Bits for the md->flags field. */ -#define DMF_BLOCK_IO 0 +#define DMF_BLOCK_IO_FOR_SUSPEND 0 #define DMF_SUSPENDED 1 #define DMF_FROZEN 2 #define DMF_FREEING 3 #define DMF_DELETING 4 #define DMF_NOFLUSH_SUSPENDING 5 +#define DMF_QUEUE_IO_TO_THREAD 6 /* * Work processed by per-device workqueue. @@ -439,7 +440,7 @@ static int queue_io(struct mapped_device *md, struct bio *bio) { down_write(&md->io_lock); - if (!test_bit(DMF_BLOCK_IO, &md->flags)) { + if (!test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) { up_write(&md->io_lock); return 1; } @@ -950,10 +951,10 @@ static int dm_request(struct request_queue *q, struct bio *bio) part_stat_unlock(); /* - * If we're suspended we have to queue - * this io for later. + * If we're suspended or the thread is processing barriers + * we have to queue this io for later. */ - while (test_bit(DMF_BLOCK_IO, &md->flags)) { + while (test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) { up_read(&md->io_lock); if (bio_rw(bio) != READA) @@ -997,7 +998,7 @@ static int dm_any_congested(void *congested_data, int bdi_bits) struct mapped_device *md = congested_data; struct dm_table *map; - if (!test_bit(DMF_BLOCK_IO, &md->flags)) { + if (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { map = dm_get_table(md); if (map) { r = dm_table_any_congested(map, bdi_bits); @@ -1443,7 +1444,8 @@ static void dm_wq_work(struct work_struct *work) spin_unlock_irq(&md->deferred_lock); if (!c) { - clear_bit(DMF_BLOCK_IO, &md->flags); + clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); + clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); break; } @@ -1574,10 +1576,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) } /* - * First we set the BLOCK_IO flag so no more ios will be mapped. + * First we set the DMF_QUEUE_IO_TO_THREAD flag so no more ios + * will be mapped. */ down_write(&md->io_lock); - set_bit(DMF_BLOCK_IO, &md->flags); + set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); + set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); up_write(&md->io_lock); -- cgit v1.1 From 54d9a1b4513b96cbd835ca6866c6a604d194b2ae Mon Sep 17 00:00:00 2001 From: Alasdair G Kergon Date: Thu, 9 Apr 2009 00:27:14 +0100 Subject: dm: simplify dm_request loop Refactor the code in dm_request(). Require the new DMF_BLOCK_FOR_SUSPEND flag on readahead bios we will discard so we don't drop such bios while processing a barrier. Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 18 +++++++----------- 1 file changed, 7 insertions(+), 11 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 7cac722..bb97ec8 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -929,7 +929,6 @@ out: */ static int dm_request(struct request_queue *q, struct bio *bio) { - int r = -EIO; int rw = bio_data_dir(bio); struct mapped_device *md = q->queuedata; int cpu; @@ -957,11 +956,14 @@ static int dm_request(struct request_queue *q, struct bio *bio) while (test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) { up_read(&md->io_lock); - if (bio_rw(bio) != READA) - r = queue_io(md, bio); + if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && + bio_rw(bio) == READA) { + bio_io_error(bio); + return 0; + } - if (r <= 0) - goto out_req; + if (!queue_io(md, bio)) + return 0; /* * We're in a while loop, because someone could suspend @@ -973,12 +975,6 @@ static int dm_request(struct request_queue *q, struct bio *bio) __split_and_process_bio(md, bio); up_read(&md->io_lock); return 0; - -out_req: - if (r < 0) - bio_io_error(bio); - - return 0; } static void dm_unplug_all(struct request_queue *q) -- cgit v1.1 From 3b00b2036fac7a7667d0676a0f80eee575b8c32b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 9 Apr 2009 00:27:15 +0100 Subject: dm: rework queueing and suspension Rework shutting down on suspend and document the associated rules. Drop write lock in __split_and_process_bio to allow more processing concurrency. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 40 ++++++++++++++++++++++++++++++++-------- 1 file changed, 32 insertions(+), 8 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index bb97ec8..9746c1e 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -1434,18 +1434,21 @@ static void dm_wq_work(struct work_struct *work) down_write(&md->io_lock); - while (1) { + while (!test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) { spin_lock_irq(&md->deferred_lock); c = bio_list_pop(&md->deferred); spin_unlock_irq(&md->deferred_lock); if (!c) { - clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); clear_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); break; } + up_write(&md->io_lock); + __split_and_process_bio(md, c); + + down_write(&md->io_lock); } up_write(&md->io_lock); @@ -1453,8 +1456,9 @@ static void dm_wq_work(struct work_struct *work) static void dm_queue_flush(struct mapped_device *md) { + clear_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); + smp_mb__after_clear_bit(); queue_work(md->wq, &md->work); - flush_workqueue(md->wq); } /* @@ -1572,22 +1576,36 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) } /* - * First we set the DMF_QUEUE_IO_TO_THREAD flag so no more ios - * will be mapped. + * Here we must make sure that no processes are submitting requests + * to target drivers i.e. no one may be executing + * __split_and_process_bio. This is called from dm_request and + * dm_wq_work. + * + * To get all processes out of __split_and_process_bio in dm_request, + * we take the write lock. To prevent any process from reentering + * __split_and_process_bio from dm_request, we set + * DMF_QUEUE_IO_TO_THREAD. + * + * To quiesce the thread (dm_wq_work), we set DMF_BLOCK_IO_FOR_SUSPEND + * and call flush_workqueue(md->wq). flush_workqueue will wait until + * dm_wq_work exits and DMF_BLOCK_IO_FOR_SUSPEND will prevent any + * further calls to __split_and_process_bio from dm_wq_work. */ down_write(&md->io_lock); set_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags); set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags); - up_write(&md->io_lock); + flush_workqueue(md->wq); + /* - * Wait for the already-mapped ios to complete. + * At this point no more requests are entering target request routines. + * We call dm_wait_for_completion to wait for all existing requests + * to finish. */ r = dm_wait_for_completion(md, TASK_INTERRUPTIBLE); down_write(&md->io_lock); - if (noflush) clear_bit(DMF_NOFLUSH_SUSPENDING, &md->flags); up_write(&md->io_lock); @@ -1600,6 +1618,12 @@ int dm_suspend(struct mapped_device *md, unsigned suspend_flags) goto out; /* pushback list is already flushed, so skip flush */ } + /* + * If dm_wait_for_completion returned 0, the device is completely + * quiescent now. There is no request-processing activity. All new + * requests are being added to md->deferred list. + */ + dm_table_postsuspend_targets(map); set_bit(DMF_SUSPENDED, &md->flags); -- cgit v1.1 From 92c639021ca6e962645114f02e356e7feb131d0b Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 9 Apr 2009 00:27:15 +0100 Subject: dm: remove dm_request loop Remove queue_io return value and a loop in dm_request. IO may be submitted to a worker thread with queue_io(). queue_io() sets DMF_QUEUE_IO_TO_THREAD so that all further IO is queued for the thread. When the thread finishes its work, it clears DMF_QUEUE_IO_TO_THREAD and from this point on, requests are submitted from dm_request again. This will be used for processing barriers. Remove the loop in dm_request. queue_io() can submit I/Os to the worker thread even if DMF_QUEUE_IO_TO_THREAD was not set. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 22 +++++++--------------- 1 file changed, 7 insertions(+), 15 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 9746c1e..db022e5 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -436,21 +436,18 @@ static void end_io_acct(struct dm_io *io) /* * Add the bio to the list of deferred io. */ -static int queue_io(struct mapped_device *md, struct bio *bio) +static void queue_io(struct mapped_device *md, struct bio *bio) { down_write(&md->io_lock); - if (!test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) { - up_write(&md->io_lock); - return 1; - } - spin_lock_irq(&md->deferred_lock); bio_list_add(&md->deferred, bio); spin_unlock_irq(&md->deferred_lock); + if (!test_and_set_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) + queue_work(md->wq, &md->work); + up_write(&md->io_lock); - return 0; /* deferred successfully */ } /* @@ -953,7 +950,7 @@ static int dm_request(struct request_queue *q, struct bio *bio) * If we're suspended or the thread is processing barriers * we have to queue this io for later. */ - while (test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) { + if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))) { up_read(&md->io_lock); if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && @@ -962,14 +959,9 @@ static int dm_request(struct request_queue *q, struct bio *bio) return 0; } - if (!queue_io(md, bio)) - return 0; + queue_io(md, bio); - /* - * We're in a while loop, because someone could suspend - * before we get to the following read lock. - */ - down_read(&md->io_lock); + return 0; } __split_and_process_bio(md, bio); -- cgit v1.1 From af7e466a1acededbc10beaba9eec8531d561c566 Mon Sep 17 00:00:00 2001 From: Mikulas Patocka Date: Thu, 9 Apr 2009 00:27:16 +0100 Subject: dm: implement basic barrier support Barriers are submitted to a worker thread that issues them in-order. The thread is modified so that when it sees a barrier request it waits for all pending IO before the request then submits the barrier and waits for it. (We must wait, otherwise it could be intermixed with following requests.) Errors from the barrier request are recorded in a per-device barrier_error variable. There may be only one barrier request in progress at once. For now, the barrier request is converted to a non-barrier request when sending it to the underlying device. This patch guarantees correct barrier behavior if the underlying device doesn't perform write-back caching. The same requirement existed before barriers were supported in dm. Bottom layer barrier support (sending barriers by target drivers) and handling devices with write-back caches will be done in further patches. Signed-off-by: Mikulas Patocka Signed-off-by: Alasdair G Kergon --- drivers/md/dm.c | 88 ++++++++++++++++++++++++++++++++++++++++++++------------- 1 file changed, 68 insertions(+), 20 deletions(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index db022e5..8a994be 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -125,6 +125,11 @@ struct mapped_device { spinlock_t deferred_lock; /* + * An error from the barrier request currently being processed. + */ + int barrier_error; + + /* * Processing queue (flush/barriers) */ struct workqueue_struct *wq; @@ -425,6 +430,10 @@ static void end_io_acct(struct dm_io *io) part_stat_add(cpu, &dm_disk(md)->part0, ticks[rw], duration); part_stat_unlock(); + /* + * After this is decremented the bio must not be touched if it is + * a barrier. + */ dm_disk(md)->part0.in_flight = pending = atomic_dec_return(&md->pending); @@ -531,25 +540,35 @@ static void dec_pending(struct dm_io *io, int error) */ spin_lock_irqsave(&md->deferred_lock, flags); if (__noflush_suspending(md)) - bio_list_add(&md->deferred, io->bio); + bio_list_add_head(&md->deferred, io->bio); else /* noflush suspend was interrupted. */ io->error = -EIO; spin_unlock_irqrestore(&md->deferred_lock, flags); } - end_io_acct(io); - io_error = io->error; bio = io->bio; - free_io(md, io); + if (bio_barrier(bio)) { + /* + * There can be just one barrier request so we use + * a per-device variable for error reporting. + * Note that you can't touch the bio after end_io_acct + */ + md->barrier_error = io_error; + end_io_acct(io); + } else { + end_io_acct(io); - if (io_error != DM_ENDIO_REQUEUE) { - trace_block_bio_complete(md->queue, bio); + if (io_error != DM_ENDIO_REQUEUE) { + trace_block_bio_complete(md->queue, bio); - bio_endio(bio, io_error); + bio_endio(bio, io_error); + } } + + free_io(md, io); } } @@ -691,7 +710,7 @@ static struct bio *split_bvec(struct bio *bio, sector_t sector, clone->bi_sector = sector; clone->bi_bdev = bio->bi_bdev; - clone->bi_rw = bio->bi_rw; + clone->bi_rw = bio->bi_rw & ~(1 << BIO_RW_BARRIER); clone->bi_vcnt = 1; clone->bi_size = to_bytes(len); clone->bi_io_vec->bv_offset = offset; @@ -718,6 +737,7 @@ static struct bio *clone_bio(struct bio *bio, sector_t sector, clone = bio_alloc_bioset(GFP_NOIO, bio->bi_max_vecs, bs); __bio_clone(clone, bio); + clone->bi_rw &= ~(1 << BIO_RW_BARRIER); clone->bi_destructor = dm_bio_destructor; clone->bi_sector = sector; clone->bi_idx = idx; @@ -846,7 +866,10 @@ static void __split_and_process_bio(struct mapped_device *md, struct bio *bio) ci.map = dm_get_table(md); if (unlikely(!ci.map)) { - bio_io_error(bio); + if (!bio_barrier(bio)) + bio_io_error(bio); + else + md->barrier_error = -EIO; return; } @@ -930,15 +953,6 @@ static int dm_request(struct request_queue *q, struct bio *bio) struct mapped_device *md = q->queuedata; int cpu; - /* - * There is no use in forwarding any barrier request since we can't - * guarantee it is (or can be) handled by the targets correctly. - */ - if (unlikely(bio_barrier(bio))) { - bio_endio(bio, -EOPNOTSUPP); - return 0; - } - down_read(&md->io_lock); cpu = part_stat_lock(); @@ -950,7 +964,8 @@ static int dm_request(struct request_queue *q, struct bio *bio) * If we're suspended or the thread is processing barriers * we have to queue this io for later. */ - if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags))) { + if (unlikely(test_bit(DMF_QUEUE_IO_TO_THREAD, &md->flags)) || + unlikely(bio_barrier(bio))) { up_read(&md->io_lock); if (unlikely(test_bit(DMF_BLOCK_IO_FOR_SUSPEND, &md->flags)) && @@ -1415,6 +1430,36 @@ static int dm_wait_for_completion(struct mapped_device *md, int interruptible) return r; } +static int dm_flush(struct mapped_device *md) +{ + dm_wait_for_completion(md, TASK_UNINTERRUPTIBLE); + return 0; +} + +static void process_barrier(struct mapped_device *md, struct bio *bio) +{ + int error = dm_flush(md); + + if (unlikely(error)) { + bio_endio(bio, error); + return; + } + if (bio_empty_barrier(bio)) { + bio_endio(bio, 0); + return; + } + + __split_and_process_bio(md, bio); + + error = dm_flush(md); + + if (!error && md->barrier_error) + error = md->barrier_error; + + if (md->barrier_error != DM_ENDIO_REQUEUE) + bio_endio(bio, error); +} + /* * Process the deferred bios */ @@ -1438,7 +1483,10 @@ static void dm_wq_work(struct work_struct *work) up_write(&md->io_lock); - __split_and_process_bio(md, c); + if (bio_barrier(c)) + process_barrier(md, c); + else + __split_and_process_bio(md, c); down_write(&md->io_lock); } -- cgit v1.1 From 8f3d8ba20e67991b531e9c0227dcd1f99271a32c Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Tue, 7 Apr 2009 19:55:13 +0200 Subject: block: move bio list helpers into bio.h It's used by DM and MD and generally useful, so move the bio list helpers into bio.h. Signed-off-by: Christoph Hellwig Acked-by: Alasdair G Kergon Signed-off-by: Jens Axboe --- drivers/md/dm.c | 1 - 1 file changed, 1 deletion(-) (limited to 'drivers/md/dm.c') diff --git a/drivers/md/dm.c b/drivers/md/dm.c index 8a994be..424f7b0 100644 --- a/drivers/md/dm.c +++ b/drivers/md/dm.c @@ -6,7 +6,6 @@ */ #include "dm.h" -#include "dm-bio-list.h" #include "dm-uevent.h" #include -- cgit v1.1