From 6958f145459ca7ad9715024de97445addacb8510 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: kill QUEUE_ORDERED_BY_TAG Nobody is making meaningful use of ORDERED_BY_TAG now and queue draining for barrier requests will be removed soon which will render the advantage of tag ordering moot. Kill ORDERED_BY_TAG. The following users are affected. * brd: converted to ORDERED_DRAIN. * virtio_blk: ORDERED_TAG path was already marked deprecated. Removed. * xen-blkfront: ORDERED_TAG case dropped. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Signed-off-by: Jens Axboe --- block/blk-barrier.c | 35 +++++++---------------------------- 1 file changed, 7 insertions(+), 28 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index f0faefc..c807e9c 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -26,10 +26,7 @@ int blk_queue_ordered(struct request_queue *q, unsigned ordered) if (ordered != QUEUE_ORDERED_NONE && ordered != QUEUE_ORDERED_DRAIN && ordered != QUEUE_ORDERED_DRAIN_FLUSH && - ordered != QUEUE_ORDERED_DRAIN_FUA && - ordered != QUEUE_ORDERED_TAG && - ordered != QUEUE_ORDERED_TAG_FLUSH && - ordered != QUEUE_ORDERED_TAG_FUA) { + ordered != QUEUE_ORDERED_DRAIN_FUA) { printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); return -EINVAL; } @@ -155,21 +152,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) * For an empty barrier, there's no actual BAR request, which * in turn makes POSTFLUSH unnecessary. Mask them off. */ - if (!blk_rq_sectors(rq)) { + if (!blk_rq_sectors(rq)) q->ordered &= ~(QUEUE_ORDERED_DO_BAR | QUEUE_ORDERED_DO_POSTFLUSH); - /* - * Empty barrier on a write-through device w/ ordered - * tag has no command to issue and without any command - * to issue, ordering by tag can't be used. Drain - * instead. - */ - if ((q->ordered & QUEUE_ORDERED_BY_TAG) && - !(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) { - q->ordered &= ~QUEUE_ORDERED_BY_TAG; - q->ordered |= QUEUE_ORDERED_BY_DRAIN; - } - } /* stash away the original request */ blk_dequeue_request(rq); @@ -210,7 +195,7 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) } else skip |= QUEUE_ORDSEQ_PREFLUSH; - if ((q->ordered & QUEUE_ORDERED_BY_DRAIN) && queue_in_flight(q)) + if (queue_in_flight(q)) rq = NULL; else skip |= QUEUE_ORDSEQ_DRAIN; @@ -257,16 +242,10 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) rq != &q->pre_flush_rq && rq != &q->post_flush_rq) return true; - if (q->ordered & QUEUE_ORDERED_BY_TAG) { - /* Ordered by tag. Blocking the next barrier is enough. */ - if (is_barrier && rq != &q->bar_rq) - *rqp = NULL; - } else { - /* Ordered by draining. Wait for turn. */ - WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); - if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - *rqp = NULL; - } + /* Ordered by draining. Wait for turn. */ + WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); + if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) + *rqp = NULL; return true; } -- cgit v1.1 From 4913efe456c987057e5d36a3f0a55422a9072cae Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: deprecate barrier and replace blk_queue_ordered() with blk_queue_flush() Barrier is deemed too heavy and will soon be replaced by FLUSH/FUA requests. Deprecate barrier. All REQ_HARDBARRIERs are failed with -EOPNOTSUPP and blk_queue_ordered() is replaced with simpler blk_queue_flush(). blk_queue_flush() takes combinations of REQ_FLUSH and FUA. If a device has write cache and can flush it, it should set REQ_FLUSH. If the device can handle FUA writes, it should also set REQ_FUA. All blk_queue_ordered() users are converted. * ORDERED_DRAIN is mapped to 0 which is the default value. * ORDERED_DRAIN_FLUSH is mapped to REQ_FLUSH. * ORDERED_DRAIN_FLUSH_FUA is mapped to REQ_FLUSH | REQ_FUA. Signed-off-by: Tejun Heo Acked-by: Boaz Harrosh Cc: Christoph Hellwig Cc: Nick Piggin Cc: Michael S. Tsirkin Cc: Jeremy Fitzhardinge Cc: Chris Wright Cc: FUJITA Tomonori Cc: Geert Uytterhoeven Cc: David S. Miller Cc: Alasdair G Kergon Cc: Pierre Ossman Cc: Stefan Weinhuber Signed-off-by: Jens Axboe --- block/blk-barrier.c | 29 ----------------------------- block/blk-core.c | 6 ++++-- block/blk-settings.c | 20 ++++++++++++++++++++ 3 files changed, 24 insertions(+), 31 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index c807e9c..ed0aba5 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -9,35 +9,6 @@ #include "blk.h" -/** - * blk_queue_ordered - does this queue support ordered writes - * @q: the request queue - * @ordered: one of QUEUE_ORDERED_* - * - * Description: - * For journalled file systems, doing ordered writes on a commit - * block instead of explicitly doing wait_on_buffer (which is bad - * for performance) can be a big win. Block drivers supporting this - * feature should call this function and indicate so. - * - **/ -int blk_queue_ordered(struct request_queue *q, unsigned ordered) -{ - if (ordered != QUEUE_ORDERED_NONE && - ordered != QUEUE_ORDERED_DRAIN && - ordered != QUEUE_ORDERED_DRAIN_FLUSH && - ordered != QUEUE_ORDERED_DRAIN_FUA) { - printk(KERN_ERR "blk_queue_ordered: bad value %d\n", ordered); - return -EINVAL; - } - - q->ordered = ordered; - q->next_ordered = ordered; - - return 0; -} -EXPORT_SYMBOL(blk_queue_ordered); - /* * Cache flushing for ordered writes handling */ diff --git a/block/blk-core.c b/block/blk-core.c index ee1a1e7..f063541 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1203,11 +1203,13 @@ static int __make_request(struct request_queue *q, struct bio *bio) const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; int rw_flags; - if ((bio->bi_rw & REQ_HARDBARRIER) && - (q->next_ordered == QUEUE_ORDERED_NONE)) { + /* REQ_HARDBARRIER is no more */ + if (WARN_ONCE(bio->bi_rw & REQ_HARDBARRIER, + "block: HARDBARRIER is deprecated, use FLUSH/FUA instead\n")) { bio_endio(bio, -EOPNOTSUPP); return 0; } + /* * low level driver can indicate that it wants pages above a * certain limit bounced to low memory (ie for highmem, or even diff --git a/block/blk-settings.c b/block/blk-settings.c index a234f4b..9b18afc 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -794,6 +794,26 @@ void blk_queue_update_dma_alignment(struct request_queue *q, int mask) } EXPORT_SYMBOL(blk_queue_update_dma_alignment); +/** + * blk_queue_flush - configure queue's cache flush capability + * @q: the request queue for the device + * @flush: 0, REQ_FLUSH or REQ_FLUSH | REQ_FUA + * + * Tell block layer cache flush capability of @q. If it supports + * flushing, REQ_FLUSH should be set. If it supports bypassing + * write cache for individual writes, REQ_FUA should be set. + */ +void blk_queue_flush(struct request_queue *q, unsigned int flush) +{ + WARN_ON_ONCE(flush & ~(REQ_FLUSH | REQ_FUA)); + + if (WARN_ON_ONCE(!(flush & REQ_FLUSH) && (flush & REQ_FUA))) + flush &= ~REQ_FUA; + + q->flush_flags = flush & (REQ_FLUSH | REQ_FUA); +} +EXPORT_SYMBOL_GPL(blk_queue_flush); + static int __init blk_settings_init(void) { blk_max_low_pfn = max_low_pfn - 1; -- cgit v1.1 From dd831006d5be7f74c3fe7aef82380c51c3637960 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: misc cleanups in barrier code Make the following cleanups in preparation of barrier/flush update. * blk_do_ordered() declaration is moved from include/linux/blkdev.h to block/blk.h. * blk_do_ordered() now returns pointer to struct request, with %NULL meaning "try the next request" and ERR_PTR(-EAGAIN) "try again later". The third case will be dropped with further changes. * In the initialization of proxy barrier request, data direction is already set by init_request_from_bio(). Drop unnecessary explicit REQ_WRITE setting and move init_request_from_bio() above REQ_FUA flag setting. * add_request() is collapsed into __make_request(). These changes don't make any functional difference. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-barrier.c | 32 ++++++++++++++------------------ block/blk-core.c | 21 ++++----------------- block/blk.h | 7 +++++-- 3 files changed, 23 insertions(+), 37 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index ed0aba5..f1be85b 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -110,9 +110,9 @@ static void queue_flush(struct request_queue *q, unsigned which) elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline bool start_ordered(struct request_queue *q, struct request **rqp) +static inline struct request *start_ordered(struct request_queue *q, + struct request *rq) { - struct request *rq = *rqp; unsigned skip = 0; q->orderr = 0; @@ -149,11 +149,9 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) /* initialize proxy request and queue it */ blk_rq_init(q, rq); - if (bio_data_dir(q->orig_bar_rq->bio) == WRITE) - rq->cmd_flags |= REQ_WRITE; + init_request_from_bio(rq, q->orig_bar_rq->bio); if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; - init_request_from_bio(rq, q->orig_bar_rq->bio); rq->end_io = bar_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); @@ -171,27 +169,26 @@ static inline bool start_ordered(struct request_queue *q, struct request **rqp) else skip |= QUEUE_ORDSEQ_DRAIN; - *rqp = rq; - /* * Complete skipped sequences. If whole sequence is complete, - * return false to tell elevator that this request is gone. + * return %NULL to tell elevator that this request is gone. */ - return !blk_ordered_complete_seq(q, skip, 0); + if (blk_ordered_complete_seq(q, skip, 0)) + rq = NULL; + return rq; } -bool blk_do_ordered(struct request_queue *q, struct request **rqp) +struct request *blk_do_ordered(struct request_queue *q, struct request *rq) { - struct request *rq = *rqp; const int is_barrier = rq->cmd_type == REQ_TYPE_FS && (rq->cmd_flags & REQ_HARDBARRIER); if (!q->ordseq) { if (!is_barrier) - return true; + return rq; if (q->next_ordered != QUEUE_ORDERED_NONE) - return start_ordered(q, rqp); + return start_ordered(q, rq); else { /* * Queue ordering not supported. Terminate @@ -199,8 +196,7 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) */ blk_dequeue_request(rq); __blk_end_request_all(rq, -EOPNOTSUPP); - *rqp = NULL; - return false; + return NULL; } } @@ -211,14 +207,14 @@ bool blk_do_ordered(struct request_queue *q, struct request **rqp) /* Special requests are not subject to ordering rules. */ if (rq->cmd_type != REQ_TYPE_FS && rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return true; + return rq; /* Ordered by draining. Wait for turn. */ WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - *rqp = NULL; + rq = ERR_PTR(-EAGAIN); - return true; + return rq; } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk-core.c b/block/blk-core.c index f063541..f8d37a8 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1037,22 +1037,6 @@ void blk_insert_request(struct request_queue *q, struct request *rq, } EXPORT_SYMBOL(blk_insert_request); -/* - * add-request adds a request to the linked list. - * queue lock is held and interrupts disabled, as we muck with the - * request queue list. - */ -static inline void add_request(struct request_queue *q, struct request *req) -{ - drive_stat_acct(req, 1); - - /* - * elevator indicated where it wants this request to be - * inserted at elevator_merge time - */ - __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); -} - static void part_round_stats_single(int cpu, struct hd_struct *part, unsigned long now) { @@ -1316,7 +1300,10 @@ get_rq: req->cpu = blk_cpu_to_group(smp_processor_id()); if (queue_should_plug(q) && elv_queue_empty(q)) blk_plug_device(q); - add_request(q, req); + + /* insert the request into the elevator */ + drive_stat_acct(req, 1); + __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); out: if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); diff --git a/block/blk.h b/block/blk.h index 6e7dc87..874eb4e 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,6 +51,8 @@ static inline void blk_clear_rq_complete(struct request *rq) */ #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) +struct request *blk_do_ordered(struct request_queue *q, struct request *rq); + static inline struct request *__elv_next_request(struct request_queue *q) { struct request *rq; @@ -58,8 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - if (blk_do_ordered(q, &rq)) - return rq; + rq = blk_do_ordered(q, rq); + if (rq) + return !IS_ERR(rq) ? rq : NULL; } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) -- cgit v1.1 From 28e7d1845216538303bb95d679d8fd4de50e2f1a Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: drop barrier ordering by queue draining Filesystems will take all the responsibilities for ordering requests around commit writes and will only indicate how the commit writes themselves should be handled by block layers. This patch drops barrier ordering by queue draining from block layer. Ordering by draining implementation was somewhat invasive to request handling. List of notable changes follow. * Each queue has 1 bit color which is flipped on each barrier issue. This is used to track whether a given request is issued before the current barrier or not. REQ_ORDERED_COLOR flag and coloring implementation in __elv_add_request() are removed. * Requests which shouldn't be processed yet for draining were stalled by returning -EAGAIN from blk_do_ordered() according to the test result between blk_ordered_req_seq() and blk_blk_ordered_cur_seq(). This logic is removed. * Draining completion logic in elv_completed_request() removed. * All barrier sequence requests were queued to request queue and then trckled to lower layer according to progress and thus maintaining request orders during requeue was necessary. This is replaced by queueing the next request in the barrier sequence only after the current one is complete from blk_ordered_complete_seq(), which removes the need for multiple proxy requests in struct request_queue and the request sorting logic in the ELEVATOR_INSERT_REQUEUE path of elv_insert(). * As barriers no longer have ordering constraints, there's no need to dump the whole elevator onto the dispatch queue on each barrier. Insert barriers at the front instead. * If other barrier requests come to the front of the dispatch queue while one is already in progress, they are stored in q->pending_barriers and restored to dispatch queue one-by-one after each barrier completion from blk_ordered_complete_seq(). Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-barrier.c | 220 ++++++++++++++++++++-------------------------------- block/blk-core.c | 11 ++- block/blk.h | 2 +- block/elevator.c | 79 +++---------------- 4 files changed, 105 insertions(+), 207 deletions(-) (limited to 'block') diff --git a/block/blk-barrier.c b/block/blk-barrier.c index f1be85b..e8b2e5c 100644 --- a/block/blk-barrier.c +++ b/block/blk-barrier.c @@ -9,6 +9,8 @@ #include "blk.h" +static struct request *queue_next_ordseq(struct request_queue *q); + /* * Cache flushing for ordered writes handling */ @@ -19,38 +21,10 @@ unsigned blk_ordered_cur_seq(struct request_queue *q) return 1 << ffz(q->ordseq); } -unsigned blk_ordered_req_seq(struct request *rq) -{ - struct request_queue *q = rq->q; - - BUG_ON(q->ordseq == 0); - - if (rq == &q->pre_flush_rq) - return QUEUE_ORDSEQ_PREFLUSH; - if (rq == &q->bar_rq) - return QUEUE_ORDSEQ_BAR; - if (rq == &q->post_flush_rq) - return QUEUE_ORDSEQ_POSTFLUSH; - - /* - * !fs requests don't need to follow barrier ordering. Always - * put them at the front. This fixes the following deadlock. - * - * http://thread.gmane.org/gmane.linux.kernel/537473 - */ - if (rq->cmd_type != REQ_TYPE_FS) - return QUEUE_ORDSEQ_DRAIN; - - if ((rq->cmd_flags & REQ_ORDERED_COLOR) == - (q->orig_bar_rq->cmd_flags & REQ_ORDERED_COLOR)) - return QUEUE_ORDSEQ_DRAIN; - else - return QUEUE_ORDSEQ_DONE; -} - -bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) +static struct request *blk_ordered_complete_seq(struct request_queue *q, + unsigned seq, int error) { - struct request *rq; + struct request *next_rq = NULL; if (error && !q->orderr) q->orderr = error; @@ -58,16 +32,22 @@ bool blk_ordered_complete_seq(struct request_queue *q, unsigned seq, int error) BUG_ON(q->ordseq & seq); q->ordseq |= seq; - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) - return false; - - /* - * Okay, sequence complete. - */ - q->ordseq = 0; - rq = q->orig_bar_rq; - __blk_end_request_all(rq, q->orderr); - return true; + if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { + /* not complete yet, queue the next ordered sequence */ + next_rq = queue_next_ordseq(q); + } else { + /* complete this barrier request */ + __blk_end_request_all(q->orig_bar_rq, q->orderr); + q->orig_bar_rq = NULL; + q->ordseq = 0; + + /* dispatch the next barrier if there's one */ + if (!list_empty(&q->pending_barriers)) { + next_rq = list_entry_rq(q->pending_barriers.next); + list_move(&next_rq->queuelist, &q->queue_head); + } + } + return next_rq; } static void pre_flush_end_io(struct request *rq, int error) @@ -88,133 +68,105 @@ static void post_flush_end_io(struct request *rq, int error) blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); } -static void queue_flush(struct request_queue *q, unsigned which) +static void queue_flush(struct request_queue *q, struct request *rq, + rq_end_io_fn *end_io) { - struct request *rq; - rq_end_io_fn *end_io; - - if (which == QUEUE_ORDERED_DO_PREFLUSH) { - rq = &q->pre_flush_rq; - end_io = pre_flush_end_io; - } else { - rq = &q->post_flush_rq; - end_io = post_flush_end_io; - } - blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; - rq->cmd_flags = REQ_HARDBARRIER | REQ_FLUSH; + rq->cmd_flags = REQ_FLUSH; rq->rq_disk = q->orig_bar_rq->rq_disk; rq->end_io = end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static inline struct request *start_ordered(struct request_queue *q, - struct request *rq) +static struct request *queue_next_ordseq(struct request_queue *q) { - unsigned skip = 0; - - q->orderr = 0; - q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; - - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ - blk_dequeue_request(rq); - q->orig_bar_rq = rq; - rq = NULL; - - /* - * Queue ordered sequence. As we stack them at the head, we - * need to queue in reverse order. Note that we rely on that - * no fs request uses ELEVATOR_INSERT_FRONT and thus no fs - * request gets inbetween ordered sequence. - */ - if (q->ordered & QUEUE_ORDERED_DO_POSTFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_POSTFLUSH); - rq = &q->post_flush_rq; - } else - skip |= QUEUE_ORDSEQ_POSTFLUSH; + struct request *rq = &q->bar_rq; - if (q->ordered & QUEUE_ORDERED_DO_BAR) { - rq = &q->bar_rq; + switch (blk_ordered_cur_seq(q)) { + case QUEUE_ORDSEQ_PREFLUSH: + queue_flush(q, rq, pre_flush_end_io); + break; + case QUEUE_ORDSEQ_BAR: /* initialize proxy request and queue it */ blk_rq_init(q, rq); init_request_from_bio(rq, q->orig_bar_rq->bio); + rq->cmd_flags &= ~REQ_HARDBARRIER; if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; rq->end_io = bar_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); - } else - skip |= QUEUE_ORDSEQ_BAR; + break; - if (q->ordered & QUEUE_ORDERED_DO_PREFLUSH) { - queue_flush(q, QUEUE_ORDERED_DO_PREFLUSH); - rq = &q->pre_flush_rq; - } else - skip |= QUEUE_ORDSEQ_PREFLUSH; + case QUEUE_ORDSEQ_POSTFLUSH: + queue_flush(q, rq, post_flush_end_io); + break; - if (queue_in_flight(q)) - rq = NULL; - else - skip |= QUEUE_ORDSEQ_DRAIN; - - /* - * Complete skipped sequences. If whole sequence is complete, - * return %NULL to tell elevator that this request is gone. - */ - if (blk_ordered_complete_seq(q, skip, 0)) - rq = NULL; + default: + BUG(); + } return rq; } struct request *blk_do_ordered(struct request_queue *q, struct request *rq) { - const int is_barrier = rq->cmd_type == REQ_TYPE_FS && - (rq->cmd_flags & REQ_HARDBARRIER); - - if (!q->ordseq) { - if (!is_barrier) - return rq; - - if (q->next_ordered != QUEUE_ORDERED_NONE) - return start_ordered(q, rq); - else { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } + unsigned skip = 0; + + if (!(rq->cmd_flags & REQ_HARDBARRIER)) + return rq; + + if (q->ordseq) { + /* + * Barrier is already in progress and they can't be + * processed in parallel. Queue for later processing. + */ + list_move_tail(&rq->queuelist, &q->pending_barriers); + return NULL; + } + + if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { + /* + * Queue ordering not supported. Terminate + * with prejudice. + */ + blk_dequeue_request(rq); + __blk_end_request_all(rq, -EOPNOTSUPP); + return NULL; } /* - * Ordered sequence in progress + * Start a new ordered sequence */ + q->orderr = 0; + q->ordered = q->next_ordered; + q->ordseq |= QUEUE_ORDSEQ_STARTED; - /* Special requests are not subject to ordering rules. */ - if (rq->cmd_type != REQ_TYPE_FS && - rq != &q->pre_flush_rq && rq != &q->post_flush_rq) - return rq; + /* + * For an empty barrier, there's no actual BAR request, which + * in turn makes POSTFLUSH unnecessary. Mask them off. + */ + if (!blk_rq_sectors(rq)) + q->ordered &= ~(QUEUE_ORDERED_DO_BAR | + QUEUE_ORDERED_DO_POSTFLUSH); - /* Ordered by draining. Wait for turn. */ - WARN_ON(blk_ordered_req_seq(rq) < blk_ordered_cur_seq(q)); - if (blk_ordered_req_seq(rq) > blk_ordered_cur_seq(q)) - rq = ERR_PTR(-EAGAIN); + /* stash away the original request */ + blk_dequeue_request(rq); + q->orig_bar_rq = rq; - return rq; + if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + skip |= QUEUE_ORDSEQ_PREFLUSH; + + if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + skip |= QUEUE_ORDSEQ_BAR; + + if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + skip |= QUEUE_ORDSEQ_POSTFLUSH; + + /* complete skipped sequences and return the first sequence */ + return blk_ordered_complete_seq(q, skip, 0); } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk-core.c b/block/blk-core.c index f8d37a8..d316662 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -520,6 +520,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); + INIT_LIST_HEAD(&q->pending_barriers); INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1185,6 +1186,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) const bool sync = (bio->bi_rw & REQ_SYNC); const bool unplug = (bio->bi_rw & REQ_UNPLUG); const unsigned int ff = bio->bi_rw & REQ_FAILFAST_MASK; + int where = ELEVATOR_INSERT_SORT; int rw_flags; /* REQ_HARDBARRIER is no more */ @@ -1203,7 +1205,12 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (unlikely((bio->bi_rw & REQ_HARDBARRIER)) || elv_queue_empty(q)) + if (bio->bi_rw & REQ_HARDBARRIER) { + where = ELEVATOR_INSERT_FRONT; + goto get_rq; + } + + if (elv_queue_empty(q)) goto get_rq; el_ret = elv_merge(q, &req, bio); @@ -1303,7 +1310,7 @@ get_rq: /* insert the request into the elevator */ drive_stat_acct(req, 1); - __elv_add_request(q, req, ELEVATOR_INSERT_SORT, 0); + __elv_add_request(q, req, where, 0); out: if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); diff --git a/block/blk.h b/block/blk.h index 874eb4e..08081e4b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -62,7 +62,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) rq = list_entry_rq(q->queue_head.next); rq = blk_do_ordered(q, rq); if (rq) - return !IS_ERR(rq) ? rq : NULL; + return rq; } if (!q->elevator->ops->elevator_dispatch_fn(q, 0)) diff --git a/block/elevator.c b/block/elevator.c index ec585c9..241c69c 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -617,8 +617,6 @@ void elv_quiesce_end(struct request_queue *q) void elv_insert(struct request_queue *q, struct request *rq, int where) { - struct list_head *pos; - unsigned ordseq; int unplug_it = 1; trace_block_rq_insert(q, rq); @@ -626,9 +624,16 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) rq->q = q; switch (where) { + case ELEVATOR_INSERT_REQUEUE: + /* + * Most requeues happen because of a busy condition, + * don't force unplug of the queue for that case. + * Clear unplug_it and fall through. + */ + unplug_it = 0; + case ELEVATOR_INSERT_FRONT: rq->cmd_flags |= REQ_SOFTBARRIER; - list_add(&rq->queuelist, &q->queue_head); break; @@ -668,36 +673,6 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) q->elevator->ops->elevator_add_req_fn(q, rq); break; - case ELEVATOR_INSERT_REQUEUE: - /* - * If ordered flush isn't in progress, we do front - * insertion; otherwise, requests should be requeued - * in ordseq order. - */ - rq->cmd_flags |= REQ_SOFTBARRIER; - - /* - * Most requeues happen because of a busy condition, - * don't force unplug of the queue for that case. - */ - unplug_it = 0; - - if (q->ordseq == 0) { - list_add(&rq->queuelist, &q->queue_head); - break; - } - - ordseq = blk_ordered_req_seq(rq); - - list_for_each(pos, &q->queue_head) { - struct request *pos_rq = list_entry_rq(pos); - if (ordseq <= blk_ordered_req_seq(pos_rq)) - break; - } - - list_add_tail(&rq->queuelist, pos); - break; - default: printk(KERN_ERR "%s: bad insertion point %d\n", __func__, where); @@ -716,26 +691,8 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) void __elv_add_request(struct request_queue *q, struct request *rq, int where, int plug) { - if (q->ordcolor) - rq->cmd_flags |= REQ_ORDERED_COLOR; - if (rq->cmd_flags & (REQ_SOFTBARRIER | REQ_HARDBARRIER)) { - /* - * toggle ordered color - */ - if (rq->cmd_flags & REQ_HARDBARRIER) - q->ordcolor ^= 1; - - /* - * barriers implicitly indicate back insertion - */ - if (where == ELEVATOR_INSERT_SORT) - where = ELEVATOR_INSERT_BACK; - - /* - * this request is scheduling boundary, update - * end_sector - */ + /* barriers are scheduling boundary, update end_sector */ if (rq->cmd_type == REQ_TYPE_FS || (rq->cmd_flags & REQ_DISCARD)) { q->end_sector = rq_end_sector(rq); @@ -855,24 +812,6 @@ void elv_completed_request(struct request_queue *q, struct request *rq) e->ops->elevator_completed_req_fn) e->ops->elevator_completed_req_fn(q, rq); } - - /* - * Check if the queue is waiting for fs requests to be - * drained for flush sequence. - */ - if (unlikely(q->ordseq)) { - struct request *next = NULL; - - if (!list_empty(&q->queue_head)) - next = list_entry_rq(q->queue_head.next); - - if (!queue_in_flight(q) && - blk_ordered_cur_seq(q) == QUEUE_ORDSEQ_DRAIN && - (!next || blk_ordered_req_seq(next) > QUEUE_ORDSEQ_DRAIN)) { - blk_ordered_complete_seq(q, QUEUE_ORDSEQ_DRAIN, 0); - __blk_run_queue(q); - } - } } #define to_elv(atr) container_of((atr), struct elv_fs_entry, attr) -- cgit v1.1 From 8839a0e055d9abd6c011d533373a8dd266cad011 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: rename blk-barrier.c to blk-flush.c Without ordering requirements, barrier and ordering are minomers. Rename block/blk-barrier.c to block/blk-flush.c. Rename of symbols will follow. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/Makefile | 2 +- block/blk-barrier.c | 248 ---------------------------------------------------- block/blk-flush.c | 248 ++++++++++++++++++++++++++++++++++++++++++++++++++++ 3 files changed, 249 insertions(+), 249 deletions(-) delete mode 100644 block/blk-barrier.c create mode 100644 block/blk-flush.c (limited to 'block') diff --git a/block/Makefile b/block/Makefile index 0bb499a..f627e4b 100644 --- a/block/Makefile +++ b/block/Makefile @@ -3,7 +3,7 @@ # obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ - blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ + blk-flush.o blk-settings.o blk-ioc.o blk-map.o \ blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o diff --git a/block/blk-barrier.c b/block/blk-barrier.c deleted file mode 100644 index e8b2e5c..0000000 --- a/block/blk-barrier.c +++ /dev/null @@ -1,248 +0,0 @@ -/* - * Functions related to barrier IO handling - */ -#include -#include -#include -#include -#include - -#include "blk.h" - -static struct request *queue_next_ordseq(struct request_queue *q); - -/* - * Cache flushing for ordered writes handling - */ -unsigned blk_ordered_cur_seq(struct request_queue *q) -{ - if (!q->ordseq) - return 0; - return 1 << ffz(q->ordseq); -} - -static struct request *blk_ordered_complete_seq(struct request_queue *q, - unsigned seq, int error) -{ - struct request *next_rq = NULL; - - if (error && !q->orderr) - q->orderr = error; - - BUG_ON(q->ordseq & seq); - q->ordseq |= seq; - - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { - /* not complete yet, queue the next ordered sequence */ - next_rq = queue_next_ordseq(q); - } else { - /* complete this barrier request */ - __blk_end_request_all(q->orig_bar_rq, q->orderr); - q->orig_bar_rq = NULL; - q->ordseq = 0; - - /* dispatch the next barrier if there's one */ - if (!list_empty(&q->pending_barriers)) { - next_rq = list_entry_rq(q->pending_barriers.next); - list_move(&next_rq->queuelist, &q->queue_head); - } - } - return next_rq; -} - -static void pre_flush_end_io(struct request *rq, int error) -{ - elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); -} - -static void bar_end_io(struct request *rq, int error) -{ - elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); -} - -static void post_flush_end_io(struct request *rq, int error) -{ - elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); -} - -static void queue_flush(struct request_queue *q, struct request *rq, - rq_end_io_fn *end_io) -{ - blk_rq_init(q, rq); - rq->cmd_type = REQ_TYPE_FS; - rq->cmd_flags = REQ_FLUSH; - rq->rq_disk = q->orig_bar_rq->rq_disk; - rq->end_io = end_io; - - elv_insert(q, rq, ELEVATOR_INSERT_FRONT); -} - -static struct request *queue_next_ordseq(struct request_queue *q) -{ - struct request *rq = &q->bar_rq; - - switch (blk_ordered_cur_seq(q)) { - case QUEUE_ORDSEQ_PREFLUSH: - queue_flush(q, rq, pre_flush_end_io); - break; - - case QUEUE_ORDSEQ_BAR: - /* initialize proxy request and queue it */ - blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_bar_rq->bio); - rq->cmd_flags &= ~REQ_HARDBARRIER; - if (q->ordered & QUEUE_ORDERED_DO_FUA) - rq->cmd_flags |= REQ_FUA; - rq->end_io = bar_end_io; - - elv_insert(q, rq, ELEVATOR_INSERT_FRONT); - break; - - case QUEUE_ORDSEQ_POSTFLUSH: - queue_flush(q, rq, post_flush_end_io); - break; - - default: - BUG(); - } - return rq; -} - -struct request *blk_do_ordered(struct request_queue *q, struct request *rq) -{ - unsigned skip = 0; - - if (!(rq->cmd_flags & REQ_HARDBARRIER)) - return rq; - - if (q->ordseq) { - /* - * Barrier is already in progress and they can't be - * processed in parallel. Queue for later processing. - */ - list_move_tail(&rq->queuelist, &q->pending_barriers); - return NULL; - } - - if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } - - /* - * Start a new ordered sequence - */ - q->orderr = 0; - q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; - - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ - blk_dequeue_request(rq); - q->orig_bar_rq = rq; - - if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) - skip |= QUEUE_ORDSEQ_PREFLUSH; - - if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) - skip |= QUEUE_ORDSEQ_BAR; - - if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) - skip |= QUEUE_ORDSEQ_POSTFLUSH; - - /* complete skipped sequences and return the first sequence */ - return blk_ordered_complete_seq(q, skip, 0); -} - -static void bio_end_empty_barrier(struct bio *bio, int err) -{ - if (err) { - if (err == -EOPNOTSUPP) - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); - clear_bit(BIO_UPTODATE, &bio->bi_flags); - } - if (bio->bi_private) - complete(bio->bi_private); - bio_put(bio); -} - -/** - * blkdev_issue_flush - queue a flush - * @bdev: blockdev to issue flush for - * @gfp_mask: memory allocation flags (for bio_alloc) - * @error_sector: error sector - * @flags: BLKDEV_IFL_* flags to control behaviour - * - * Description: - * Issue a flush for the block device in question. Caller can supply - * room for storing the error offset in case of a flush error, if they - * wish to. If WAIT flag is not passed then caller may check only what - * request was pushed in some internal queue for later handling. - */ -int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, - sector_t *error_sector, unsigned long flags) -{ - DECLARE_COMPLETION_ONSTACK(wait); - struct request_queue *q; - struct bio *bio; - int ret = 0; - - if (bdev->bd_disk == NULL) - return -ENXIO; - - q = bdev_get_queue(bdev); - if (!q) - return -ENXIO; - - /* - * some block devices may not have their queue correctly set up here - * (e.g. loop device without a backing file) and so issuing a flush - * here will panic. Ensure there is a request function before issuing - * the barrier. - */ - if (!q->make_request_fn) - return -ENXIO; - - bio = bio_alloc(gfp_mask, 0); - bio->bi_end_io = bio_end_empty_barrier; - bio->bi_bdev = bdev; - if (test_bit(BLKDEV_WAIT, &flags)) - bio->bi_private = &wait; - - bio_get(bio); - submit_bio(WRITE_BARRIER, bio); - if (test_bit(BLKDEV_WAIT, &flags)) { - wait_for_completion(&wait); - /* - * The driver must store the error location in ->bi_sector, if - * it supports it. For non-stacked drivers, this should be - * copied from blk_rq_pos(rq). - */ - if (error_sector) - *error_sector = bio->bi_sector; - } - - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - else if (!bio_flagged(bio, BIO_UPTODATE)) - ret = -EIO; - - bio_put(bio); - return ret; -} -EXPORT_SYMBOL(blkdev_issue_flush); diff --git a/block/blk-flush.c b/block/blk-flush.c new file mode 100644 index 0000000..e8b2e5c --- /dev/null +++ b/block/blk-flush.c @@ -0,0 +1,248 @@ +/* + * Functions related to barrier IO handling + */ +#include +#include +#include +#include +#include + +#include "blk.h" + +static struct request *queue_next_ordseq(struct request_queue *q); + +/* + * Cache flushing for ordered writes handling + */ +unsigned blk_ordered_cur_seq(struct request_queue *q) +{ + if (!q->ordseq) + return 0; + return 1 << ffz(q->ordseq); +} + +static struct request *blk_ordered_complete_seq(struct request_queue *q, + unsigned seq, int error) +{ + struct request *next_rq = NULL; + + if (error && !q->orderr) + q->orderr = error; + + BUG_ON(q->ordseq & seq); + q->ordseq |= seq; + + if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { + /* not complete yet, queue the next ordered sequence */ + next_rq = queue_next_ordseq(q); + } else { + /* complete this barrier request */ + __blk_end_request_all(q->orig_bar_rq, q->orderr); + q->orig_bar_rq = NULL; + q->ordseq = 0; + + /* dispatch the next barrier if there's one */ + if (!list_empty(&q->pending_barriers)) { + next_rq = list_entry_rq(q->pending_barriers.next); + list_move(&next_rq->queuelist, &q->queue_head); + } + } + return next_rq; +} + +static void pre_flush_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); +} + +static void bar_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); +} + +static void post_flush_end_io(struct request *rq, int error) +{ + elv_completed_request(rq->q, rq); + blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); +} + +static void queue_flush(struct request_queue *q, struct request *rq, + rq_end_io_fn *end_io) +{ + blk_rq_init(q, rq); + rq->cmd_type = REQ_TYPE_FS; + rq->cmd_flags = REQ_FLUSH; + rq->rq_disk = q->orig_bar_rq->rq_disk; + rq->end_io = end_io; + + elv_insert(q, rq, ELEVATOR_INSERT_FRONT); +} + +static struct request *queue_next_ordseq(struct request_queue *q) +{ + struct request *rq = &q->bar_rq; + + switch (blk_ordered_cur_seq(q)) { + case QUEUE_ORDSEQ_PREFLUSH: + queue_flush(q, rq, pre_flush_end_io); + break; + + case QUEUE_ORDSEQ_BAR: + /* initialize proxy request and queue it */ + blk_rq_init(q, rq); + init_request_from_bio(rq, q->orig_bar_rq->bio); + rq->cmd_flags &= ~REQ_HARDBARRIER; + if (q->ordered & QUEUE_ORDERED_DO_FUA) + rq->cmd_flags |= REQ_FUA; + rq->end_io = bar_end_io; + + elv_insert(q, rq, ELEVATOR_INSERT_FRONT); + break; + + case QUEUE_ORDSEQ_POSTFLUSH: + queue_flush(q, rq, post_flush_end_io); + break; + + default: + BUG(); + } + return rq; +} + +struct request *blk_do_ordered(struct request_queue *q, struct request *rq) +{ + unsigned skip = 0; + + if (!(rq->cmd_flags & REQ_HARDBARRIER)) + return rq; + + if (q->ordseq) { + /* + * Barrier is already in progress and they can't be + * processed in parallel. Queue for later processing. + */ + list_move_tail(&rq->queuelist, &q->pending_barriers); + return NULL; + } + + if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { + /* + * Queue ordering not supported. Terminate + * with prejudice. + */ + blk_dequeue_request(rq); + __blk_end_request_all(rq, -EOPNOTSUPP); + return NULL; + } + + /* + * Start a new ordered sequence + */ + q->orderr = 0; + q->ordered = q->next_ordered; + q->ordseq |= QUEUE_ORDSEQ_STARTED; + + /* + * For an empty barrier, there's no actual BAR request, which + * in turn makes POSTFLUSH unnecessary. Mask them off. + */ + if (!blk_rq_sectors(rq)) + q->ordered &= ~(QUEUE_ORDERED_DO_BAR | + QUEUE_ORDERED_DO_POSTFLUSH); + + /* stash away the original request */ + blk_dequeue_request(rq); + q->orig_bar_rq = rq; + + if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + skip |= QUEUE_ORDSEQ_PREFLUSH; + + if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + skip |= QUEUE_ORDSEQ_BAR; + + if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + skip |= QUEUE_ORDSEQ_POSTFLUSH; + + /* complete skipped sequences and return the first sequence */ + return blk_ordered_complete_seq(q, skip, 0); +} + +static void bio_end_empty_barrier(struct bio *bio, int err) +{ + if (err) { + if (err == -EOPNOTSUPP) + set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + clear_bit(BIO_UPTODATE, &bio->bi_flags); + } + if (bio->bi_private) + complete(bio->bi_private); + bio_put(bio); +} + +/** + * blkdev_issue_flush - queue a flush + * @bdev: blockdev to issue flush for + * @gfp_mask: memory allocation flags (for bio_alloc) + * @error_sector: error sector + * @flags: BLKDEV_IFL_* flags to control behaviour + * + * Description: + * Issue a flush for the block device in question. Caller can supply + * room for storing the error offset in case of a flush error, if they + * wish to. If WAIT flag is not passed then caller may check only what + * request was pushed in some internal queue for later handling. + */ +int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, + sector_t *error_sector, unsigned long flags) +{ + DECLARE_COMPLETION_ONSTACK(wait); + struct request_queue *q; + struct bio *bio; + int ret = 0; + + if (bdev->bd_disk == NULL) + return -ENXIO; + + q = bdev_get_queue(bdev); + if (!q) + return -ENXIO; + + /* + * some block devices may not have their queue correctly set up here + * (e.g. loop device without a backing file) and so issuing a flush + * here will panic. Ensure there is a request function before issuing + * the barrier. + */ + if (!q->make_request_fn) + return -ENXIO; + + bio = bio_alloc(gfp_mask, 0); + bio->bi_end_io = bio_end_empty_barrier; + bio->bi_bdev = bdev; + if (test_bit(BLKDEV_WAIT, &flags)) + bio->bi_private = &wait; + + bio_get(bio); + submit_bio(WRITE_BARRIER, bio); + if (test_bit(BLKDEV_WAIT, &flags)) { + wait_for_completion(&wait); + /* + * The driver must store the error location in ->bi_sector, if + * it supports it. For non-stacked drivers, this should be + * copied from blk_rq_pos(rq). + */ + if (error_sector) + *error_sector = bio->bi_sector; + } + + if (bio_flagged(bio, BIO_EOPNOTSUPP)) + ret = -EOPNOTSUPP; + else if (!bio_flagged(bio, BIO_UPTODATE)) + ret = -EIO; + + bio_put(bio); + return ret; +} +EXPORT_SYMBOL(blkdev_issue_flush); -- cgit v1.1 From dd4c133f387c48f526022860ad70354637a80f4c Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:16 +0200 Subject: block: rename barrier/ordered to flush With ordering requirements dropped, barrier and ordered are misnomers. Now all block layer does is sequencing FLUSH and FUA. Rename them to flush. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 21 ++++++------ block/blk-flush.c | 98 +++++++++++++++++++++++++++---------------------------- block/blk.h | 4 +-- 3 files changed, 60 insertions(+), 63 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index d316662..8870ae4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -136,7 +136,7 @@ static void req_bio_endio(struct request *rq, struct bio *bio, { struct request_queue *q = rq->q; - if (&q->bar_rq != rq) { + if (&q->flush_rq != rq) { if (error) clear_bit(BIO_UPTODATE, &bio->bi_flags); else if (!test_bit(BIO_UPTODATE, &bio->bi_flags)) @@ -160,13 +160,12 @@ static void req_bio_endio(struct request *rq, struct bio *bio, if (bio->bi_size == 0) bio_endio(bio, error); } else { - /* - * Okay, this is the barrier request in progress, just - * record the error; + * Okay, this is the sequenced flush request in + * progress, just record the error; */ - if (error && !q->orderr) - q->orderr = error; + if (error && !q->flush_err) + q->flush_err = error; } } @@ -520,7 +519,7 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) init_timer(&q->unplug_timer); setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); INIT_LIST_HEAD(&q->timeout_list); - INIT_LIST_HEAD(&q->pending_barriers); + INIT_LIST_HEAD(&q->pending_flushes); INIT_WORK(&q->unplug_work, blk_unplug_work); kobject_init(&q->kobj, &blk_queue_ktype); @@ -1764,11 +1763,11 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) static void blk_account_io_done(struct request *req) { /* - * Account IO completion. bar_rq isn't accounted as a normal - * IO on queueing nor completion. Accounting the containing - * request is enough. + * Account IO completion. flush_rq isn't accounted as a + * normal IO on queueing nor completion. Accounting the + * containing request is enough. */ - if (blk_do_io_stat(req) && req != &req->q->bar_rq) { + if (blk_do_io_stat(req) && req != &req->q->flush_rq) { unsigned long duration = jiffies - req->start_time; const int rw = rq_data_dir(req); struct hd_struct *part; diff --git a/block/blk-flush.c b/block/blk-flush.c index e8b2e5c..dd87322 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -9,41 +9,38 @@ #include "blk.h" -static struct request *queue_next_ordseq(struct request_queue *q); +static struct request *queue_next_fseq(struct request_queue *q); -/* - * Cache flushing for ordered writes handling - */ -unsigned blk_ordered_cur_seq(struct request_queue *q) +unsigned blk_flush_cur_seq(struct request_queue *q) { - if (!q->ordseq) + if (!q->flush_seq) return 0; - return 1 << ffz(q->ordseq); + return 1 << ffz(q->flush_seq); } -static struct request *blk_ordered_complete_seq(struct request_queue *q, - unsigned seq, int error) +static struct request *blk_flush_complete_seq(struct request_queue *q, + unsigned seq, int error) { struct request *next_rq = NULL; - if (error && !q->orderr) - q->orderr = error; + if (error && !q->flush_err) + q->flush_err = error; - BUG_ON(q->ordseq & seq); - q->ordseq |= seq; + BUG_ON(q->flush_seq & seq); + q->flush_seq |= seq; - if (blk_ordered_cur_seq(q) != QUEUE_ORDSEQ_DONE) { - /* not complete yet, queue the next ordered sequence */ - next_rq = queue_next_ordseq(q); + if (blk_flush_cur_seq(q) != QUEUE_FSEQ_DONE) { + /* not complete yet, queue the next flush sequence */ + next_rq = queue_next_fseq(q); } else { - /* complete this barrier request */ - __blk_end_request_all(q->orig_bar_rq, q->orderr); - q->orig_bar_rq = NULL; - q->ordseq = 0; - - /* dispatch the next barrier if there's one */ - if (!list_empty(&q->pending_barriers)) { - next_rq = list_entry_rq(q->pending_barriers.next); + /* complete this flush request */ + __blk_end_request_all(q->orig_flush_rq, q->flush_err); + q->orig_flush_rq = NULL; + q->flush_seq = 0; + + /* dispatch the next flush if there's one */ + if (!list_empty(&q->pending_flushes)) { + next_rq = list_entry_rq(q->pending_flushes.next); list_move(&next_rq->queuelist, &q->queue_head); } } @@ -53,19 +50,19 @@ static struct request *blk_ordered_complete_seq(struct request_queue *q, static void pre_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_PREFLUSH, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_PREFLUSH, error); } -static void bar_end_io(struct request *rq, int error) +static void flush_data_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_BAR, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_DATA, error); } static void post_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_ordered_complete_seq(rq->q, QUEUE_ORDSEQ_POSTFLUSH, error); + blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error); } static void queue_flush(struct request_queue *q, struct request *rq, @@ -74,34 +71,34 @@ static void queue_flush(struct request_queue *q, struct request *rq, blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; rq->cmd_flags = REQ_FLUSH; - rq->rq_disk = q->orig_bar_rq->rq_disk; + rq->rq_disk = q->orig_flush_rq->rq_disk; rq->end_io = end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); } -static struct request *queue_next_ordseq(struct request_queue *q) +static struct request *queue_next_fseq(struct request_queue *q) { - struct request *rq = &q->bar_rq; + struct request *rq = &q->flush_rq; - switch (blk_ordered_cur_seq(q)) { - case QUEUE_ORDSEQ_PREFLUSH: + switch (blk_flush_cur_seq(q)) { + case QUEUE_FSEQ_PREFLUSH: queue_flush(q, rq, pre_flush_end_io); break; - case QUEUE_ORDSEQ_BAR: + case QUEUE_FSEQ_DATA: /* initialize proxy request and queue it */ blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_bar_rq->bio); + init_request_from_bio(rq, q->orig_flush_rq->bio); rq->cmd_flags &= ~REQ_HARDBARRIER; if (q->ordered & QUEUE_ORDERED_DO_FUA) rq->cmd_flags |= REQ_FUA; - rq->end_io = bar_end_io; + rq->end_io = flush_data_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); break; - case QUEUE_ORDSEQ_POSTFLUSH: + case QUEUE_FSEQ_POSTFLUSH: queue_flush(q, rq, post_flush_end_io); break; @@ -111,19 +108,20 @@ static struct request *queue_next_ordseq(struct request_queue *q) return rq; } -struct request *blk_do_ordered(struct request_queue *q, struct request *rq) +struct request *blk_do_flush(struct request_queue *q, struct request *rq) { unsigned skip = 0; if (!(rq->cmd_flags & REQ_HARDBARRIER)) return rq; - if (q->ordseq) { + if (q->flush_seq) { /* - * Barrier is already in progress and they can't be - * processed in parallel. Queue for later processing. + * Sequenced flush is already in progress and they + * can't be processed in parallel. Queue for later + * processing. */ - list_move_tail(&rq->queuelist, &q->pending_barriers); + list_move_tail(&rq->queuelist, &q->pending_flushes); return NULL; } @@ -138,11 +136,11 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq) } /* - * Start a new ordered sequence + * Start a new flush sequence */ - q->orderr = 0; + q->flush_err = 0; q->ordered = q->next_ordered; - q->ordseq |= QUEUE_ORDSEQ_STARTED; + q->flush_seq |= QUEUE_FSEQ_STARTED; /* * For an empty barrier, there's no actual BAR request, which @@ -154,19 +152,19 @@ struct request *blk_do_ordered(struct request_queue *q, struct request *rq) /* stash away the original request */ blk_dequeue_request(rq); - q->orig_bar_rq = rq; + q->orig_flush_rq = rq; if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) - skip |= QUEUE_ORDSEQ_PREFLUSH; + skip |= QUEUE_FSEQ_PREFLUSH; if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) - skip |= QUEUE_ORDSEQ_BAR; + skip |= QUEUE_FSEQ_DATA; if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) - skip |= QUEUE_ORDSEQ_POSTFLUSH; + skip |= QUEUE_FSEQ_POSTFLUSH; /* complete skipped sequences and return the first sequence */ - return blk_ordered_complete_seq(q, skip, 0); + return blk_flush_complete_seq(q, skip, 0); } static void bio_end_empty_barrier(struct bio *bio, int err) diff --git a/block/blk.h b/block/blk.h index 08081e4b..24b92bd 100644 --- a/block/blk.h +++ b/block/blk.h @@ -51,7 +51,7 @@ static inline void blk_clear_rq_complete(struct request *rq) */ #define ELV_ON_HASH(rq) (!hlist_unhashed(&(rq)->hash)) -struct request *blk_do_ordered(struct request_queue *q, struct request *rq); +struct request *blk_do_flush(struct request_queue *q, struct request *rq); static inline struct request *__elv_next_request(struct request_queue *q) { @@ -60,7 +60,7 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); - rq = blk_do_ordered(q, rq); + rq = blk_do_flush(q, rq); if (rq) return rq; } -- cgit v1.1 From 4fed947cb311e5aa51781d316cefca836352f6ce Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: implement REQ_FLUSH/FUA based interface for FLUSH/FUA requests Now that the backend conversion is complete, export sequenced FLUSH/FUA capability through REQ_FLUSH/FUA flags. REQ_FLUSH means the device cache should be flushed before executing the request. REQ_FUA means that the data in the request should be on non-volatile media on completion. Block layer will choose the correct way of implementing the semantics and execute it. The request may be passed to the device directly if the device can handle it; otherwise, it will be sequenced using one or more proxy requests. Devices will never see REQ_FLUSH and/or FUA which it doesn't support. Also, unlike the original REQ_HARDBARRIER, REQ_FLUSH/FUA requests are never failed with -EOPNOTSUPP. If the underlying device doesn't support FLUSH/FUA, the block layer simply make those noop. IOW, it no longer distinguishes between writeback cache which doesn't support cache flush and writethrough/no cache. Devices which have WB cache w/o flush are very difficult to come by these days and there's nothing much we can do anyway, so it doesn't make sense to require everyone to implement -EOPNOTSUPP handling. This will simplify filesystems and block drivers as they can drop -EOPNOTSUPP retry logic for barriers. * QUEUE_ORDERED_* are removed and QUEUE_FSEQ_* are moved into blk-flush.c. * REQ_FLUSH w/o data can also be directly passed to drivers without sequencing but some drivers assume that zero length requests don't have rq->bio which isn't true for these requests requiring the use of proxy requests. * REQ_COMMON_MASK now includes REQ_FLUSH | REQ_FUA so that they are copied from bio to request. * WRITE_BARRIER is marked deprecated and WRITE_FLUSH, WRITE_FUA and WRITE_FLUSH_FUA are added. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 2 +- block/blk-flush.c | 85 ++++++++++++++++++++++++++++++------------------------- block/blk.h | 3 ++ 3 files changed, 50 insertions(+), 40 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 8870ae4..18455c4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1204,7 +1204,7 @@ static int __make_request(struct request_queue *q, struct bio *bio) spin_lock_irq(q->queue_lock); - if (bio->bi_rw & REQ_HARDBARRIER) { + if (bio->bi_rw & (REQ_FLUSH | REQ_FUA)) { where = ELEVATOR_INSERT_FRONT; goto get_rq; } diff --git a/block/blk-flush.c b/block/blk-flush.c index dd87322..452c552 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -1,5 +1,5 @@ /* - * Functions related to barrier IO handling + * Functions to sequence FLUSH and FUA writes. */ #include #include @@ -9,6 +9,15 @@ #include "blk.h" +/* FLUSH/FUA sequences */ +enum { + QUEUE_FSEQ_STARTED = (1 << 0), /* flushing in progress */ + QUEUE_FSEQ_PREFLUSH = (1 << 1), /* pre-flushing in progress */ + QUEUE_FSEQ_DATA = (1 << 2), /* data write in progress */ + QUEUE_FSEQ_POSTFLUSH = (1 << 3), /* post-flushing in progress */ + QUEUE_FSEQ_DONE = (1 << 4), +}; + static struct request *queue_next_fseq(struct request_queue *q); unsigned blk_flush_cur_seq(struct request_queue *q) @@ -79,6 +88,7 @@ static void queue_flush(struct request_queue *q, struct request *rq, static struct request *queue_next_fseq(struct request_queue *q) { + struct request *orig_rq = q->orig_flush_rq; struct request *rq = &q->flush_rq; switch (blk_flush_cur_seq(q)) { @@ -87,12 +97,11 @@ static struct request *queue_next_fseq(struct request_queue *q) break; case QUEUE_FSEQ_DATA: - /* initialize proxy request and queue it */ + /* initialize proxy request, inherit FLUSH/FUA and queue it */ blk_rq_init(q, rq); - init_request_from_bio(rq, q->orig_flush_rq->bio); - rq->cmd_flags &= ~REQ_HARDBARRIER; - if (q->ordered & QUEUE_ORDERED_DO_FUA) - rq->cmd_flags |= REQ_FUA; + init_request_from_bio(rq, orig_rq->bio); + rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); + rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); rq->end_io = flush_data_end_io; elv_insert(q, rq, ELEVATOR_INSERT_FRONT); @@ -110,60 +119,58 @@ static struct request *queue_next_fseq(struct request_queue *q) struct request *blk_do_flush(struct request_queue *q, struct request *rq) { + unsigned int fflags = q->flush_flags; /* may change, cache it */ + bool has_flush = fflags & REQ_FLUSH, has_fua = fflags & REQ_FUA; + bool do_preflush = has_flush && (rq->cmd_flags & REQ_FLUSH); + bool do_postflush = has_flush && !has_fua && (rq->cmd_flags & REQ_FUA); unsigned skip = 0; - if (!(rq->cmd_flags & REQ_HARDBARRIER)) + /* + * Special case. If there's data but flush is not necessary, + * the request can be issued directly. + * + * Flush w/o data should be able to be issued directly too but + * currently some drivers assume that rq->bio contains + * non-zero data if it isn't NULL and empty FLUSH requests + * getting here usually have bio's without data. + */ + if (blk_rq_sectors(rq) && !do_preflush && !do_postflush) { + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; return rq; + } + /* + * Sequenced flushes can't be processed in parallel. If + * another one is already in progress, queue for later + * processing. + */ if (q->flush_seq) { - /* - * Sequenced flush is already in progress and they - * can't be processed in parallel. Queue for later - * processing. - */ list_move_tail(&rq->queuelist, &q->pending_flushes); return NULL; } - if (unlikely(q->next_ordered == QUEUE_ORDERED_NONE)) { - /* - * Queue ordering not supported. Terminate - * with prejudice. - */ - blk_dequeue_request(rq); - __blk_end_request_all(rq, -EOPNOTSUPP); - return NULL; - } - /* * Start a new flush sequence */ q->flush_err = 0; - q->ordered = q->next_ordered; q->flush_seq |= QUEUE_FSEQ_STARTED; - /* - * For an empty barrier, there's no actual BAR request, which - * in turn makes POSTFLUSH unnecessary. Mask them off. - */ - if (!blk_rq_sectors(rq)) - q->ordered &= ~(QUEUE_ORDERED_DO_BAR | - QUEUE_ORDERED_DO_POSTFLUSH); - - /* stash away the original request */ + /* adjust FLUSH/FUA of the original request and stash it away */ + rq->cmd_flags &= ~REQ_FLUSH; + if (!has_fua) + rq->cmd_flags &= ~REQ_FUA; blk_dequeue_request(rq); q->orig_flush_rq = rq; - if (!(q->ordered & QUEUE_ORDERED_DO_PREFLUSH)) + /* skip unneded sequences and return the first one */ + if (!do_preflush) skip |= QUEUE_FSEQ_PREFLUSH; - - if (!(q->ordered & QUEUE_ORDERED_DO_BAR)) + if (!blk_rq_sectors(rq)) skip |= QUEUE_FSEQ_DATA; - - if (!(q->ordered & QUEUE_ORDERED_DO_POSTFLUSH)) + if (!do_postflush) skip |= QUEUE_FSEQ_POSTFLUSH; - - /* complete skipped sequences and return the first sequence */ return blk_flush_complete_seq(q, skip, 0); } diff --git a/block/blk.h b/block/blk.h index 24b92bd..a09c18b 100644 --- a/block/blk.h +++ b/block/blk.h @@ -60,6 +60,9 @@ static inline struct request *__elv_next_request(struct request_queue *q) while (1) { while (!list_empty(&q->queue_head)) { rq = list_entry_rq(q->queue_head.next); + if (!(rq->cmd_flags & (REQ_FLUSH | REQ_FUA)) || + rq == &q->flush_rq) + return rq; rq = blk_do_flush(q, rq); if (rq) return rq; -- cgit v1.1 From 1e87901e189c8f01750d67485009fe3827c691bf Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: filter flush bio's in __generic_make_request() There are a number of make_request based drivers which don't support cache flushes. Filter out flush bio's in __generic_make_request() so that they don't have to worry about them. All FLUSH/FUA requests with data are converted to regular IO requests and empty ones are completed immediately. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-core.c | 13 +++++++++++++ 1 file changed, 13 insertions(+) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 18455c4..495bdc4 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -1509,6 +1509,19 @@ static inline void __generic_make_request(struct bio *bio) if (bio_check_eod(bio, nr_sectors)) goto end_io; + /* + * Filter flush bio's early so that make_request based + * drivers without flush support don't have to worry + * about them. + */ + if ((bio->bi_rw & (REQ_FLUSH | REQ_FUA)) && !q->flush_flags) { + bio->bi_rw &= ~(REQ_FLUSH | REQ_FUA); + if (!nr_sectors) { + err = 0; + goto end_io; + } + } + if ((bio->bi_rw & REQ_DISCARD) && (!blk_queue_discard(q) || ((bio->bi_rw & REQ_SECURE) && -- cgit v1.1 From cde4c406d8fb051c5aafc917643adbb9dbd0abc2 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: simplify queue_next_fseq We need to call blk_rq_init and elv_insert for all cases in queue_next_fseq, so take these calls into common code. Also move the end_io initialization from queue_flush into queue_next_fseq and rename queue_flush to init_flush_request now that it's old name doesn't apply anymore. Signed-off-by: Christoph Hellwig Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-flush.c | 26 ++++++++++---------------- 1 file changed, 10 insertions(+), 16 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 452c552..72905f8 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -74,16 +74,11 @@ static void post_flush_end_io(struct request *rq, int error) blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error); } -static void queue_flush(struct request_queue *q, struct request *rq, - rq_end_io_fn *end_io) +static void init_flush_request(struct request *rq, struct gendisk *disk) { - blk_rq_init(q, rq); rq->cmd_type = REQ_TYPE_FS; rq->cmd_flags = REQ_FLUSH; - rq->rq_disk = q->orig_flush_rq->rq_disk; - rq->end_io = end_io; - - elv_insert(q, rq, ELEVATOR_INSERT_FRONT); + rq->rq_disk = disk; } static struct request *queue_next_fseq(struct request_queue *q) @@ -91,29 +86,28 @@ static struct request *queue_next_fseq(struct request_queue *q) struct request *orig_rq = q->orig_flush_rq; struct request *rq = &q->flush_rq; + blk_rq_init(q, rq); + switch (blk_flush_cur_seq(q)) { case QUEUE_FSEQ_PREFLUSH: - queue_flush(q, rq, pre_flush_end_io); + init_flush_request(rq, orig_rq->rq_disk); + rq->end_io = pre_flush_end_io; break; - case QUEUE_FSEQ_DATA: - /* initialize proxy request, inherit FLUSH/FUA and queue it */ - blk_rq_init(q, rq); init_request_from_bio(rq, orig_rq->bio); rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); rq->end_io = flush_data_end_io; - - elv_insert(q, rq, ELEVATOR_INSERT_FRONT); break; - case QUEUE_FSEQ_POSTFLUSH: - queue_flush(q, rq, post_flush_end_io); + init_flush_request(rq, orig_rq->rq_disk); + rq->end_io = post_flush_end_io; break; - default: BUG(); } + + elv_insert(q, rq, ELEVATOR_INSERT_FRONT); return rq; } -- cgit v1.1 From 337238be1bf52e1242f940fc6fe83fb395e55057 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: initialize flush request with WRITE_FLUSH instead of REQ_FLUSH init_flush_request() only set REQ_FLUSH when initializing flush requests making them READ requests. Use WRITE_FLUSH instead. Signed-off-by: Tejun Heo Reported-by: Mike Snitzer Signed-off-by: Jens Axboe --- block/blk-flush.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 72905f8..f357f1f 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -77,7 +77,7 @@ static void post_flush_end_io(struct request *rq, int error) static void init_flush_request(struct request *rq, struct gendisk *disk) { rq->cmd_type = REQ_TYPE_FS; - rq->cmd_flags = REQ_FLUSH; + rq->cmd_flags = WRITE_FLUSH; rq->rq_disk = disk; } -- cgit v1.1 From 47f70d5a6ca78c40a1c799d43506efbfed914f7b Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: kick queue after sequencing REQ_FLUSH/FUA While completing a request from a REQ_FLUSH/FUA sequence, another request can be pushed to the request queue. If a driver tests elv_queue_empty() before completing a request and runs the queue again only if the queue wasn't empty, this may lead to hang. Please note that most drivers either kick the queue unconditionally or test queue emptiness after completing the current request and don't have this problem. This patch removes this possibility by making REQ_FLUSH/FUA sequence code kick the queue if the queue was empty before completing a request from REQ_FLUSH/FUA sequence. Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-flush.c | 22 +++++++++++++++++++--- 1 file changed, 19 insertions(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index f357f1f..cb4c8440 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -56,22 +56,38 @@ static struct request *blk_flush_complete_seq(struct request_queue *q, return next_rq; } +static void blk_flush_complete_seq_end_io(struct request_queue *q, + unsigned seq, int error) +{ + bool was_empty = elv_queue_empty(q); + struct request *next_rq; + + next_rq = blk_flush_complete_seq(q, seq, error); + + /* + * Moving a request silently to empty queue_head may stall the + * queue. Kick the queue in those cases. + */ + if (was_empty && next_rq) + __blk_run_queue(q); +} + static void pre_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_flush_complete_seq(rq->q, QUEUE_FSEQ_PREFLUSH, error); + blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_PREFLUSH, error); } static void flush_data_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_flush_complete_seq(rq->q, QUEUE_FSEQ_DATA, error); + blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_DATA, error); } static void post_flush_end_io(struct request *rq, int error) { elv_completed_request(rq->q, rq); - blk_flush_complete_seq(rq->q, QUEUE_FSEQ_POSTFLUSH, error); + blk_flush_complete_seq_end_io(rq->q, QUEUE_FSEQ_POSTFLUSH, error); } static void init_flush_request(struct request *rq, struct gendisk *disk) -- cgit v1.1 From 09d60c701b64b509f328cac72970eb894f485b9e Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: make sure FSEQ_DATA request has the same rq_disk as the original rq->rq_disk and bio->bi_bdev->bd_disk may differ if a request has passed through remapping drivers. FSEQ_DATA request incorrectly followed bio->bi_bdev->bd_disk ending up being issued w/ mismatching rq_disk. Make it follow orig_rq->rq_disk. Signed-off-by: Tejun Heo Reported-by: Kiyoshi Ueda Tested-by: Kiyoshi Ueda Signed-off-by: Jens Axboe --- block/blk-flush.c | 7 +++++++ 1 file changed, 7 insertions(+) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index cb4c8440..7d1fc98 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -111,6 +111,13 @@ static struct request *queue_next_fseq(struct request_queue *q) break; case QUEUE_FSEQ_DATA: init_request_from_bio(rq, orig_rq->bio); + /* + * orig_rq->rq_disk may be different from + * bio->bi_bdev->bd_disk if orig_rq got here through + * remapping drivers. Make sure rq->rq_disk points + * to the same one as orig_rq. + */ + rq->rq_disk = orig_rq->rq_disk; rq->cmd_flags &= ~(REQ_FLUSH | REQ_FUA); rq->cmd_flags |= orig_rq->cmd_flags & (REQ_FLUSH | REQ_FUA); rq->end_io = flush_data_end_io; -- cgit v1.1 From d391a2dda2f1c993f094bdb3a8a342c5e0546553 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:17 +0200 Subject: block: use REQ_FLUSH in blkdev_issue_flush() Update blkdev_issue_flush() to use new REQ_FLUSH interface. Signed-off-by: Tejun Heo Cc: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 17 ++++++----------- 1 file changed, 6 insertions(+), 11 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 7d1fc98..62b7df9 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -191,13 +191,10 @@ struct request *blk_do_flush(struct request_queue *q, struct request *rq) return blk_flush_complete_seq(q, skip, 0); } -static void bio_end_empty_barrier(struct bio *bio, int err) +static void bio_end_flush(struct bio *bio, int err) { - if (err) { - if (err == -EOPNOTSUPP) - set_bit(BIO_EOPNOTSUPP, &bio->bi_flags); + if (err) clear_bit(BIO_UPTODATE, &bio->bi_flags); - } if (bio->bi_private) complete(bio->bi_private); bio_put(bio); @@ -235,19 +232,19 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, * some block devices may not have their queue correctly set up here * (e.g. loop device without a backing file) and so issuing a flush * here will panic. Ensure there is a request function before issuing - * the barrier. + * the flush. */ if (!q->make_request_fn) return -ENXIO; bio = bio_alloc(gfp_mask, 0); - bio->bi_end_io = bio_end_empty_barrier; + bio->bi_end_io = bio_end_flush; bio->bi_bdev = bdev; if (test_bit(BLKDEV_WAIT, &flags)) bio->bi_private = &wait; bio_get(bio); - submit_bio(WRITE_BARRIER, bio); + submit_bio(WRITE_FLUSH, bio); if (test_bit(BLKDEV_WAIT, &flags)) { wait_for_completion(&wait); /* @@ -259,9 +256,7 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, *error_sector = bio->bi_sector; } - if (bio_flagged(bio, BIO_EOPNOTSUPP)) - ret = -EOPNOTSUPP; - else if (!bio_flagged(bio, BIO_UPTODATE)) + if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; bio_put(bio); -- cgit v1.1 From 3a2edd0d6ddbd5fa3b389ea6db811285415ce6c8 Mon Sep 17 00:00:00 2001 From: Tejun Heo Date: Fri, 3 Sep 2010 11:56:18 +0200 Subject: block: make __blk_rq_prep_clone() copy most command flags Currently __blk_rq_prep_clone() copies only REQ_WRITE and REQ_DISCARD. There's no reason to omit other command flags and REQ_FUA needs to be copied to implement FUA support in request-based dm. REQ_COMMON_MASK which specifies flags to be copied from bio to request already identifies all the command flags. Define REQ_CLONE_MASK to be the same as REQ_COMMON_MASK for clarity and make __blk_rq_prep_clone() copy all flags in the mask. Signed-off-by: Tejun Heo Reviewed-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-core.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'block') diff --git a/block/blk-core.c b/block/blk-core.c index 495bdc4..2a5b192 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -2505,9 +2505,7 @@ EXPORT_SYMBOL_GPL(blk_rq_unprep_clone); static void __blk_rq_prep_clone(struct request *dst, struct request *src) { dst->cpu = src->cpu; - dst->cmd_flags = (rq_data_dir(src) | REQ_NOMERGE); - if (src->cmd_flags & REQ_DISCARD) - dst->cmd_flags |= REQ_DISCARD; + dst->cmd_flags = (src->cmd_flags & REQ_CLONE_MASK) | REQ_NOMERGE; dst->cmd_type = src->cmd_type; dst->__sector = blk_rq_pos(src); dst->__data_len = blk_rq_bytes(src); -- cgit v1.1 From 8c5553678237b7121355108e03c36086037d8975 Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Wed, 18 Aug 2010 05:29:22 -0400 Subject: block: remove the BLKDEV_IFL_BARRIER flag Remove support for barriers on discards, which is unused now. Also remove the DISCARD_NOBARRIER I/O type in favour of just setting the rw flags up locally in blkdev_issue_discard. tj: Also remove DISCARD_SECURE and use REQ_SECURE directly. Signed-off-by: Christoph Hellwig Acked-by: Mike Snitzer Signed-off-by: Tejun Heo Signed-off-by: Jens Axboe --- block/blk-lib.c | 18 ++---------------- 1 file changed, 2 insertions(+), 16 deletions(-) (limited to 'block') diff --git a/block/blk-lib.c b/block/blk-lib.c index c392029..fe2e6ed 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -39,8 +39,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, { DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q = bdev_get_queue(bdev); - int type = flags & BLKDEV_IFL_BARRIER ? - DISCARD_BARRIER : DISCARD_NOBARRIER; + int type = REQ_WRITE | REQ_DISCARD; unsigned int max_discard_sectors; struct bio *bio; int ret = 0; @@ -65,7 +64,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, if (flags & BLKDEV_IFL_SECURE) { if (!blk_queue_secdiscard(q)) return -EOPNOTSUPP; - type |= DISCARD_SECURE; + type |= REQ_SECURE; } while (nr_sects && !ret) { @@ -162,12 +161,6 @@ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, bb.wait = &wait; bb.end_io = NULL; - if (flags & BLKDEV_IFL_BARRIER) { - /* issue async barrier before the data */ - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, 0); - if (ret) - return ret; - } submit: ret = 0; while (nr_sects != 0) { @@ -199,13 +192,6 @@ submit: issued++; submit_bio(WRITE, bio); } - /* - * When all data bios are in flight. Send final barrier if requeted. - */ - if (nr_sects == 0 && flags & BLKDEV_IFL_BARRIER) - ret = blkdev_issue_flush(bdev, gfp_mask, NULL, - flags & BLKDEV_IFL_WAIT); - if (flags & BLKDEV_IFL_WAIT) /* Wait for bios in-flight */ -- cgit v1.1 From dd3932eddf428571762596e17b65f5dc92ca361b Mon Sep 17 00:00:00 2001 From: Christoph Hellwig Date: Thu, 16 Sep 2010 20:51:46 +0200 Subject: block: remove BLKDEV_IFL_WAIT All the blkdev_issue_* helpers can only sanely be used for synchronous caller. To issue cache flushes or barriers asynchronously the caller needs to set up a bio by itself with a completion callback to move the asynchronous state machine ahead. So drop the BLKDEV_IFL_WAIT flag that is always specified when calling blkdev_issue_* and also remove the now unused flags argument to blkdev_issue_flush and blkdev_issue_zeroout. For blkdev_issue_discard we need to keep it for the secure discard flag, which gains a more descriptive name and loses the bitops vs flag confusion. Signed-off-by: Christoph Hellwig Signed-off-by: Jens Axboe --- block/blk-flush.c | 25 +++++++++++-------------- block/blk-lib.c | 21 ++++++++------------- block/ioctl.c | 4 ++-- 3 files changed, 21 insertions(+), 29 deletions(-) (limited to 'block') diff --git a/block/blk-flush.c b/block/blk-flush.c index 62b7df9..54b123d 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -205,7 +205,6 @@ static void bio_end_flush(struct bio *bio, int err) * @bdev: blockdev to issue flush for * @gfp_mask: memory allocation flags (for bio_alloc) * @error_sector: error sector - * @flags: BLKDEV_IFL_* flags to control behaviour * * Description: * Issue a flush for the block device in question. Caller can supply @@ -214,7 +213,7 @@ static void bio_end_flush(struct bio *bio, int err) * request was pushed in some internal queue for later handling. */ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, - sector_t *error_sector, unsigned long flags) + sector_t *error_sector) { DECLARE_COMPLETION_ONSTACK(wait); struct request_queue *q; @@ -240,21 +239,19 @@ int blkdev_issue_flush(struct block_device *bdev, gfp_t gfp_mask, bio = bio_alloc(gfp_mask, 0); bio->bi_end_io = bio_end_flush; bio->bi_bdev = bdev; - if (test_bit(BLKDEV_WAIT, &flags)) - bio->bi_private = &wait; + bio->bi_private = &wait; bio_get(bio); submit_bio(WRITE_FLUSH, bio); - if (test_bit(BLKDEV_WAIT, &flags)) { - wait_for_completion(&wait); - /* - * The driver must store the error location in ->bi_sector, if - * it supports it. For non-stacked drivers, this should be - * copied from blk_rq_pos(rq). - */ - if (error_sector) - *error_sector = bio->bi_sector; - } + wait_for_completion(&wait); + + /* + * The driver must store the error location in ->bi_sector, if + * it supports it. For non-stacked drivers, this should be + * copied from blk_rq_pos(rq). + */ + if (error_sector) + *error_sector = bio->bi_sector; if (!bio_flagged(bio, BIO_UPTODATE)) ret = -EIO; diff --git a/block/blk-lib.c b/block/blk-lib.c index fe2e6ed..1a320d2 100644 --- a/block/blk-lib.c +++ b/block/blk-lib.c @@ -61,7 +61,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, max_discard_sectors &= ~(disc_sects - 1); } - if (flags & BLKDEV_IFL_SECURE) { + if (flags & BLKDEV_DISCARD_SECURE) { if (!blk_queue_secdiscard(q)) return -EOPNOTSUPP; type |= REQ_SECURE; @@ -77,8 +77,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio->bi_sector = sector; bio->bi_end_io = blkdev_discard_end_io; bio->bi_bdev = bdev; - if (flags & BLKDEV_IFL_WAIT) - bio->bi_private = &wait; + bio->bi_private = &wait; if (nr_sects > max_discard_sectors) { bio->bi_size = max_discard_sectors << 9; @@ -92,8 +91,7 @@ int blkdev_issue_discard(struct block_device *bdev, sector_t sector, bio_get(bio); submit_bio(type, bio); - if (flags & BLKDEV_IFL_WAIT) - wait_for_completion(&wait); + wait_for_completion(&wait); if (bio_flagged(bio, BIO_EOPNOTSUPP)) ret = -EOPNOTSUPP; @@ -139,7 +137,6 @@ static void bio_batch_end_io(struct bio *bio, int err) * @sector: start sector * @nr_sects: number of sectors to write * @gfp_mask: memory allocation flags (for bio_alloc) - * @flags: BLKDEV_IFL_* flags to control behaviour * * Description: * Generate and issue number of bios with zerofiled pages. @@ -148,7 +145,7 @@ static void bio_batch_end_io(struct bio *bio, int err) */ int blkdev_issue_zeroout(struct block_device *bdev, sector_t sector, - sector_t nr_sects, gfp_t gfp_mask, unsigned long flags) + sector_t nr_sects, gfp_t gfp_mask) { int ret; struct bio *bio; @@ -174,8 +171,7 @@ submit: bio->bi_sector = sector; bio->bi_bdev = bdev; bio->bi_end_io = bio_batch_end_io; - if (flags & BLKDEV_IFL_WAIT) - bio->bi_private = &bb; + bio->bi_private = &bb; while (nr_sects != 0) { sz = min((sector_t) PAGE_SIZE >> 9 , nr_sects); @@ -193,10 +189,9 @@ submit: submit_bio(WRITE, bio); } - if (flags & BLKDEV_IFL_WAIT) - /* Wait for bios in-flight */ - while ( issued != atomic_read(&bb.done)) - wait_for_completion(&wait); + /* Wait for bios in-flight */ + while (issued != atomic_read(&bb.done)) + wait_for_completion(&wait); if (!test_bit(BIO_UPTODATE, &bb.flags)) /* One of bios in the batch was completed with error.*/ diff --git a/block/ioctl.c b/block/ioctl.c index d8052f0..cb2b909 100644 --- a/block/ioctl.c +++ b/block/ioctl.c @@ -116,7 +116,7 @@ static int blkdev_reread_part(struct block_device *bdev) static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, uint64_t len, int secure) { - unsigned long flags = BLKDEV_IFL_WAIT; + unsigned long flags = 0; if (start & 511) return -EINVAL; @@ -128,7 +128,7 @@ static int blk_ioctl_discard(struct block_device *bdev, uint64_t start, if (start + len > (bdev->bd_inode->i_size >> 9)) return -EINVAL; if (secure) - flags |= BLKDEV_IFL_SECURE; + flags |= BLKDEV_DISCARD_SECURE; return blkdev_issue_discard(bdev, start, len, GFP_KERNEL, flags); } -- cgit v1.1