diff options
author | Jens Axboe <jens.axboe@oracle.com> | 2008-09-14 05:55:09 -0700 |
---|---|---|
committer | Jens Axboe <jens.axboe@oracle.com> | 2008-10-09 08:56:13 +0200 |
commit | 242f9dcb8ba6f68fcd217a119a7648a4f69290e9 (patch) | |
tree | 1bfe245ffbc50d204d76665cd8f90d85100f86a1 /block | |
parent | 608aeef17a91747d6303de4df5e2c2e6899a95e8 (diff) | |
download | op-kernel-dev-242f9dcb8ba6f68fcd217a119a7648a4f69290e9.zip op-kernel-dev-242f9dcb8ba6f68fcd217a119a7648a4f69290e9.tar.gz |
block: unify request timeout handling
Right now SCSI and others do their own command timeout handling.
Move those bits to the block layer.
Instead of having a timer per command, we try to be a bit more clever
and simply have one per-queue. This avoids the overhead of having to
tear down and setup a timer for each command, so it will result in a lot
less timer fiddling.
Signed-off-by: Mike Anderson <andmike@linux.vnet.ibm.com>
Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
Diffstat (limited to 'block')
-rw-r--r-- | block/Makefile | 4 | ||||
-rw-r--r-- | block/blk-core.c | 7 | ||||
-rw-r--r-- | block/blk-settings.c | 12 | ||||
-rw-r--r-- | block/blk-softirq.c | 30 | ||||
-rw-r--r-- | block/blk-timeout.c | 155 | ||||
-rw-r--r-- | block/blk.h | 24 | ||||
-rw-r--r-- | block/elevator.c | 8 |
7 files changed, 226 insertions, 14 deletions
diff --git a/block/Makefile b/block/Makefile index 0da976c..bfe7304 100644 --- a/block/Makefile +++ b/block/Makefile @@ -4,8 +4,8 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-barrier.o blk-settings.o blk-ioc.o blk-map.o \ - blk-exec.o blk-merge.o blk-softirq.o ioctl.o genhd.o \ - scsi_ioctl.o cmd-filter.o + blk-exec.o blk-merge.o blk-softirq.o blk-timeout.o \ + ioctl.o genhd.o scsi_ioctl.o cmd-filter.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/blk-core.c b/block/blk-core.c index f25eb97..d768a8d 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -110,6 +110,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq) memset(rq, 0, sizeof(*rq)); INIT_LIST_HEAD(&rq->queuelist); + INIT_LIST_HEAD(&rq->timeout_list); rq->cpu = -1; rq->q = q; rq->sector = rq->hard_sector = (sector_t) -1; @@ -490,6 +491,8 @@ struct request_queue *blk_alloc_queue_node(gfp_t gfp_mask, int node_id) } init_timer(&q->unplug_timer); + setup_timer(&q->timeout, blk_rq_timed_out_timer, (unsigned long) q); + INIT_LIST_HEAD(&q->timeout_list); kobject_init(&q->kobj, &blk_queue_ktype); @@ -897,6 +900,8 @@ EXPORT_SYMBOL(blk_start_queueing); */ void blk_requeue_request(struct request_queue *q, struct request *rq) { + blk_delete_timer(rq); + blk_clear_rq_complete(rq); blk_add_trace_rq(q, rq, BLK_TA_REQUEUE); if (blk_rq_tagged(rq)) @@ -1650,6 +1655,8 @@ static void end_that_request_last(struct request *req, int error) { struct gendisk *disk = req->rq_disk; + blk_delete_timer(req); + if (blk_rq_tagged(req)) blk_queue_end_tag(req->q, req); diff --git a/block/blk-settings.c b/block/blk-settings.c index d70692b..1d0330d 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -77,6 +77,18 @@ void blk_queue_softirq_done(struct request_queue *q, softirq_done_fn *fn) } EXPORT_SYMBOL(blk_queue_softirq_done); +void blk_queue_rq_timeout(struct request_queue *q, unsigned int timeout) +{ + q->rq_timeout = timeout; +} +EXPORT_SYMBOL_GPL(blk_queue_rq_timeout); + +void blk_queue_rq_timed_out(struct request_queue *q, rq_timed_out_fn *fn) +{ + q->rq_timed_out_fn = fn; +} +EXPORT_SYMBOL_GPL(blk_queue_rq_timed_out); + /** * blk_queue_make_request - define an alternate make_request function for a device * @q: the request queue for the device to be affected diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 3a1af55..7ab344a 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -101,18 +101,7 @@ static struct notifier_block __cpuinitdata blk_cpu_notifier = { .notifier_call = blk_cpu_notify, }; -/** - * blk_complete_request - end I/O on a request - * @req: the request being processed - * - * Description: - * Ends all I/O on a request. It does not handle partial completions, - * unless the driver actually implements this in its completion callback - * through requeueing. The actual completion happens out-of-order, - * through a softirq handler. The user must have registered a completion - * callback through blk_queue_softirq_done(). - **/ -void blk_complete_request(struct request *req) +void __blk_complete_request(struct request *req) { struct request_queue *q = req->q; unsigned long flags; @@ -151,6 +140,23 @@ do_local: local_irq_restore(flags); } + +/** + * blk_complete_request - end I/O on a request + * @req: the request being processed + * + * Description: + * Ends all I/O on a request. It does not handle partial completions, + * unless the driver actually implements this in its completion callback + * through requeueing. The actual completion happens out-of-order, + * through a softirq handler. The user must have registered a completion + * callback through blk_queue_softirq_done(). + **/ +void blk_complete_request(struct request *req) +{ + if (!blk_mark_rq_complete(req)) + __blk_complete_request(req); +} EXPORT_SYMBOL(blk_complete_request); __init int blk_softirq_init(void) diff --git a/block/blk-timeout.c b/block/blk-timeout.c new file mode 100644 index 0000000..b36d07bf --- /dev/null +++ b/block/blk-timeout.c @@ -0,0 +1,155 @@ +/* + * Functions related to generic timeout handling of requests. + */ +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/blkdev.h> + +#include "blk.h" + +/* + * blk_delete_timer - Delete/cancel timer for a given function. + * @req: request that we are canceling timer for + * + */ +void blk_delete_timer(struct request *req) +{ + struct request_queue *q = req->q; + + /* + * Nothing to detach + */ + if (!q->rq_timed_out_fn || !req->deadline) + return; + + list_del_init(&req->timeout_list); + + if (list_empty(&q->timeout_list)) + del_timer(&q->timeout); +} + +static void blk_rq_timed_out(struct request *req) +{ + struct request_queue *q = req->q; + enum blk_eh_timer_return ret; + + ret = q->rq_timed_out_fn(req); + switch (ret) { + case BLK_EH_HANDLED: + __blk_complete_request(req); + break; + case BLK_EH_RESET_TIMER: + blk_clear_rq_complete(req); + blk_add_timer(req); + break; + case BLK_EH_NOT_HANDLED: + /* + * LLD handles this for now but in the future + * we can send a request msg to abort the command + * and we can move more of the generic scsi eh code to + * the blk layer. + */ + break; + default: + printk(KERN_ERR "block: bad eh return: %d\n", ret); + break; + } +} + +void blk_rq_timed_out_timer(unsigned long data) +{ + struct request_queue *q = (struct request_queue *) data; + unsigned long flags, uninitialized_var(next), next_set = 0; + struct request *rq, *tmp; + + spin_lock_irqsave(q->queue_lock, flags); + + list_for_each_entry_safe(rq, tmp, &q->timeout_list, timeout_list) { + if (time_after_eq(jiffies, rq->deadline)) { + list_del_init(&rq->timeout_list); + + /* + * Check if we raced with end io completion + */ + if (blk_mark_rq_complete(rq)) + continue; + blk_rq_timed_out(rq); + } + if (!next_set) { + next = rq->deadline; + next_set = 1; + } else if (time_after(next, rq->deadline)) + next = rq->deadline; + } + + if (next_set && !list_empty(&q->timeout_list)) + mod_timer(&q->timeout, round_jiffies(next)); + + spin_unlock_irqrestore(q->queue_lock, flags); +} + +/** + * blk_abort_request -- Request request recovery for the specified command + * @req: pointer to the request of interest + * + * This function requests that the block layer start recovery for the + * request by deleting the timer and calling the q's timeout function. + * LLDDs who implement their own error recovery MAY ignore the timeout + * event if they generated blk_abort_req. Must hold queue lock. + */ +void blk_abort_request(struct request *req) +{ + blk_delete_timer(req); + blk_rq_timed_out(req); +} +EXPORT_SYMBOL_GPL(blk_abort_request); + +/** + * blk_add_timer - Start timeout timer for a single request + * @req: request that is about to start running. + * + * Notes: + * Each request has its own timer, and as it is added to the queue, we + * set up the timer. When the request completes, we cancel the timer. + */ +void blk_add_timer(struct request *req) +{ + struct request_queue *q = req->q; + unsigned long expiry; + + if (!q->rq_timed_out_fn) + return; + + BUG_ON(!list_empty(&req->timeout_list)); + BUG_ON(test_bit(REQ_ATOM_COMPLETE, &req->atomic_flags)); + + if (req->timeout) + req->deadline = jiffies + req->timeout; + else { + req->deadline = jiffies + q->rq_timeout; + /* + * Some LLDs, like scsi, peek at the timeout to prevent + * a command from being retried forever. + */ + req->timeout = q->rq_timeout; + } + list_add_tail(&req->timeout_list, &q->timeout_list); + + /* + * If the timer isn't already pending or this timeout is earlier + * than an existing one, modify the timer. Round to next nearest + * second. + */ + expiry = round_jiffies(req->deadline); + + /* + * We use ->deadline == 0 to detect whether a timer was added or + * not, so just increase to next jiffy for that specific case + */ + if (unlikely(!req->deadline)) + req->deadline = 1; + + if (!timer_pending(&q->timeout) || + time_before(expiry, q->timeout.expires)) + mod_timer(&q->timeout, expiry); +} diff --git a/block/blk.h b/block/blk.h index de74254..a4f4a50 100644 --- a/block/blk.h +++ b/block/blk.h @@ -17,6 +17,30 @@ void __blk_queue_free_tags(struct request_queue *q); void blk_unplug_work(struct work_struct *work); void blk_unplug_timeout(unsigned long data); +void blk_rq_timed_out_timer(unsigned long data); +void blk_delete_timer(struct request *); +void blk_add_timer(struct request *); + +/* + * Internal atomic flags for request handling + */ +enum rq_atomic_flags { + REQ_ATOM_COMPLETE = 0, +}; + +/* + * EH timer and IO completion will both attempt to 'grab' the request, make + * sure that only one of them suceeds + */ +static inline int blk_mark_rq_complete(struct request *rq) +{ + return test_and_set_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); +} + +static inline void blk_clear_rq_complete(struct request *rq) +{ + clear_bit(REQ_ATOM_COMPLETE, &rq->atomic_flags); +} struct io_context *current_io_context(gfp_t gfp_flags, int node); diff --git a/block/elevator.c b/block/elevator.c index 8e3fc3a..a91fc59 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -36,6 +36,8 @@ #include <linux/hash.h> #include <linux/uaccess.h> +#include "blk.h" + static DEFINE_SPINLOCK(elv_list_lock); static LIST_HEAD(elv_list); @@ -771,6 +773,12 @@ struct request *elv_next_request(struct request_queue *q) */ rq->cmd_flags |= REQ_STARTED; blk_add_trace_rq(q, rq, BLK_TA_ISSUE); + + /* + * We are now handing the request to the hardware, + * add the timeout handler + */ + blk_add_timer(rq); } if (!q->boundary_rq || q->boundary_rq == rq) { |