From 5e605b64a183a6c0e84cdb99a6f8acb1f8200437 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Wed, 5 Aug 2009 09:07:21 +0200 Subject: block: add blk-iopoll, a NAPI like approach for block devices This borrows some code from NAPI and implements a polled completion mode for block devices. The idea is the same as NAPI - instead of doing the command completion when the irq occurs, schedule a dedicated softirq in the hopes that we will complete more IO when the iopoll handler is invoked. Devices have a budget of commands assigned, and will stay in polled mode as long as they continue to consume their budget from the iopoll softirq handler. If they do not, the device is set back to interrupt completion mode. This patch holds the core bits for blk-iopoll, device driver support sold separately. Signed-off-by: Jens Axboe --- block/blk-iopoll.c | 220 +++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 220 insertions(+) create mode 100644 block/blk-iopoll.c (limited to 'block/blk-iopoll.c') diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c new file mode 100644 index 0000000..566db1e --- /dev/null +++ b/block/blk-iopoll.c @@ -0,0 +1,220 @@ +/* + * Functions related to interrupt-poll handling in the block layer. This + * is similar to NAPI for network devices. + */ +#include +#include +#include +#include +#include +#include +#include +#include +#include + +#include "blk.h" + +int blk_iopoll_enabled = 1; +EXPORT_SYMBOL(blk_iopoll_enabled); + +static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); + +/** + * blk_iopoll_sched - Schedule a run of the iopoll handler + * @iop: The parent iopoll structure + * + * Description: + * Add this blk_iopoll structure to the pending poll list and trigger the raise + * of the blk iopoll softirq. The driver must already have gotten a succesful + * return from blk_iopoll_sched_prep() before calling this. 
+ **/ +void blk_iopoll_sched(struct blk_iopoll *iop) +{ + unsigned long flags; + + local_irq_save(flags); + list_add_tail(&iop->list, &__get_cpu_var(blk_cpu_iopoll)); + __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + local_irq_restore(flags); +} +EXPORT_SYMBOL(blk_iopoll_sched); + +/** + * __blk_iopoll_complete - Mark this @iop as un-polled again + * @iop: The parent iopoll structure + * + * Description: + * See blk_iopoll_complete(). This function must be called with interrupts disabled. + **/ +void __blk_iopoll_complete(struct blk_iopoll *iop) +{ + list_del(&iop->list); + smp_mb__before_clear_bit(); + clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(__blk_iopoll_complete); + +/** + * blk_iopoll_complete - Mark this @iop as un-polled again + * @iop: The parent iopoll structure + * + * Description: + * If a driver consumes less than the assigned budget in its run of the iopoll + * handler, it'll end the polled mode by calling this function. The iopoll handler + * will not be invoked again before blk_iopoll_sched_prep() is called. + **/ +void blk_iopoll_complete(struct blk_iopoll *iopoll) +{ + unsigned long flags; + + local_irq_save(flags); + __blk_iopoll_complete(iopoll); + local_irq_restore(flags); +} +EXPORT_SYMBOL(blk_iopoll_complete); + +static void blk_iopoll_softirq(struct softirq_action *h) +{ + struct list_head *list = &__get_cpu_var(blk_cpu_iopoll); + unsigned long start_time = jiffies; + int rearm = 0, budget = 64; + + local_irq_disable(); + + while (!list_empty(list)) { + struct blk_iopoll *iop; + int work, weight; + + /* + * If softirq window is exhausted then punt. + */ + if (budget <= 0 || time_after(jiffies, start_time)) { + rearm = 1; + break; + } + + local_irq_enable(); + + /* Even though interrupts have been re-enabled, this + * access is safe because interrupts can only add new + * entries to the tail of this list, and only ->poll() + * calls can remove this head entry from the list. 
+ */ + iop = list_entry(list->next, struct blk_iopoll, list); + + weight = iop->weight; + work = 0; + if (test_bit(IOPOLL_F_SCHED, &iop->state)) + work = iop->poll(iop, weight); + + budget -= work; + + local_irq_disable(); + + /* Drivers must not modify the NAPI state if they + * consume the entire weight. In such cases this code + * still "owns" the NAPI instance and therefore can + * move the instance around on the list at-will. + */ + if (work >= weight) { + if (blk_iopoll_disable_pending(iop)) + __blk_iopoll_complete(iop); + else + list_move_tail(&iop->list, list); + } + } + + if (rearm) + __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + + local_irq_enable(); +} + +/** + * blk_iopoll_disable - Disable iopoll on this @iop + * @iop: The parent iopoll structure + * + * Description: + * Disable io polling and wait for any pending callbacks to have completed. + **/ +void blk_iopoll_disable(struct blk_iopoll *iop) +{ + set_bit(IOPOLL_F_DISABLE, &iop->state); + while (test_and_set_bit(IOPOLL_F_SCHED, &iop->state)) + msleep(1); + clear_bit(IOPOLL_F_DISABLE, &iop->state); +} +EXPORT_SYMBOL(blk_iopoll_disable); + +/** + * blk_iopoll_enable - Enable iopoll on this @iop + * @iop: The parent iopoll structure + * + * Description: + * Enable iopoll on this @iop. Note that the handler run will not be scheduled, it + * will only mark it as active. + **/ +void blk_iopoll_enable(struct blk_iopoll *iop) +{ + BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state)); + smp_mb__before_clear_bit(); + clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(blk_iopoll_enable); + +/** + * blk_iopoll_init - Initialize this @iop + * @iop: The parent iopoll structure + * @weight: The default weight (or command completion budget) + * @poll_fn: The handler to invoke + * + * Description: + * Initialize this blk_iopoll structure. Before being actively used, the driver + * must call blk_iopoll_enable(). 
+ **/ +void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn) +{ + memset(iop, 0, sizeof(*iop)); + INIT_LIST_HEAD(&iop->list); + iop->weight = weight; + iop->poll = poll_fn; + set_bit(IOPOLL_F_SCHED, &iop->state); +} +EXPORT_SYMBOL(blk_iopoll_init); + +static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self, + unsigned long action, void *hcpu) +{ + /* + * If a CPU goes away, splice its entries to the current CPU + * and trigger a run of the softirq + */ + if (action == CPU_DEAD || action == CPU_DEAD_FROZEN) { + int cpu = (unsigned long) hcpu; + + local_irq_disable(); + list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), + &__get_cpu_var(blk_cpu_iopoll)); + raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + local_irq_enable(); + } + + return NOTIFY_OK; +} + +static struct notifier_block __cpuinitdata blk_iopoll_cpu_notifier = { + .notifier_call = blk_iopoll_cpu_notify, +}; + +static __init int blk_iopoll_setup(void) +{ + int i; + + for_each_possible_cpu(i) + INIT_LIST_HEAD(&per_cpu(blk_cpu_iopoll, i)); + + open_softirq(BLOCK_IOPOLL_SOFTIRQ, blk_iopoll_softirq); + register_hotcpu_notifier(&blk_iopoll_cpu_notifier); + return 0; +} +subsys_initcall(blk_iopoll_setup); -- cgit v1.1 From 1badcfbd7febd33f71fc02bda9112bd25e9c294b Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 6 Aug 2009 20:49:14 +0200 Subject: block: fix long lines in block/blk-iopoll.c Not sure why they happened in the first place, probably some bad terminal setting. 
Signed-off-by: Jens Axboe --- block/blk-iopoll.c | 26 ++++++++++++++------------ 1 file changed, 14 insertions(+), 12 deletions(-) (limited to 'block/blk-iopoll.c') diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c index 566db1e..df6f192 100644 --- a/block/blk-iopoll.c +++ b/block/blk-iopoll.c @@ -24,9 +24,9 @@ static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); * @iop: The parent iopoll structure * * Description: - * Add this blk_iopoll structure to the pending poll list and trigger the raise - * of the blk iopoll softirq. The driver must already have gotten a succesful - * return from blk_iopoll_sched_prep() before calling this. + * Add this blk_iopoll structure to the pending poll list and trigger the + * raise of the blk iopoll softirq. The driver must already have gotten a + * succesful return from blk_iopoll_sched_prep() before calling this. **/ void blk_iopoll_sched(struct blk_iopoll *iop) { @@ -44,7 +44,8 @@ EXPORT_SYMBOL(blk_iopoll_sched); * @iop: The parent iopoll structure * * Description: - * See blk_iopoll_complete(). This function must be called with interrupts disabled. + * See blk_iopoll_complete(). This function must be called with interrupts + * disabled. **/ void __blk_iopoll_complete(struct blk_iopoll *iop) { @@ -59,9 +60,10 @@ EXPORT_SYMBOL(__blk_iopoll_complete); * @iop: The parent iopoll structure * * Description: - * If a driver consumes less than the assigned budget in its run of the iopoll - * handler, it'll end the polled mode by calling this function. The iopoll handler - * will not be invoked again before blk_iopoll_sched_prep() is called. + * If a driver consumes less than the assigned budget in its run of the + * iopoll handler, it'll end the polled mode by calling this function. The + * iopoll handler will not be invoked again before blk_iopoll_sched_prep() + * is called. 
**/ void blk_iopoll_complete(struct blk_iopoll *iopoll) { @@ -151,13 +153,13 @@ EXPORT_SYMBOL(blk_iopoll_disable); * @iop: The parent iopoll structure * * Description: - * Enable iopoll on this @iop. Note that the handler run will not be scheduled, it - * will only mark it as active. + * Enable iopoll on this @iop. Note that the handler run will not be + * scheduled, it will only mark it as active. **/ void blk_iopoll_enable(struct blk_iopoll *iop) { BUG_ON(!test_bit(IOPOLL_F_SCHED, &iop->state)); - smp_mb__before_clear_bit(); + smp_mb__before_clear_bit(); clear_bit_unlock(IOPOLL_F_SCHED, &iop->state); } EXPORT_SYMBOL(blk_iopoll_enable); @@ -169,8 +171,8 @@ EXPORT_SYMBOL(blk_iopoll_enable); * @poll_fn: The handler to invoke * * Description: - * Initialize this blk_iopoll structure. Before being actively used, the driver - * must call blk_iopoll_enable(). + * Initialize this blk_iopoll structure. Before being actively used, the + * driver must call blk_iopoll_enable(). **/ void blk_iopoll_init(struct blk_iopoll *iop, int weight, blk_iopoll_fn *poll_fn) { -- cgit v1.1 From 37867ae7c549c58d44337e27738577ed8b7cd753 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 6 Aug 2009 20:50:48 +0200 Subject: block: adjust default budget for blk-iopoll It's not exported, I doubt we'll have a reason to change this... 
Signed-off-by: Jens Axboe --- block/blk-iopoll.c | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) (limited to 'block/blk-iopoll.c') diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c index df6f192..0671d46 100644 --- a/block/blk-iopoll.c +++ b/block/blk-iopoll.c @@ -17,6 +17,8 @@ int blk_iopoll_enabled = 1; EXPORT_SYMBOL(blk_iopoll_enabled); +static unsigned int blk_iopoll_budget __read_mostly = 256; + static DEFINE_PER_CPU(struct list_head, blk_cpu_iopoll); /** @@ -78,8 +80,8 @@ EXPORT_SYMBOL(blk_iopoll_complete); static void blk_iopoll_softirq(struct softirq_action *h) { struct list_head *list = &__get_cpu_var(blk_cpu_iopoll); + int rearm = 0, budget = blk_iopoll_budget; unsigned long start_time = jiffies; - int rearm = 0, budget = 64; local_irq_disable(); -- cgit v1.1 From fca51d64c5baf64604bc1edc5c5f0e7ed176322f Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 6 Aug 2009 20:53:23 +0200 Subject: block: fix comment in blk-iopoll.c Signed-off-by: Jens Axboe --- block/blk-iopoll.c | 9 ++++++--- 1 file changed, 6 insertions(+), 3 deletions(-) (limited to 'block/blk-iopoll.c') diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c index 0671d46..b9b3265 100644 --- a/block/blk-iopoll.c +++ b/block/blk-iopoll.c @@ -115,9 +115,12 @@ static void blk_iopoll_softirq(struct softirq_action *h) local_irq_disable(); - /* Drivers must not modify the NAPI state if they - * consume the entire weight. In such cases this code - * still "owns" the NAPI instance and therefore can + /* + * Drivers must not modify the iopoll state, if they + * consume their assigned weight (or more, some drivers can't + * easily just stop processing, they have to complete an + * entire mask of commands).In such cases this code + * still "owns" the iopoll instance and therefore can * move the instance around on the list at-will. 
*/ if (work >= weight) { -- cgit v1.1 From a33dac26d42d6f156b3b05a929961bd8d904f6e2 Mon Sep 17 00:00:00 2001 From: Jens Axboe Date: Thu, 6 Aug 2009 20:53:45 +0200 Subject: block: use interrupts disabled version of raise_softirq_irqoff() We already have interrupts disabled at that point, so use the __raise_softirq_irqoff() variant. Signed-off-by: Jens Axboe --- block/blk-iopoll.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'block/blk-iopoll.c') diff --git a/block/blk-iopoll.c b/block/blk-iopoll.c index b9b3265..ca56420 100644 --- a/block/blk-iopoll.c +++ b/block/blk-iopoll.c @@ -202,7 +202,7 @@ static int __cpuinit blk_iopoll_cpu_notify(struct notifier_block *self, local_irq_disable(); list_splice_init(&per_cpu(blk_cpu_iopoll, cpu), &__get_cpu_var(blk_cpu_iopoll)); - raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); + __raise_softirq_irqoff(BLOCK_IOPOLL_SOFTIRQ); local_irq_enable(); } -- cgit v1.1