diff options
Diffstat (limited to 'block/as-iosched.c')
-rw-r--r-- | block/as-iosched.c | 674 |
1 files changed, 162 insertions, 512 deletions
diff --git a/block/as-iosched.c b/block/as-iosched.c index 5da56d4..50b95e4c 100644 --- a/block/as-iosched.c +++ b/block/as-iosched.c @@ -1,7 +1,7 @@ /* * Anticipatory & deadline i/o scheduler. * - * Copyright (C) 2002 Jens Axboe <axboe@suse.de> + * Copyright (C) 2002 Jens Axboe <axboe@kernel.dk> * Nick Piggin <nickpiggin@yahoo.com.au> * */ @@ -14,7 +14,6 @@ #include <linux/slab.h> #include <linux/init.h> #include <linux/compiler.h> -#include <linux/hash.h> #include <linux/rbtree.h> #include <linux/interrupt.h> @@ -93,9 +92,8 @@ struct as_data { struct rb_root sort_list[2]; struct list_head fifo_list[2]; - struct as_rq *next_arq[2]; /* next in sort order */ + struct request *next_rq[2]; /* next in sort order */ sector_t last_sector[2]; /* last REQ_SYNC & REQ_ASYNC sectors */ - struct hlist_head *hash; /* request hash */ unsigned long exit_prob; /* probability a task will exit while being waited on */ @@ -115,7 +113,6 @@ struct as_data { int write_batch_count; /* max # of reqs in a write batch */ int current_write_count; /* how many requests left this batch */ int write_batch_idled; /* has the write batch gone idle? */ - mempool_t *arq_pool; enum anticipation_status antic_status; unsigned long antic_start; /* jiffies: when it started */ @@ -133,8 +130,6 @@ struct as_data { unsigned long antic_expire; }; -#define list_entry_fifo(ptr) list_entry((ptr), struct as_rq, fifo) - /* * per-request data. */ @@ -150,40 +145,14 @@ enum arq_state { AS_RQ_POSTSCHED, /* when they shouldn't be */ }; -struct as_rq { - /* - * rbtree index, key is the starting offset - */ - struct rb_node rb_node; - sector_t rb_key; - - struct request *request; - - struct io_context *io_context; /* The submitting task */ - - /* - * request hash, key is the ending offset (for back merge lookup) - */ - struct hlist_node hash; - - /* - * expire fifo - */ - struct list_head fifo; - unsigned long expires; +#define RQ_IOC(rq) ((struct io_context *) (rq)->elevator_private) +#define RQ_STATE(rq) ((enum arq_state)(rq)->elevator_private2) +#define RQ_SET_STATE(rq, state) ((rq)->elevator_private2 = (void *) state) - unsigned int is_sync; - enum arq_state state; -}; - -#define RQ_DATA(rq) ((struct as_rq *) (rq)->elevator_private) - -static kmem_cache_t *arq_pool; - -static atomic_t ioc_count = ATOMIC_INIT(0); +static DEFINE_PER_CPU(unsigned long, ioc_count); static struct completion *ioc_gone; -static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq); +static void as_move_to_dispatch(struct as_data *ad, struct request *rq); static void as_antic_stop(struct as_data *ad); /* @@ -194,7 +163,8 @@ static void as_antic_stop(struct as_data *ad); static void free_as_io_context(struct as_io_context *aic) { kfree(aic); - if (atomic_dec_and_test(&ioc_count) && ioc_gone) + elv_ioc_count_dec(ioc_count); + if (ioc_gone && !elv_ioc_count_read(ioc_count)) complete(ioc_gone); } @@ -230,7 +200,7 @@ static struct as_io_context *alloc_as_io_context(void) ret->seek_total = 0; ret->seek_samples = 0; ret->seek_mean = 0; - atomic_inc(&ioc_count); + elv_ioc_count_inc(ioc_count); } return ret; @@ -240,9 +210,9 @@ static struct as_io_context *alloc_as_io_context(void) * If the current task has no AS IO context then create one and initialise it. * Then take a ref on the task's io context and return it. */ -static struct io_context *as_get_io_context(void) +static struct io_context *as_get_io_context(int node) { - struct io_context *ioc = get_io_context(GFP_ATOMIC); + struct io_context *ioc = get_io_context(GFP_ATOMIC, node); if (ioc && !ioc->aic) { ioc->aic = alloc_as_io_context(); if (!ioc->aic) { @@ -253,194 +223,43 @@ static struct io_context *as_get_io_context(void) return ioc; } -static void as_put_io_context(struct as_rq *arq) +static void as_put_io_context(struct request *rq) { struct as_io_context *aic; - if (unlikely(!arq->io_context)) + if (unlikely(!RQ_IOC(rq))) return; - aic = arq->io_context->aic; + aic = RQ_IOC(rq)->aic; - if (arq->is_sync == REQ_SYNC && aic) { + if (rq_is_sync(rq) && aic) { spin_lock(&aic->lock); set_bit(AS_TASK_IORUNNING, &aic->state); aic->last_end_request = jiffies; spin_unlock(&aic->lock); } - put_io_context(arq->io_context); -} - -/* - * the back merge hash support functions - */ -static const int as_hash_shift = 6; -#define AS_HASH_BLOCK(sec) ((sec) >> 3) -#define AS_HASH_FN(sec) (hash_long(AS_HASH_BLOCK((sec)), as_hash_shift)) -#define AS_HASH_ENTRIES (1 << as_hash_shift) -#define rq_hash_key(rq) ((rq)->sector + (rq)->nr_sectors) - -static inline void __as_del_arq_hash(struct as_rq *arq) -{ - hlist_del_init(&arq->hash); -} - -static inline void as_del_arq_hash(struct as_rq *arq) -{ - if (!hlist_unhashed(&arq->hash)) - __as_del_arq_hash(arq); -} - -static void as_add_arq_hash(struct as_data *ad, struct as_rq *arq) -{ - struct request *rq = arq->request; - - BUG_ON(!hlist_unhashed(&arq->hash)); - - hlist_add_head(&arq->hash, &ad->hash[AS_HASH_FN(rq_hash_key(rq))]); -} - -/* - * move hot entry to front of chain - */ -static inline void as_hot_arq_hash(struct as_data *ad, struct as_rq *arq) -{ - struct request *rq = arq->request; - struct hlist_head *head = &ad->hash[AS_HASH_FN(rq_hash_key(rq))]; - - if (hlist_unhashed(&arq->hash)) { - WARN_ON(1); - return; - } - - if (&arq->hash != head->first) { - hlist_del(&arq->hash); - hlist_add_head(&arq->hash, head); - } -} - -static struct request *as_find_arq_hash(struct as_data *ad, sector_t offset) -{ - struct hlist_head *hash_list = &ad->hash[AS_HASH_FN(offset)]; - struct hlist_node *entry, *next; - struct as_rq *arq; - - hlist_for_each_entry_safe(arq, entry, next, hash_list, hash) { - struct request *__rq = arq->request; - - BUG_ON(hlist_unhashed(&arq->hash)); - - if (!rq_mergeable(__rq)) { - as_del_arq_hash(arq); - continue; - } - - if (rq_hash_key(__rq) == offset) - return __rq; - } - - return NULL; + put_io_context(RQ_IOC(rq)); } /* * rb tree support functions */ -#define rb_entry_arq(node) rb_entry((node), struct as_rq, rb_node) -#define ARQ_RB_ROOT(ad, arq) (&(ad)->sort_list[(arq)->is_sync]) -#define rq_rb_key(rq) (rq)->sector - -/* - * as_find_first_arq finds the first (lowest sector numbered) request - * for the specified data_dir. Used to sweep back to the start of the disk - * (1-way elevator) after we process the last (highest sector) request. - */ -static struct as_rq *as_find_first_arq(struct as_data *ad, int data_dir) -{ - struct rb_node *n = ad->sort_list[data_dir].rb_node; - - if (n == NULL) - return NULL; - - for (;;) { - if (n->rb_left == NULL) - return rb_entry_arq(n); - - n = n->rb_left; - } -} - -/* - * Add the request to the rb tree if it is unique. If there is an alias (an - * existing request against the same sector), which can happen when using - * direct IO, then return the alias. - */ -static struct as_rq *__as_add_arq_rb(struct as_data *ad, struct as_rq *arq) -{ - struct rb_node **p = &ARQ_RB_ROOT(ad, arq)->rb_node; - struct rb_node *parent = NULL; - struct as_rq *__arq; - struct request *rq = arq->request; - - arq->rb_key = rq_rb_key(rq); - - while (*p) { - parent = *p; - __arq = rb_entry_arq(parent); - - if (arq->rb_key < __arq->rb_key) - p = &(*p)->rb_left; - else if (arq->rb_key > __arq->rb_key) - p = &(*p)->rb_right; - else - return __arq; - } - - rb_link_node(&arq->rb_node, parent, p); - rb_insert_color(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); - - return NULL; -} +#define RQ_RB_ROOT(ad, rq) (&(ad)->sort_list[rq_is_sync((rq))]) -static void as_add_arq_rb(struct as_data *ad, struct as_rq *arq) +static void as_add_rq_rb(struct as_data *ad, struct request *rq) { - struct as_rq *alias; + struct request *alias; - while ((unlikely(alias = __as_add_arq_rb(ad, arq)))) { + while ((unlikely(alias = elv_rb_add(RQ_RB_ROOT(ad, rq), rq)))) { as_move_to_dispatch(ad, alias); as_antic_stop(ad); } } -static inline void as_del_arq_rb(struct as_data *ad, struct as_rq *arq) -{ - if (!RB_EMPTY_NODE(&arq->rb_node)) { - WARN_ON(1); - return; - } - - rb_erase(&arq->rb_node, ARQ_RB_ROOT(ad, arq)); - RB_CLEAR_NODE(&arq->rb_node); -} - -static struct request * -as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir) +static inline void as_del_rq_rb(struct as_data *ad, struct request *rq) { - struct rb_node *n = ad->sort_list[data_dir].rb_node; - struct as_rq *arq; - - while (n) { - arq = rb_entry_arq(n); - - if (sector < arq->rb_key) - n = n->rb_left; - else if (sector > arq->rb_key) - n = n->rb_right; - else - return arq->request; - } - - return NULL; + elv_rb_del(RQ_RB_ROOT(ad, rq), rq); } /* @@ -458,26 +277,26 @@ as_find_arq_rb(struct as_data *ad, sector_t sector, int data_dir) * as_choose_req selects the preferred one of two requests of the same data_dir * ignoring time - eg. timeouts, which is the job of as_dispatch_request */ -static struct as_rq * -as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) +static struct request * +as_choose_req(struct as_data *ad, struct request *rq1, struct request *rq2) { int data_dir; sector_t last, s1, s2, d1, d2; int r1_wrap=0, r2_wrap=0; /* requests are behind the disk head */ const sector_t maxback = MAXBACK; - if (arq1 == NULL || arq1 == arq2) - return arq2; - if (arq2 == NULL) - return arq1; + if (rq1 == NULL || rq1 == rq2) + return rq2; + if (rq2 == NULL) + return rq1; - data_dir = arq1->is_sync; + data_dir = rq_is_sync(rq1); last = ad->last_sector[data_dir]; - s1 = arq1->request->sector; - s2 = arq2->request->sector; + s1 = rq1->sector; + s2 = rq2->sector; - BUG_ON(data_dir != arq2->is_sync); + BUG_ON(data_dir != rq_is_sync(rq2)); /* * Strict one way elevator _except_ in the case where we allow @@ -504,61 +323,58 @@ as_choose_req(struct as_data *ad, struct as_rq *arq1, struct as_rq *arq2) /* Found required data */ if (!r1_wrap && r2_wrap) - return arq1; + return rq1; else if (!r2_wrap && r1_wrap) - return arq2; + return rq2; else if (r1_wrap && r2_wrap) { /* both behind the head */ if (s1 <= s2) - return arq1; + return rq1; else - return arq2; + return rq2; } /* Both requests in front of the head */ if (d1 < d2) - return arq1; + return rq1; else if (d2 < d1) - return arq2; + return rq2; else { if (s1 >= s2) - return arq1; + return rq1; else - return arq2; + return rq2; } } /* - * as_find_next_arq finds the next request after @prev in elevator order. + * as_find_next_rq finds the next request after @prev in elevator order. * this with as_choose_req form the basis for how the scheduler chooses * what request to process next. Anticipation works on top of this. */ -static struct as_rq *as_find_next_arq(struct as_data *ad, struct as_rq *last) +static struct request * +as_find_next_rq(struct as_data *ad, struct request *last) { - const int data_dir = last->is_sync; - struct as_rq *ret; struct rb_node *rbnext = rb_next(&last->rb_node); struct rb_node *rbprev = rb_prev(&last->rb_node); - struct as_rq *arq_next, *arq_prev; + struct request *next = NULL, *prev = NULL; - BUG_ON(!RB_EMPTY_NODE(&last->rb_node)); + BUG_ON(RB_EMPTY_NODE(&last->rb_node)); if (rbprev) - arq_prev = rb_entry_arq(rbprev); - else - arq_prev = NULL; + prev = rb_entry_rq(rbprev); if (rbnext) - arq_next = rb_entry_arq(rbnext); + next = rb_entry_rq(rbnext); else { - arq_next = as_find_first_arq(ad, data_dir); - if (arq_next == last) - arq_next = NULL; - } + const int data_dir = rq_is_sync(last); - ret = as_choose_req(ad, arq_next, arq_prev); + rbnext = rb_first(&ad->sort_list[data_dir]); + if (rbnext && rbnext != &last->rb_node) + next = rb_entry_rq(rbnext); + } - return ret; + return as_choose_req(ad, next, prev); } /* @@ -712,8 +528,7 @@ static void as_update_seekdist(struct as_data *ad, struct as_io_context *aic, static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - int data_dir = arq->is_sync; + int data_dir = rq_is_sync(rq); unsigned long thinktime = 0; sector_t seek_dist; @@ -752,11 +567,11 @@ static void as_update_iohist(struct as_data *ad, struct as_io_context *aic, * previous one issued. */ static int as_close_req(struct as_data *ad, struct as_io_context *aic, - struct as_rq *arq) + struct request *rq) { unsigned long delay; /* milliseconds */ sector_t last = ad->last_sector[ad->batch_data_dir]; - sector_t next = arq->request->sector; + sector_t next = rq->sector; sector_t delta; /* acceptable close offset (in sectors) */ sector_t s; @@ -813,7 +628,7 @@ static int as_close_req(struct as_data *ad, struct as_io_context *aic, * * If this task has queued some other IO, do not enter enticipation. */ -static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) +static int as_can_break_anticipation(struct as_data *ad, struct request *rq) { struct io_context *ioc; struct as_io_context *aic; @@ -821,7 +636,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) ioc = ad->io_context; BUG_ON(!ioc); - if (arq && ioc == arq->io_context) { + if (rq && ioc == RQ_IOC(rq)) { /* request from same process */ return 1; } @@ -848,7 +663,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) return 1; } - if (arq && arq->is_sync == REQ_SYNC && as_close_req(ad, aic, arq)) { + if (rq && rq_is_sync(rq) && as_close_req(ad, aic, rq)) { /* * Found a close request that is not one of ours. * @@ -864,7 +679,7 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) ad->exit_no_coop = (7*ad->exit_no_coop)/8; } - as_update_iohist(ad, aic, arq->request); + as_update_iohist(ad, aic, rq); return 1; } @@ -891,10 +706,10 @@ static int as_can_break_anticipation(struct as_data *ad, struct as_rq *arq) } /* - * as_can_anticipate indicates whether we should either run arq + * as_can_anticipate indicates whether we should either run rq * or keep anticipating a better request. */ -static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) +static int as_can_anticipate(struct as_data *ad, struct request *rq) { if (!ad->io_context) /* @@ -908,7 +723,7 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) */ return 0; - if (as_can_break_anticipation(ad, arq)) + if (as_can_break_anticipation(ad, rq)) /* * This request is a good candidate. Don't keep anticipating, * run it. @@ -926,16 +741,16 @@ static int as_can_anticipate(struct as_data *ad, struct as_rq *arq) } /* - * as_update_arq must be called whenever a request (arq) is added to + * as_update_rq must be called whenever a request (rq) is added to * the sort_list. This function keeps caches up to date, and checks if the * request might be one we are "anticipating" */ -static void as_update_arq(struct as_data *ad, struct as_rq *arq) +static void as_update_rq(struct as_data *ad, struct request *rq) { - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(rq); - /* keep the next_arq cache up to date */ - ad->next_arq[data_dir] = as_choose_req(ad, arq, ad->next_arq[data_dir]); + /* keep the next_rq cache up to date */ + ad->next_rq[data_dir] = as_choose_req(ad, rq, ad->next_rq[data_dir]); /* * have we been anticipating this request? @@ -944,7 +759,7 @@ static void as_update_arq(struct as_data *ad, struct as_rq *arq) */ if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { - if (as_can_break_anticipation(ad, arq)) + if (as_can_break_anticipation(ad, rq)) as_antic_stop(ad); } } @@ -984,12 +799,11 @@ static void update_write_batch(struct as_data *ad) static void as_completed_request(request_queue_t *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(rq); WARN_ON(!list_empty(&rq->queuelist)); - if (arq->state != AS_RQ_REMOVED) { - printk("arq->state %d\n", arq->state); + if (RQ_STATE(rq) != AS_RQ_REMOVED) { + printk("rq->state %d\n", RQ_STATE(rq)); WARN_ON(1); goto out; } @@ -1009,14 +823,14 @@ static void as_completed_request(request_queue_t *q, struct request *rq) * actually serviced. This should help devices with big TCQ windows * and writeback caches */ - if (ad->new_batch && ad->batch_data_dir == arq->is_sync) { + if (ad->new_batch && ad->batch_data_dir == rq_is_sync(rq)) { update_write_batch(ad); ad->current_batch_expires = jiffies + ad->batch_expire[REQ_SYNC]; ad->new_batch = 0; } - if (ad->io_context == arq->io_context && ad->io_context) { + if (ad->io_context == RQ_IOC(rq) && ad->io_context) { ad->antic_start = jiffies; ad->ioc_finished = 1; if (ad->antic_status == ANTIC_WAIT_REQ) { @@ -1028,9 +842,9 @@ static void as_completed_request(request_queue_t *q, struct request *rq) } } - as_put_io_context(arq); + as_put_io_context(rq); out: - arq->state = AS_RQ_POSTSCHED; + RQ_SET_STATE(rq, AS_RQ_POSTSCHED); } /* @@ -1041,27 +855,27 @@ out: */ static void as_remove_queued_request(request_queue_t *q, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(rq); struct as_data *ad = q->elevator->elevator_data; + struct io_context *ioc; - WARN_ON(arq->state != AS_RQ_QUEUED); + WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED); - if (arq->io_context && arq->io_context->aic) { - BUG_ON(!atomic_read(&arq->io_context->aic->nr_queued)); - atomic_dec(&arq->io_context->aic->nr_queued); + ioc = RQ_IOC(rq); + if (ioc && ioc->aic) { + BUG_ON(!atomic_read(&ioc->aic->nr_queued)); + atomic_dec(&ioc->aic->nr_queued); } /* - * Update the "next_arq" cache if we are about to remove its + * Update the "next_rq" cache if we are about to remove its * entry */ - if (ad->next_arq[data_dir] == arq) - ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + if (ad->next_rq[data_dir] == rq) + ad->next_rq[data_dir] = as_find_next_rq(ad, rq); - list_del_init(&arq->fifo); - as_del_arq_hash(arq); - as_del_arq_rb(ad, arq); + rq_fifo_clear(rq); + as_del_rq_rb(ad, rq); } /* @@ -1074,7 +888,7 @@ static void as_remove_queued_request(request_queue_t *q, struct request *rq) */ static int as_fifo_expired(struct as_data *ad, int adir) { - struct as_rq *arq; + struct request *rq; long delta_jif; delta_jif = jiffies - ad->last_check_fifo[adir]; @@ -1088,9 +902,9 @@ static int as_fifo_expired(struct as_data *ad, int adir) if (list_empty(&ad->fifo_list[adir])) return 0; - arq = list_entry_fifo(ad->fifo_list[adir].next); + rq = rq_entry_fifo(ad->fifo_list[adir].next); - return time_after(jiffies, arq->expires); + return time_after(jiffies, rq_fifo_time(rq)); } /* @@ -1113,25 +927,25 @@ static inline int as_batch_expired(struct as_data *ad) /* * move an entry to dispatch queue */ -static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) +static void as_move_to_dispatch(struct as_data *ad, struct request *rq) { - struct request *rq = arq->request; - const int data_dir = arq->is_sync; + const int data_dir = rq_is_sync(rq); - BUG_ON(!RB_EMPTY_NODE(&arq->rb_node)); + BUG_ON(RB_EMPTY_NODE(&rq->rb_node)); as_antic_stop(ad); ad->antic_status = ANTIC_OFF; /* * This has to be set in order to be correctly updated by - * as_find_next_arq + * as_find_next_rq */ ad->last_sector[data_dir] = rq->sector + rq->nr_sectors; if (data_dir == REQ_SYNC) { + struct io_context *ioc = RQ_IOC(rq); /* In case we have to anticipate after this */ - copy_io_context(&ad->io_context, &arq->io_context); + copy_io_context(&ad->io_context, &ioc); } else { if (ad->io_context) { put_io_context(ad->io_context); @@ -1143,19 +957,19 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) } ad->ioc_finished = 0; - ad->next_arq[data_dir] = as_find_next_arq(ad, arq); + ad->next_rq[data_dir] = as_find_next_rq(ad, rq); /* * take it off the sort and fifo list, add to dispatch queue */ as_remove_queued_request(ad->q, rq); - WARN_ON(arq->state != AS_RQ_QUEUED); + WARN_ON(RQ_STATE(rq) != AS_RQ_QUEUED); elv_dispatch_sort(ad->q, rq); - arq->state = AS_RQ_DISPATCHED; - if (arq->io_context && arq->io_context->aic) - atomic_inc(&arq->io_context->aic->nr_dispatched); + RQ_SET_STATE(rq, AS_RQ_DISPATCHED); + if (RQ_IOC(rq) && RQ_IOC(rq)->aic) + atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched); ad->nr_dispatched++; } @@ -1167,9 +981,9 @@ static void as_move_to_dispatch(struct as_data *ad, struct as_rq *arq) static int as_dispatch_request(request_queue_t *q, int force) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq; const int reads = !list_empty(&ad->fifo_list[REQ_SYNC]); const int writes = !list_empty(&ad->fifo_list[REQ_ASYNC]); + struct request *rq; if (unlikely(force)) { /* @@ -1185,14 +999,14 @@ static int as_dispatch_request(request_queue_t *q, int force) ad->changed_batch = 0; ad->new_batch = 0; - while (ad->next_arq[REQ_SYNC]) { - as_move_to_dispatch(ad, ad->next_arq[REQ_SYNC]); + while (ad->next_rq[REQ_SYNC]) { + as_move_to_dispatch(ad, ad->next_rq[REQ_SYNC]); dispatched++; } ad->last_check_fifo[REQ_SYNC] = jiffies; - while (ad->next_arq[REQ_ASYNC]) { - as_move_to_dispatch(ad, ad->next_arq[REQ_ASYNC]); + while (ad->next_rq[REQ_ASYNC]) { + as_move_to_dispatch(ad, ad->next_rq[REQ_ASYNC]); dispatched++; } ad->last_check_fifo[REQ_ASYNC] = jiffies; @@ -1216,19 +1030,19 @@ static int as_dispatch_request(request_queue_t *q, int force) /* * batch is still running or no reads or no writes */ - arq = ad->next_arq[ad->batch_data_dir]; + rq = ad->next_rq[ad->batch_data_dir]; if (ad->batch_data_dir == REQ_SYNC && ad->antic_expire) { if (as_fifo_expired(ad, REQ_SYNC)) goto fifo_expired; - if (as_can_anticipate(ad, arq)) { + if (as_can_anticipate(ad, rq)) { as_antic_waitreq(ad); return 0; } } - if (arq) { + if (rq) { /* we have a "next request" */ if (reads && !writes) ad->current_batch_expires = @@ -1256,7 +1070,7 @@ static int as_dispatch_request(request_queue_t *q, int force) ad->changed_batch = 1; } ad->batch_data_dir = REQ_SYNC; - arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); + rq = rq_entry_fifo(ad->fifo_list[REQ_SYNC].next); ad->last_check_fifo[ad->batch_data_dir] = jiffies; goto dispatch_request; } @@ -1282,7 +1096,7 @@ dispatch_writes: ad->batch_data_dir = REQ_ASYNC; ad->current_write_count = ad->write_batch_count; ad->write_batch_idled = 0; - arq = ad->next_arq[ad->batch_data_dir]; + rq = ad->next_rq[ad->batch_data_dir]; goto dispatch_request; } @@ -1296,8 +1110,7 @@ dispatch_request: if (as_fifo_expired(ad, ad->batch_data_dir)) { fifo_expired: - arq = list_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); - BUG_ON(arq == NULL); + rq = rq_entry_fifo(ad->fifo_list[ad->batch_data_dir].next); } if (ad->changed_batch) { @@ -1316,70 +1129,58 @@ fifo_expired: } /* - * arq is the selected appropriate request. + * rq is the selected appropriate request. */ - as_move_to_dispatch(ad, arq); + as_move_to_dispatch(ad, rq); return 1; } /* - * add arq to rbtree and fifo + * add rq to rbtree and fifo */ static void as_add_request(request_queue_t *q, struct request *rq) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(rq); int data_dir; - arq->state = AS_RQ_NEW; + RQ_SET_STATE(rq, AS_RQ_NEW); - if (rq_data_dir(arq->request) == READ - || (arq->request->flags & REQ_RW_SYNC)) - arq->is_sync = 1; - else - arq->is_sync = 0; - data_dir = arq->is_sync; + data_dir = rq_is_sync(rq); - arq->io_context = as_get_io_context(); + rq->elevator_private = as_get_io_context(q->node); - if (arq->io_context) { - as_update_iohist(ad, arq->io_context->aic, arq->request); - atomic_inc(&arq->io_context->aic->nr_queued); + if (RQ_IOC(rq)) { + as_update_iohist(ad, RQ_IOC(rq)->aic, rq); + atomic_inc(&RQ_IOC(rq)->aic->nr_queued); } - as_add_arq_rb(ad, arq); - if (rq_mergeable(arq->request)) - as_add_arq_hash(ad, arq); + as_add_rq_rb(ad, rq); /* * set expire time (only used for reads) and add to fifo list */ - arq->expires = jiffies + ad->fifo_expire[data_dir]; - list_add_tail(&arq->fifo, &ad->fifo_list[data_dir]); + rq_set_fifo_time(rq, jiffies + ad->fifo_expire[data_dir]); + list_add_tail(&rq->queuelist, &ad->fifo_list[data_dir]); - as_update_arq(ad, arq); /* keep state machine up to date */ - arq->state = AS_RQ_QUEUED; + as_update_rq(ad, rq); /* keep state machine up to date */ + RQ_SET_STATE(rq, AS_RQ_QUEUED); } static void as_activate_request(request_queue_t *q, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - - WARN_ON(arq->state != AS_RQ_DISPATCHED); - arq->state = AS_RQ_REMOVED; - if (arq->io_context && arq->io_context->aic) - atomic_dec(&arq->io_context->aic->nr_dispatched); + WARN_ON(RQ_STATE(rq) != AS_RQ_DISPATCHED); + RQ_SET_STATE(rq, AS_RQ_REMOVED); + if (RQ_IOC(rq) && RQ_IOC(rq)->aic) + atomic_dec(&RQ_IOC(rq)->aic->nr_dispatched); } static void as_deactivate_request(request_queue_t *q, struct request *rq) { - struct as_rq *arq = RQ_DATA(rq); - - WARN_ON(arq->state != AS_RQ_REMOVED); - arq->state = AS_RQ_DISPATCHED; - if (arq->io_context && arq->io_context->aic) - atomic_inc(&arq->io_context->aic->nr_dispatched); + WARN_ON(RQ_STATE(rq) != AS_RQ_REMOVED); + RQ_SET_STATE(rq, AS_RQ_DISPATCHED); + if (RQ_IOC(rq) && RQ_IOC(rq)->aic) + atomic_inc(&RQ_IOC(rq)->aic->nr_dispatched); } /* @@ -1396,93 +1197,35 @@ static int as_queue_empty(request_queue_t *q) && list_empty(&ad->fifo_list[REQ_SYNC]); } -static struct request *as_former_request(request_queue_t *q, - struct request *rq) -{ - struct as_rq *arq = RQ_DATA(rq); - struct rb_node *rbprev = rb_prev(&arq->rb_node); - struct request *ret = NULL; - - if (rbprev) - ret = rb_entry_arq(rbprev)->request; - - return ret; -} - -static struct request *as_latter_request(request_queue_t *q, - struct request *rq) -{ - struct as_rq *arq = RQ_DATA(rq); - struct rb_node *rbnext = rb_next(&arq->rb_node); - struct request *ret = NULL; - - if (rbnext) - ret = rb_entry_arq(rbnext)->request; - - return ret; -} - static int as_merge(request_queue_t *q, struct request **req, struct bio *bio) { struct as_data *ad = q->elevator->elevator_data; sector_t rb_key = bio->bi_sector + bio_sectors(bio); struct request *__rq; - int ret; - - /* - * see if the merge hash can satisfy a back merge - */ - __rq = as_find_arq_hash(ad, bio->bi_sector); - if (__rq) { - BUG_ON(__rq->sector + __rq->nr_sectors != bio->bi_sector); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_BACK_MERGE; - goto out; - } - } /* * check for front merge */ - __rq = as_find_arq_rb(ad, rb_key, bio_data_dir(bio)); - if (__rq) { - BUG_ON(rb_key != rq_rb_key(__rq)); - - if (elv_rq_merge_ok(__rq, bio)) { - ret = ELEVATOR_FRONT_MERGE; - goto out; - } + __rq = elv_rb_find(&ad->sort_list[bio_data_dir(bio)], rb_key); + if (__rq && elv_rq_merge_ok(__rq, bio)) { + *req = __rq; + return ELEVATOR_FRONT_MERGE; } return ELEVATOR_NO_MERGE; -out: - if (ret) { - if (rq_mergeable(__rq)) - as_hot_arq_hash(ad, RQ_DATA(__rq)); - } - *req = __rq; - return ret; } -static void as_merged_request(request_queue_t *q, struct request *req) +static void as_merged_request(request_queue_t *q, struct request *req, int type) { struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(req); - - /* - * hash always needs to be repositioned, key is end sector - */ - as_del_arq_hash(arq); - as_add_arq_hash(ad, arq); /* * if the merge was a front merge, we need to reposition request */ - if (rq_rb_key(req) != arq->rb_key) { - as_del_arq_rb(ad, arq); - as_add_arq_rb(ad, arq); + if (type == ELEVATOR_FRONT_MERGE) { + as_del_rq_rb(ad, req); + as_add_rq_rb(ad, req); /* * Note! At this stage of this and the next function, our next * request may not be optimal - eg the request may have "grown" @@ -1494,38 +1237,22 @@ static void as_merged_request(request_queue_t *q, struct request *req) static void as_merged_requests(request_queue_t *q, struct request *req, struct request *next) { - struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(req); - struct as_rq *anext = RQ_DATA(next); - - BUG_ON(!arq); - BUG_ON(!anext); - /* - * reposition arq (this is the merged request) in hash, and in rbtree - * in case of a front merge + * if next expires before rq, assign its expire time to arq + * and move into next position (next will be deleted) in fifo */ - as_del_arq_hash(arq); - as_add_arq_hash(ad, arq); - - if (rq_rb_key(req) != arq->rb_key) { - as_del_arq_rb(ad, arq); - as_add_arq_rb(ad, arq); - } + if (!list_empty(&req->queuelist) && !list_empty(&next->queuelist)) { + if (time_before(rq_fifo_time(next), rq_fifo_time(req))) { + struct io_context *rioc = RQ_IOC(req); + struct io_context *nioc = RQ_IOC(next); - /* - * if anext expires before arq, assign its expire time to arq - * and move into anext position (anext will be deleted) in fifo - */ - if (!list_empty(&arq->fifo) && !list_empty(&anext->fifo)) { - if (time_before(anext->expires, arq->expires)) { - list_move(&arq->fifo, &anext->fifo); - arq->expires = anext->expires; + list_move(&req->queuelist, &next->queuelist); + rq_set_fifo_time(req, rq_fifo_time(next)); /* * Don't copy here but swap, because when anext is * removed below, it must contain the unused context */ - swap_io_context(&arq->io_context, &anext->io_context); + swap_io_context(&rioc, &nioc); } } @@ -1533,9 +1260,9 @@ static void as_merged_requests(request_queue_t *q, struct request *req, * kill knowledge of next, this one is a goner */ as_remove_queued_request(q, next); - as_put_io_context(anext); + as_put_io_context(next); - anext->state = AS_RQ_MERGED; + RQ_SET_STATE(next, AS_RQ_MERGED); } /* @@ -1553,61 +1280,18 @@ static void as_work_handler(void *data) unsigned long flags; spin_lock_irqsave(q->queue_lock, flags); - if (!as_queue_empty(q)) - q->request_fn(q); + blk_start_queueing(q); spin_unlock_irqrestore(q->queue_lock, flags); } -static void as_put_request(request_queue_t *q, struct request *rq) -{ - struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = RQ_DATA(rq); - - if (!arq) { - WARN_ON(1); - return; - } - - if (unlikely(arq->state != AS_RQ_POSTSCHED && - arq->state != AS_RQ_PRESCHED && - arq->state != AS_RQ_MERGED)) { - printk("arq->state %d\n", arq->state); - WARN_ON(1); - } - - mempool_free(arq, ad->arq_pool); - rq->elevator_private = NULL; -} - -static int as_set_request(request_queue_t *q, struct request *rq, - struct bio *bio, gfp_t gfp_mask) -{ - struct as_data *ad = q->elevator->elevator_data; - struct as_rq *arq = mempool_alloc(ad->arq_pool, gfp_mask); - - if (arq) { - memset(arq, 0, sizeof(*arq)); - RB_CLEAR_NODE(&arq->rb_node); - arq->request = rq; - arq->state = AS_RQ_PRESCHED; - arq->io_context = NULL; - INIT_HLIST_NODE(&arq->hash); - INIT_LIST_HEAD(&arq->fifo); - rq->elevator_private = arq; - return 0; - } - - return 1; -} - -static int as_may_queue(request_queue_t *q, int rw, struct bio *bio) +static int as_may_queue(request_queue_t *q, int rw) { int ret = ELV_MQUEUE_MAY; struct as_data *ad = q->elevator->elevator_data; struct io_context *ioc; if (ad->antic_status == ANTIC_WAIT_REQ || ad->antic_status == ANTIC_WAIT_NEXT) { - ioc = as_get_io_context(); + ioc = as_get_io_context(q->node); if (ad->io_context == ioc) ret = ELV_MQUEUE_MUST; put_io_context(ioc); @@ -1626,23 +1310,16 @@ static void as_exit_queue(elevator_t *e) BUG_ON(!list_empty(&ad->fifo_list[REQ_SYNC])); BUG_ON(!list_empty(&ad->fifo_list[REQ_ASYNC])); - mempool_destroy(ad->arq_pool); put_io_context(ad->io_context); - kfree(ad->hash); kfree(ad); } /* - * initialize elevator private data (as_data), and alloc a arq for - * each request on the free lists + * initialize elevator private data (as_data). */ static void *as_init_queue(request_queue_t *q, elevator_t *e) { struct as_data *ad; - int i; - - if (!arq_pool) - return NULL; ad = kmalloc_node(sizeof(*ad), GFP_KERNEL, q->node); if (!ad) @@ -1651,30 +1328,12 @@ static void *as_init_queue(request_queue_t *q, elevator_t *e) ad->q = q; /* Identify what queue the data belongs to */ - ad->hash = kmalloc_node(sizeof(struct hlist_head)*AS_HASH_ENTRIES, - GFP_KERNEL, q->node); - if (!ad->hash) { - kfree(ad); - return NULL; - } - - ad->arq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, - mempool_free_slab, arq_pool, q->node); - if (!ad->arq_pool) { - kfree(ad->hash); - kfree(ad); - return NULL; - } - /* anticipatory scheduling helpers */ ad->antic_timer.function = as_antic_timeout; ad->antic_timer.data = (unsigned long)q; init_timer(&ad->antic_timer); INIT_WORK(&ad->antic_work, as_work_handler, q); - for (i = 0; i < AS_HASH_ENTRIES; i++) - INIT_HLIST_HEAD(&ad->hash[i]); - INIT_LIST_HEAD(&ad->fifo_list[REQ_SYNC]); INIT_LIST_HEAD(&ad->fifo_list[REQ_ASYNC]); ad->sort_list[REQ_SYNC] = RB_ROOT; @@ -1787,10 +1446,8 @@ static struct elevator_type iosched_as = { .elevator_deactivate_req_fn = as_deactivate_request, .elevator_queue_empty_fn = as_queue_empty, .elevator_completed_req_fn = as_completed_request, - .elevator_former_req_fn = as_former_request, - .elevator_latter_req_fn = as_latter_request, - .elevator_set_req_fn = as_set_request, - .elevator_put_req_fn = as_put_request, + .elevator_former_req_fn = elv_rb_former_request, + .elevator_latter_req_fn = elv_rb_latter_request, .elevator_may_queue_fn = as_may_queue, .elevator_init_fn = as_init_queue, .elevator_exit_fn = as_exit_queue, @@ -1806,11 +1463,6 @@ static int __init as_init(void) { int ret; - arq_pool = kmem_cache_create("as_arq", sizeof(struct as_rq), - 0, 0, NULL, NULL); - if (!arq_pool) - return -ENOMEM; - ret = elv_register(&iosched_as); if (!ret) { /* @@ -1822,21 +1474,19 @@ static int __init as_init(void) return 0; } - kmem_cache_destroy(arq_pool); return ret; } static void __exit as_exit(void) { - DECLARE_COMPLETION(all_gone); + DECLARE_COMPLETION_ONSTACK(all_gone); elv_unregister(&iosched_as); ioc_gone = &all_gone; /* ioc_gone's update must be visible before reading ioc_count */ smp_wmb(); - if (atomic_read(&ioc_count)) + if (elv_ioc_count_read(ioc_count)) wait_for_completion(ioc_gone); synchronize_rcu(); - kmem_cache_destroy(arq_pool); } module_init(as_init); |