diff options
Diffstat (limited to 'block')
-rw-r--r-- | block/blk-core.c | 113 | ||||
-rw-r--r-- | block/blk-merge.c | 31 | ||||
-rw-r--r-- | block/blk-settings.c | 2 | ||||
-rw-r--r-- | block/blk-softirq.c | 2 | ||||
-rw-r--r-- | block/blk-sysfs.c | 44 | ||||
-rw-r--r-- | block/blk.h | 16 | ||||
-rw-r--r-- | block/bsg.c | 12 | ||||
-rw-r--r-- | block/cfq-iosched.c | 206 | ||||
-rw-r--r-- | block/cmd-filter.c | 1 | ||||
-rw-r--r-- | block/elevator.c | 44 | ||||
-rw-r--r-- | block/scsi_ioctl.c | 21 |
11 files changed, 264 insertions, 228 deletions
diff --git a/block/blk-core.c b/block/blk-core.c index 859879d..07ab754 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -64,12 +64,11 @@ static struct workqueue_struct *kblockd_workqueue; static void drive_stat_acct(struct request *rq, int new_io) { - struct gendisk *disk = rq->rq_disk; struct hd_struct *part; int rw = rq_data_dir(rq); int cpu; - if (!blk_fs_request(rq) || !disk || !blk_do_io_stat(disk->queue)) + if (!blk_fs_request(rq) || !blk_do_io_stat(rq)) return; cpu = part_stat_lock(); @@ -485,11 +484,11 @@ static int blk_init_free_list(struct request_queue *q) { struct request_list *rl = &q->rq; - rl->count[READ] = rl->count[WRITE] = 0; - rl->starved[READ] = rl->starved[WRITE] = 0; + rl->count[BLK_RW_SYNC] = rl->count[BLK_RW_ASYNC] = 0; + rl->starved[BLK_RW_SYNC] = rl->starved[BLK_RW_ASYNC] = 0; rl->elvpriv = 0; - init_waitqueue_head(&rl->wait[READ]); - init_waitqueue_head(&rl->wait[WRITE]); + init_waitqueue_head(&rl->wait[BLK_RW_SYNC]); + init_waitqueue_head(&rl->wait[BLK_RW_ASYNC]); rl->rq_pool = mempool_create_node(BLKDEV_MIN_RQ, mempool_alloc_slab, mempool_free_slab, request_cachep, q->node); @@ -604,13 +603,10 @@ blk_init_queue_node(request_fn_proc *rfn, spinlock_t *lock, int node_id) q->queue_flags = QUEUE_FLAG_DEFAULT; q->queue_lock = lock; - blk_queue_segment_boundary(q, BLK_SEG_BOUNDARY_MASK); - + /* + * This also sets hw/phys segments, boundary and size + */ blk_queue_make_request(q, __make_request); - blk_queue_max_segment_size(q, MAX_SEGMENT_SIZE); - - blk_queue_max_hw_segments(q, MAX_HW_SEGMENTS); - blk_queue_max_phys_segments(q, MAX_PHYS_SEGMENTS); q->sg_reserved_size = INT_MAX; @@ -703,18 +699,18 @@ static void ioc_set_batching(struct request_queue *q, struct io_context *ioc) ioc->last_waited = jiffies; } -static void __freed_request(struct request_queue *q, int rw) +static void __freed_request(struct request_queue *q, int sync) { struct request_list *rl = &q->rq; - if (rl->count[rw] < queue_congestion_off_threshold(q)) - blk_clear_queue_congested(q, rw); + if (rl->count[sync] < queue_congestion_off_threshold(q)) + blk_clear_queue_congested(q, sync); - if (rl->count[rw] + 1 <= q->nr_requests) { - if (waitqueue_active(&rl->wait[rw])) - wake_up(&rl->wait[rw]); + if (rl->count[sync] + 1 <= q->nr_requests) { + if (waitqueue_active(&rl->wait[sync])) + wake_up(&rl->wait[sync]); - blk_clear_queue_full(q, rw); + blk_clear_queue_full(q, sync); } } @@ -722,21 +718,20 @@ static void __freed_request(struct request_queue *q, int rw) * A request has just been released. Account for it, update the full and * congestion status, wake up any waiters. Called under q->queue_lock. */ -static void freed_request(struct request_queue *q, int rw, int priv) +static void freed_request(struct request_queue *q, int sync, int priv) { struct request_list *rl = &q->rq; - rl->count[rw]--; + rl->count[sync]--; if (priv) rl->elvpriv--; - __freed_request(q, rw); + __freed_request(q, sync); - if (unlikely(rl->starved[rw ^ 1])) - __freed_request(q, rw ^ 1); + if (unlikely(rl->starved[sync ^ 1])) + __freed_request(q, sync ^ 1); } -#define blkdev_free_rq(list) list_entry((list)->next, struct request, queuelist) /* * Get a free request, queue_lock must be held. * Returns NULL on failure, with queue_lock held. @@ -748,15 +743,15 @@ static struct request *get_request(struct request_queue *q, int rw_flags, struct request *rq = NULL; struct request_list *rl = &q->rq; struct io_context *ioc = NULL; - const int rw = rw_flags & 0x01; + const bool is_sync = rw_is_sync(rw_flags) != 0; int may_queue, priv; may_queue = elv_may_queue(q, rw_flags); if (may_queue == ELV_MQUEUE_NO) goto rq_starved; - if (rl->count[rw]+1 >= queue_congestion_on_threshold(q)) { - if (rl->count[rw]+1 >= q->nr_requests) { + if (rl->count[is_sync]+1 >= queue_congestion_on_threshold(q)) { + if (rl->count[is_sync]+1 >= q->nr_requests) { ioc = current_io_context(GFP_ATOMIC, q->node); /* * The queue will fill after this allocation, so set @@ -764,9 +759,9 @@ static struct request *get_request(struct request_queue *q, int rw_flags, * This process will be allowed to complete a batch of * requests, others will be blocked. */ - if (!blk_queue_full(q, rw)) { + if (!blk_queue_full(q, is_sync)) { ioc_set_batching(q, ioc); - blk_set_queue_full(q, rw); + blk_set_queue_full(q, is_sync); } else { if (may_queue != ELV_MQUEUE_MUST && !ioc_batching(q, ioc)) { @@ -779,7 +774,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, } } } - blk_set_queue_congested(q, rw); + blk_set_queue_congested(q, is_sync); } /* @@ -787,11 +782,11 @@ static struct request *get_request(struct request_queue *q, int rw_flags, * limit of requests, otherwise we could have thousands of requests * allocated with any setting of ->nr_requests */ - if (rl->count[rw] >= (3 * q->nr_requests / 2)) + if (rl->count[is_sync] >= (3 * q->nr_requests / 2)) goto out; - rl->count[rw]++; - rl->starved[rw] = 0; + rl->count[is_sync]++; + rl->starved[is_sync] = 0; priv = !test_bit(QUEUE_FLAG_ELVSWITCH, &q->queue_flags); if (priv) @@ -809,7 +804,7 @@ static struct request *get_request(struct request_queue *q, int rw_flags, * wait queue, but this is pretty rare. */ spin_lock_irq(q->queue_lock); - freed_request(q, rw, priv); + freed_request(q, is_sync, priv); /* * in the very unlikely event that allocation failed and no @@ -819,8 +814,8 @@ static struct request *get_request(struct request_queue *q, int rw_flags, * rq mempool into READ and WRITE */ rq_starved: - if (unlikely(rl->count[rw] == 0)) - rl->starved[rw] = 1; + if (unlikely(rl->count[is_sync] == 0)) + rl->starved[is_sync] = 1; goto out; } @@ -834,7 +829,7 @@ rq_starved: if (ioc_batching(q, ioc)) ioc->nr_batch_requests--; - trace_block_getrq(q, bio, rw); + trace_block_getrq(q, bio, rw_flags & 1); out: return rq; } @@ -848,7 +843,7 @@ out: static struct request *get_request_wait(struct request_queue *q, int rw_flags, struct bio *bio) { - const int rw = rw_flags & 0x01; + const bool is_sync = rw_is_sync(rw_flags) != 0; struct request *rq; rq = get_request(q, rw_flags, bio, GFP_NOIO); @@ -857,10 +852,10 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, struct io_context *ioc; struct request_list *rl = &q->rq; - prepare_to_wait_exclusive(&rl->wait[rw], &wait, + prepare_to_wait_exclusive(&rl->wait[is_sync], &wait, TASK_UNINTERRUPTIBLE); - trace_block_sleeprq(q, bio, rw); + trace_block_sleeprq(q, bio, rw_flags & 1); __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); @@ -876,7 +871,7 @@ static struct request *get_request_wait(struct request_queue *q, int rw_flags, ioc_set_batching(q, ioc); spin_lock_irq(q->queue_lock); - finish_wait(&rl->wait[rw], &wait); + finish_wait(&rl->wait[is_sync], &wait); rq = get_request(q, rw_flags, bio, GFP_NOIO); }; @@ -1067,19 +1062,22 @@ void __blk_put_request(struct request_queue *q, struct request *req) elv_completed_request(q, req); + /* this is a bio leak */ + WARN_ON(req->bio != NULL); + /* * Request may not have originated from ll_rw_blk. if not, * it didn't come out of our reserved rq pools */ if (req->cmd_flags & REQ_ALLOCED) { - int rw = rq_data_dir(req); + int is_sync = rq_is_sync(req) != 0; int priv = req->cmd_flags & REQ_ELVPRIV; BUG_ON(!list_empty(&req->queuelist)); BUG_ON(!hlist_unhashed(&req->hash)); blk_free_request(q, req); - freed_request(q, rw, priv); + freed_request(q, is_sync, priv); } } EXPORT_SYMBOL_GPL(__blk_put_request); @@ -1126,10 +1124,10 @@ void init_request_from_bio(struct request *req, struct bio *bio) if (bio_sync(bio)) req->cmd_flags |= REQ_RW_SYNC; - if (bio_unplug(bio)) - req->cmd_flags |= REQ_UNPLUG; if (bio_rw_meta(bio)) req->cmd_flags |= REQ_RW_META; + if (bio_noidle(bio)) + req->cmd_flags |= REQ_NOIDLE; req->errors = 0; req->hard_sector = req->sector = bio->bi_sector; @@ -1138,6 +1136,15 @@ void init_request_from_bio(struct request *req, struct bio *bio) blk_rq_bio_prep(req->q, req, bio); } +/* + * Only disabling plugging for non-rotational devices if it does tagging + * as well, otherwise we do need the proper merging + */ +static inline bool queue_should_plug(struct request_queue *q) +{ + return !(blk_queue_nonrot(q) && blk_queue_tagged(q)); +} + static int __make_request(struct request_queue *q, struct bio *bio) { struct request *req; @@ -1244,11 +1251,11 @@ get_rq: if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) || bio_flagged(bio, BIO_CPU_AFFINE)) req->cpu = blk_cpu_to_group(smp_processor_id()); - if (!blk_queue_nonrot(q) && elv_queue_empty(q)) + if (queue_should_plug(q) && elv_queue_empty(q)) blk_plug_device(q); add_request(q, req); out: - if (unplug || blk_queue_nonrot(q)) + if (unplug || !queue_should_plug(q)) __generic_unplug_device(q); spin_unlock_irq(q->queue_lock); return 0; @@ -1666,9 +1673,7 @@ EXPORT_SYMBOL(blkdev_dequeue_request); static void blk_account_io_completion(struct request *req, unsigned int bytes) { - struct gendisk *disk = req->rq_disk; - - if (!disk || !blk_do_io_stat(disk->queue)) + if (!blk_do_io_stat(req)) return; if (blk_fs_request(req)) { @@ -1685,9 +1690,7 @@ static void blk_account_io_completion(struct request *req, unsigned int bytes) static void blk_account_io_done(struct request *req) { - struct gendisk *disk = req->rq_disk; - - if (!disk || !blk_do_io_stat(disk->queue)) + if (!blk_do_io_stat(req)) return; /* @@ -1702,7 +1705,7 @@ static void blk_account_io_done(struct request *req) int cpu; cpu = part_stat_lock(); - part = disk_map_sector_rcu(disk, req->sector); + part = disk_map_sector_rcu(req->rq_disk, req->sector); part_stat_inc(cpu, part, ios[rw]); part_stat_add(cpu, part, ticks[rw], duration); diff --git a/block/blk-merge.c b/block/blk-merge.c index 5a244f0..63760ca 100644 --- a/block/blk-merge.c +++ b/block/blk-merge.c @@ -338,6 +338,22 @@ static int ll_merge_requests_fn(struct request_queue *q, struct request *req, return 1; } +static void blk_account_io_merge(struct request *req) +{ + if (blk_do_io_stat(req)) { + struct hd_struct *part; + int cpu; + + cpu = part_stat_lock(); + part = disk_map_sector_rcu(req->rq_disk, req->sector); + + part_round_stats(cpu, part); + part_dec_in_flight(part); + + part_stat_unlock(); + } +} + /* * Has to be called with the request spinlock acquired */ @@ -386,23 +402,14 @@ static int attempt_merge(struct request_queue *q, struct request *req, elv_merge_requests(q, req, next); - if (req->rq_disk) { - struct hd_struct *part; - int cpu; - - cpu = part_stat_lock(); - part = disk_map_sector_rcu(req->rq_disk, req->sector); - - part_round_stats(cpu, part); - part_dec_in_flight(part); - - part_stat_unlock(); - } + blk_account_io_merge(req); req->ioprio = ioprio_best(req->ioprio, next->ioprio); if (blk_rq_cpu_valid(next)) req->cpu = next->cpu; + /* owner-ship of bio passed from next to req */ + next->bio = NULL; __blk_put_request(q, next); return 1; } diff --git a/block/blk-settings.c b/block/blk-settings.c index 59fd05d..69c42ad 100644 --- a/block/blk-settings.c +++ b/block/blk-settings.c @@ -431,7 +431,7 @@ EXPORT_SYMBOL(blk_queue_segment_boundary); * * description: * set required memory and length alignment for direct dma transactions. - * this is used when buiding direct io requests for the queue. + * this is used when building direct io requests for the queue. * **/ void blk_queue_dma_alignment(struct request_queue *q, int mask) diff --git a/block/blk-softirq.c b/block/blk-softirq.c index ce0efc6..ee9c216 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -64,7 +64,7 @@ static int raise_blk_irq(int cpu, struct request *rq) data->info = rq; data->flags = 0; - __smp_call_function_single(cpu, data); + __smp_call_function_single(cpu, data, 0); return 0; } diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index e29ddfc..73f36be 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -48,28 +48,28 @@ queue_requests_store(struct request_queue *q, const char *page, size_t count) q->nr_requests = nr; blk_queue_congestion_threshold(q); - if (rl->count[READ] >= queue_congestion_on_threshold(q)) - blk_set_queue_congested(q, READ); - else if (rl->count[READ] < queue_congestion_off_threshold(q)) - blk_clear_queue_congested(q, READ); - - if (rl->count[WRITE] >= queue_congestion_on_threshold(q)) - blk_set_queue_congested(q, WRITE); - else if (rl->count[WRITE] < queue_congestion_off_threshold(q)) - blk_clear_queue_congested(q, WRITE); - - if (rl->count[READ] >= q->nr_requests) { - blk_set_queue_full(q, READ); - } else if (rl->count[READ]+1 <= q->nr_requests) { - blk_clear_queue_full(q, READ); - wake_up(&rl->wait[READ]); + if (rl->count[BLK_RW_SYNC] >= queue_congestion_on_threshold(q)) + blk_set_queue_congested(q, BLK_RW_SYNC); + else if (rl->count[BLK_RW_SYNC] < queue_congestion_off_threshold(q)) + blk_clear_queue_congested(q, BLK_RW_SYNC); + + if (rl->count[BLK_RW_ASYNC] >= queue_congestion_on_threshold(q)) + blk_set_queue_congested(q, BLK_RW_ASYNC); + else if (rl->count[BLK_RW_ASYNC] < queue_congestion_off_threshold(q)) + blk_clear_queue_congested(q, BLK_RW_ASYNC); + + if (rl->count[BLK_RW_SYNC] >= q->nr_requests) { + blk_set_queue_full(q, BLK_RW_SYNC); + } else if (rl->count[BLK_RW_SYNC]+1 <= q->nr_requests) { + blk_clear_queue_full(q, BLK_RW_SYNC); + wake_up(&rl->wait[BLK_RW_SYNC]); } - if (rl->count[WRITE] >= q->nr_requests) { - blk_set_queue_full(q, WRITE); - } else if (rl->count[WRITE]+1 <= q->nr_requests) { - blk_clear_queue_full(q, WRITE); - wake_up(&rl->wait[WRITE]); + if (rl->count[BLK_RW_ASYNC] >= q->nr_requests) { + blk_set_queue_full(q, BLK_RW_ASYNC); + } else if (rl->count[BLK_RW_ASYNC]+1 <= q->nr_requests) { + blk_clear_queue_full(q, BLK_RW_ASYNC); + wake_up(&rl->wait[BLK_RW_ASYNC]); } spin_unlock_irq(q->queue_lock); return ret; @@ -209,10 +209,14 @@ static ssize_t queue_iostats_store(struct request_queue *q, const char *page, ssize_t ret = queue_var_store(&stats, page, count); spin_lock_irq(q->queue_lock); + elv_quisce_start(q); + if (stats) queue_flag_set(QUEUE_FLAG_IO_STAT, q); else queue_flag_clear(QUEUE_FLAG_IO_STAT, q); + + elv_quisce_end(q); spin_unlock_irq(q->queue_lock); return ret; diff --git a/block/blk.h b/block/blk.h index 0dce92c..24fcaee 100644 --- a/block/blk.h +++ b/block/blk.h @@ -70,6 +70,10 @@ void blk_queue_congestion_threshold(struct request_queue *q); int blk_dev_init(void); +void elv_quisce_start(struct request_queue *q); +void elv_quisce_end(struct request_queue *q); + + /* * Return the threshold (number of used requests) at which the queue is * considered to be congested. It include a little hysteresis to keep the @@ -102,18 +106,20 @@ static inline int blk_cpu_to_group(int cpu) const struct cpumask *mask = cpu_coregroup_mask(cpu); return cpumask_first(mask); #elif defined(CONFIG_SCHED_SMT) - return first_cpu(per_cpu(cpu_sibling_map, cpu)); + return cpumask_first(topology_thread_cpumask(cpu)); #else return cpu; #endif } -static inline int blk_do_io_stat(struct request_queue *q) +static inline int blk_do_io_stat(struct request *rq) { - if (q) - return blk_queue_io_stat(q); + struct gendisk *disk = rq->rq_disk; - return 0; + if (!disk || !disk->queue) + return 0; + + return blk_queue_io_stat(disk->queue) && (rq->cmd_flags & REQ_ELVPRIV); } #endif diff --git a/block/bsg.c b/block/bsg.c index 0ce8806..206060e 100644 --- a/block/bsg.c +++ b/block/bsg.c @@ -218,9 +218,6 @@ bsg_validate_sgv4_hdr(struct request_queue *q, struct sg_io_v4 *hdr, int *rw) if (hdr->guard != 'Q') return -EINVAL; - if (hdr->dout_xfer_len > (q->max_sectors << 9) || - hdr->din_xfer_len > (q->max_sectors << 9)) - return -EIO; switch (hdr->protocol) { case BSG_PROTOCOL_SCSI: @@ -353,6 +350,8 @@ static void bsg_rq_end_io(struct request *rq, int uptodate) static void bsg_add_command(struct bsg_device *bd, struct request_queue *q, struct bsg_command *bc, struct request *rq) { + int at_head = (0 == (bc->hdr.flags & BSG_FLAG_Q_AT_TAIL)); + /* * add bc command to busy queue and submit rq for io */ @@ -368,7 +367,7 @@ static void bsg_add_command(struct bsg_device *bd, struct request_queue *q, dprintk("%s: queueing rq %p, bc %p\n", bd->name, rq, bc); rq->end_io_data = bc; - blk_execute_rq_nowait(q, NULL, rq, 1, bsg_rq_end_io); + blk_execute_rq_nowait(q, NULL, rq, at_head, bsg_rq_end_io); } static struct bsg_command *bsg_next_done_cmd(struct bsg_device *bd) @@ -924,6 +923,7 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) struct request *rq; struct bio *bio, *bidi_bio = NULL; struct sg_io_v4 hdr; + int at_head; u8 sense[SCSI_SENSE_BUFFERSIZE]; if (copy_from_user(&hdr, uarg, sizeof(hdr))) @@ -936,7 +936,9 @@ static long bsg_ioctl(struct file *file, unsigned int cmd, unsigned long arg) bio = rq->bio; if (rq->next_rq) bidi_bio = rq->next_rq->bio; - blk_execute_rq(bd->queue, NULL, rq, 0); + + at_head = (0 == (hdr.flags & BSG_FLAG_Q_AT_TAIL)); + blk_execute_rq(bd->queue, NULL, rq, at_head); ret = blk_complete_sgv4_hdr_rq(rq, &hdr, bio, bidi_bio); if (copy_to_user(uarg, &hdr, sizeof(hdr))) diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 664ebfd..a4809de 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -160,6 +160,7 @@ struct cfq_queue { unsigned long slice_end; long slice_resid; + unsigned int slice_dispatch; /* pending metadata requests */ int meta_pending; @@ -176,13 +177,12 @@ struct cfq_queue { enum cfqq_state_flags { CFQ_CFQQ_FLAG_on_rr = 0, /* on round-robin busy list */ CFQ_CFQQ_FLAG_wait_request, /* waiting for a request */ + CFQ_CFQQ_FLAG_must_dispatch, /* must be allowed a dispatch */ CFQ_CFQQ_FLAG_must_alloc, /* must be allowed rq alloc */ CFQ_CFQQ_FLAG_must_alloc_slice, /* per-slice must_alloc flag */ - CFQ_CFQQ_FLAG_must_dispatch, /* must dispatch, even if expired */ CFQ_CFQQ_FLAG_fifo_expire, /* FIFO checked in this slice */ CFQ_CFQQ_FLAG_idle_window, /* slice idling enabled */ CFQ_CFQQ_FLAG_prio_changed, /* task priority has changed */ - CFQ_CFQQ_FLAG_queue_new, /* queue never been serviced */ CFQ_CFQQ_FLAG_slice_new, /* no requests dispatched in slice */ CFQ_CFQQ_FLAG_sync, /* synchronous queue */ }; @@ -203,13 +203,12 @@ static inline int cfq_cfqq_##name(const struct cfq_queue *cfqq) \ CFQ_CFQQ_FNS(on_rr); CFQ_CFQQ_FNS(wait_request); +CFQ_CFQQ_FNS(must_dispatch); CFQ_CFQQ_FNS(must_alloc); CFQ_CFQQ_FNS(must_alloc_slice); -CFQ_CFQQ_FNS(must_dispatch); CFQ_CFQQ_FNS(fifo_expire); CFQ_CFQQ_FNS(idle_window); CFQ_CFQQ_FNS(prio_changed); -CFQ_CFQQ_FNS(queue_new); CFQ_CFQQ_FNS(slice_new); CFQ_CFQQ_FNS(sync); #undef CFQ_CFQQ_FNS @@ -774,10 +773,15 @@ static void __cfq_set_active_queue(struct cfq_data *cfqd, if (cfqq) { cfq_log_cfqq(cfqd, cfqq, "set_active"); cfqq->slice_end = 0; + cfqq->slice_dispatch = 0; + + cfq_clear_cfqq_wait_request(cfqq); + cfq_clear_cfqq_must_dispatch(cfqq); cfq_clear_cfqq_must_alloc_slice(cfqq); cfq_clear_cfqq_fifo_expire(cfqq); cfq_mark_cfqq_slice_new(cfqq); - cfq_clear_cfqq_queue_new(cfqq); + + del_timer(&cfqd->idle_slice_timer); } cfqd->active_queue = cfqq; @@ -795,7 +799,6 @@ __cfq_slice_expired(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfq_cfqq_wait_request(cfqq)) del_timer(&cfqd->idle_slice_timer); - cfq_clear_cfqq_must_dispatch(cfqq); cfq_clear_cfqq_wait_request(cfqq); /* @@ -924,7 +927,6 @@ static void cfq_arm_slice_timer(struct cfq_data *cfqd) (sample_valid(cic->ttime_samples) && cic->ttime_mean > 2)) return; - cfq_mark_cfqq_must_dispatch(cfqq); cfq_mark_cfqq_wait_request(cfqq); /* @@ -1010,7 +1012,7 @@ static struct cfq_queue *cfq_select_queue(struct cfq_data *cfqd) /* * The active queue has run out of time, expire it and select new. */ - if (cfq_slice_used(cfqq)) + if (cfq_slice_used(cfqq) && !cfq_cfqq_must_dispatch(cfqq)) goto expire; /* @@ -1053,66 +1055,6 @@ keep_queue: return cfqq; } -/* - * Dispatch some requests from cfqq, moving them to the request queue - * dispatch list. - */ -static int -__cfq_dispatch_requests(struct cfq_data *cfqd, struct cfq_queue *cfqq, - int max_dispatch) -{ - int dispatched = 0; - - BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); - - do { - struct request *rq; - - /* - * follow expired path, else get first next available - */ - rq = cfq_check_fifo(cfqq); - if (rq == NULL) - rq = cfqq->next_rq; - - /* - * finally, insert request into driver dispatch list - */ - cfq_dispatch_insert(cfqd->queue, rq); - - dispatched++; - - if (!cfqd->active_cic) { - atomic_inc(&RQ_CIC(rq)->ioc->refcount); - cfqd->active_cic = RQ_CIC(rq); - } - - if (RB_EMPTY_ROOT(&cfqq->sort_list)) - break; - - /* - * If there is a non-empty RT cfqq waiting for current - * cfqq's timeslice to complete, pre-empt this cfqq - */ - if (!cfq_class_rt(cfqq) && cfqd->busy_rt_queues) - break; - - } while (dispatched < max_dispatch); - - /* - * expire an async queue immediately if it has used up its slice. idle - * queue always expire after 1 dispatch round. - */ - if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && - dispatched >= cfq_prio_to_maxrq(cfqd, cfqq)) || - cfq_class_idle(cfqq))) { - cfqq->slice_end = jiffies + 1; - cfq_slice_expired(cfqd, 0); - } - - return dispatched; -} - static int __cfq_forced_dispatch_cfqq(struct cfq_queue *cfqq) { int dispatched = 0; @@ -1146,11 +1088,45 @@ static int cfq_forced_dispatch(struct cfq_data *cfqd) return dispatched; } +/* + * Dispatch a request from cfqq, moving them to the request queue + * dispatch list. + */ +static void cfq_dispatch_request(struct cfq_data *cfqd, struct cfq_queue *cfqq) +{ + struct request *rq; + + BUG_ON(RB_EMPTY_ROOT(&cfqq->sort_list)); + + /* + * follow expired path, else get first next available + */ + rq = cfq_check_fifo(cfqq); + if (!rq) + rq = cfqq->next_rq; + + /* + * insert request into driver dispatch list + */ + cfq_dispatch_insert(cfqd->queue, rq); + + if (!cfqd->active_cic) { + struct cfq_io_context *cic = RQ_CIC(rq); + + atomic_inc(&cic->ioc->refcount); + cfqd->active_cic = cic; + } +} + +/* + * Find the cfqq that we need to service and move a request from that to the + * dispatch list + */ static int cfq_dispatch_requests(struct request_queue *q, int force) { struct cfq_data *cfqd = q->elevator->elevator_data; struct cfq_queue *cfqq; - int dispatched; + unsigned int max_dispatch; if (!cfqd->busy_queues) return 0; @@ -1158,29 +1134,63 @@ static int cfq_dispatch_requests(struct request_queue *q, int force) if (unlikely(force)) return cfq_forced_dispatch(cfqd); - dispatched = 0; - while ((cfqq = cfq_select_queue(cfqd)) != NULL) { - int max_dispatch; + cfqq = cfq_select_queue(cfqd); + if (!cfqq) + return 0; + + /* + * If this is an async queue and we have sync IO in flight, let it wait + */ + if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) + return 0; + + max_dispatch = cfqd->cfq_quantum; + if (cfq_class_idle(cfqq)) + max_dispatch = 1; - max_dispatch = cfqd->cfq_quantum; + /* + * Does this cfqq already have too much IO in flight? + */ + if (cfqq->dispatched >= max_dispatch) { + /* + * idle queue must always only have a single IO in flight + */ if (cfq_class_idle(cfqq)) - max_dispatch = 1; + return 0; - if (cfqq->dispatched >= max_dispatch && cfqd->busy_queues > 1) - break; + /* + * We have other queues, don't allow more IO from this one + */ + if (cfqd->busy_queues > 1) + return 0; - if (cfqd->sync_flight && !cfq_cfqq_sync(cfqq)) - break; + /* + * we are the only queue, allow up to 4 times of 'quantum' + */ + if (cfqq->dispatched >= 4 * max_dispatch) + return 0; + } - cfq_clear_cfqq_must_dispatch(cfqq); - cfq_clear_cfqq_wait_request(cfqq); - del_timer(&cfqd->idle_slice_timer); + /* + * Dispatch a request from this cfqq + */ + cfq_dispatch_request(cfqd, cfqq); + cfqq->slice_dispatch++; + cfq_clear_cfqq_must_dispatch(cfqq); - dispatched += __cfq_dispatch_requests(cfqd, cfqq, max_dispatch); + /* + * expire an async queue immediately if it has used up its slice. idle + * queue always expire after 1 dispatch round. + */ + if (cfqd->busy_queues > 1 && ((!cfq_cfqq_sync(cfqq) && + cfqq->slice_dispatch >= cfq_prio_to_maxrq(cfqd, cfqq)) || + cfq_class_idle(cfqq))) { + cfqq->slice_end = jiffies + 1; + cfq_slice_expired(cfqd, 0); } - cfq_log(cfqd, "dispatched=%d", dispatched); - return dispatched; + cfq_log(cfqd, "dispatched a request"); + return 1; } /* @@ -1506,7 +1516,6 @@ retry: cfqq->cfqd = cfqd; cfq_mark_cfqq_prio_changed(cfqq); - cfq_mark_cfqq_queue_new(cfqq); cfq_init_prio_data(cfqq, ioc); @@ -1893,15 +1902,13 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfqq == cfqd->active_queue) { /* - * if we are waiting for a request for this queue, let it rip - * immediately and flag that we must not expire this queue - * just now + * Remember that we saw a request from this process, but + * don't start queuing just yet. Otherwise we risk seeing lots + * of tiny requests, because we disrupt the normal plugging + * and merging. */ - if (cfq_cfqq_wait_request(cfqq)) { + if (cfq_cfqq_wait_request(cfqq)) cfq_mark_cfqq_must_dispatch(cfqq); - del_timer(&cfqd->idle_slice_timer); - blk_start_queueing(cfqd->queue); - } } else if (cfq_should_preempt(cfqd, cfqq, rq)) { /* * not the active queue - expire current slice if it is @@ -1910,7 +1917,6 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, * this new queue is RT and the current one is BE */ cfq_preempt_queue(cfqd, cfqq); - cfq_mark_cfqq_must_dispatch(cfqq); blk_start_queueing(cfqd->queue); } } @@ -1992,8 +1998,10 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) } if (cfq_slice_used(cfqq) || cfq_class_idle(cfqq)) cfq_slice_expired(cfqd, 1); - else if (sync && RB_EMPTY_ROOT(&cfqq->sort_list)) + else if (sync && !rq_noidle(rq) && + RB_EMPTY_ROOT(&cfqq->sort_list)) { cfq_arm_slice_timer(cfqd); + } } if (!cfqd->rq_in_driver) @@ -2170,6 +2178,12 @@ static void cfq_idle_slice_timer(unsigned long data) timed_out = 0; /* + * We saw a request before the queue expired, let it through + */ + if (cfq_cfqq_must_dispatch(cfqq)) + goto out_kick; + + /* * expired */ if (cfq_slice_used(cfqq)) @@ -2185,10 +2199,8 @@ static void cfq_idle_slice_timer(unsigned long data) /* * not expired and it has a request pending, let it dispatch */ - if (!RB_EMPTY_ROOT(&cfqq->sort_list)) { - cfq_mark_cfqq_must_dispatch(cfqq); + if (!RB_EMPTY_ROOT(&cfqq->sort_list)) goto out_kick; - } } expire: cfq_slice_expired(cfqd, timed_out); diff --git a/block/cmd-filter.c b/block/cmd-filter.c index 504b275..572bbc2 100644 --- a/block/cmd-filter.c +++ b/block/cmd-filter.c @@ -22,6 +22,7 @@ #include <linux/spinlock.h> #include <linux/capability.h> #include <linux/bitops.h> +#include <linux/blkdev.h> #include <scsi/scsi.h> #include <linux/cdrom.h> diff --git a/block/elevator.c b/block/elevator.c index 98259ed..fb81bcc 100644 --- a/block/elevator.c +++ b/block/elevator.c @@ -573,7 +573,7 @@ void elv_requeue_request(struct request_queue *q, struct request *rq) elv_insert(q, rq, ELEVATOR_INSERT_REQUEUE); } -static void elv_drain_elevator(struct request_queue *q) +void elv_drain_elevator(struct request_queue *q) { static int printed; while (q->elevator->ops->elevator_dispatch_fn(q, 1)) @@ -587,6 +587,31 @@ static void elv_drain_elevator(struct request_queue *q) } } +/* + * Call with queue lock held, interrupts disabled + */ +void elv_quisce_start(struct request_queue *q) +{ + queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); + + /* + * make sure we don't have any requests in flight + */ + elv_drain_elevator(q); + while (q->rq.elvpriv) { + blk_start_queueing(q); + spin_unlock_irq(q->queue_lock); + msleep(10); + spin_lock_irq(q->queue_lock); + elv_drain_elevator(q); + } +} + +void elv_quisce_end(struct request_queue *q) +{ + queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); +} + void elv_insert(struct request_queue *q, struct request *rq, int where) { struct list_head *pos; @@ -677,7 +702,7 @@ void elv_insert(struct request_queue *q, struct request *rq, int where) } if (unplug_it && blk_queue_plugged(q)) { - int nrq = q->rq.count[READ] + q->rq.count[WRITE] + int nrq = q->rq.count[BLK_RW_SYNC] + q->rq.count[BLK_RW_ASYNC] - q->in_flight; if (nrq >= q->unplug_thresh) @@ -1101,18 +1126,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) * Turn on BYPASS and drain all requests w/ elevator private data */ spin_lock_irq(q->queue_lock); - - queue_flag_set(QUEUE_FLAG_ELVSWITCH, q); - - elv_drain_elevator(q); - - while (q->rq.elvpriv) { - blk_start_queueing(q); - spin_unlock_irq(q->queue_lock); - msleep(10); - spin_lock_irq(q->queue_lock); - elv_drain_elevator(q); - } + elv_quisce_start(q); /* * Remember old elevator. @@ -1136,7 +1150,7 @@ static int elevator_switch(struct request_queue *q, struct elevator_type *new_e) */ elevator_exit(old_elevator); spin_lock_irq(q->queue_lock); - queue_flag_clear(QUEUE_FLAG_ELVSWITCH, q); + elv_quisce_end(q); spin_unlock_irq(q->queue_lock); blk_add_trace_msg(q, "elv switch: %s", e->elevator_type->elevator_name); diff --git a/block/scsi_ioctl.c b/block/scsi_ioctl.c index ee9c67d..626ee27 100644 --- a/block/scsi_ioctl.c +++ b/block/scsi_ioctl.c @@ -214,21 +214,10 @@ static int blk_fill_sghdr_rq(struct request_queue *q, struct request *rq, return 0; } -/* - * unmap a request that was previously mapped to this sg_io_hdr. handles - * both sg and non-sg sg_io_hdr. - */ -static int blk_unmap_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr) -{ - blk_rq_unmap_user(rq->bio); - blk_put_request(rq); - return 0; -} - static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, struct bio *bio) { - int r, ret = 0; + int ret = 0; /* * fill in all the output members @@ -253,12 +242,10 @@ static int blk_complete_sghdr_rq(struct request *rq, struct sg_io_hdr *hdr, ret = -EFAULT; } - rq->bio = bio; - r = blk_unmap_sghdr_rq(rq, hdr); - if (ret) - r = ret; + blk_rq_unmap_user(bio); + blk_put_request(rq); - return r; + return ret; } static int sg_io(struct request_queue *q, struct gendisk *bd_disk, |