diff options
author | Jens Axboe <axboe@kernel.dk> | 2011-10-19 14:30:42 +0200 |
---|---|---|
committer | Jens Axboe <axboe@kernel.dk> | 2011-10-19 14:30:42 +0200 |
commit | 5c04b426f2e8b46cfc7969a35b2631063a3c646c (patch) | |
tree | 2d27d9f5d2fe5d5e8fbc01a467ec58bcb50235c1 /block | |
parent | 499337bb6511e665a236a6a947f819d98ea340c6 (diff) | |
parent | 899e3ee404961a90b828ad527573aaaac39f0ab1 (diff) | |
download | op-kernel-dev-5c04b426f2e8b46cfc7969a35b2631063a3c646c.zip op-kernel-dev-5c04b426f2e8b46cfc7969a35b2631063a3c646c.tar.gz |
Merge branch 'v3.1-rc10' into for-3.2/core
Conflicts:
block/blk-core.c
include/linux/blkdev.h
Signed-off-by: Jens Axboe <axboe@kernel.dk>
Diffstat (limited to 'block')
-rw-r--r-- | block/Kconfig | 10 | ||||
-rw-r--r-- | block/Makefile | 1 | ||||
-rw-r--r-- | block/blk-cgroup.c | 37 | ||||
-rw-r--r-- | block/blk-core.c | 36 | ||||
-rw-r--r-- | block/blk-flush.c | 25 | ||||
-rw-r--r-- | block/blk-softirq.c | 10 | ||||
-rw-r--r-- | block/blk-sysfs.c | 15 | ||||
-rw-r--r-- | block/blk-throttle.c | 4 | ||||
-rw-r--r-- | block/blk.h | 2 | ||||
-rw-r--r-- | block/bsg-lib.c | 298 | ||||
-rw-r--r-- | block/cfq-iosched.c | 21 | ||||
-rw-r--r-- | block/genhd.c | 8 |
12 files changed, 413 insertions, 54 deletions
diff --git a/block/Kconfig b/block/Kconfig index 60be1e0..e97934e 100644 --- a/block/Kconfig +++ b/block/Kconfig @@ -65,6 +65,16 @@ config BLK_DEV_BSG If unsure, say Y. +config BLK_DEV_BSGLIB + bool "Block layer SG support v4 helper lib" + default n + select BLK_DEV_BSG + help + Subsystems will normally enable this if needed. Users will not + normally need to manually enable this. + + If unsure, say N. + config BLK_DEV_INTEGRITY bool "Block layer data integrity support" ---help--- diff --git a/block/Makefile b/block/Makefile index 0fec4b3..514c6e4 100644 --- a/block/Makefile +++ b/block/Makefile @@ -8,6 +8,7 @@ obj-$(CONFIG_BLOCK) := elevator.o blk-core.o blk-tag.o blk-sysfs.o \ blk-iopoll.o blk-lib.o ioctl.o genhd.o scsi_ioctl.o obj-$(CONFIG_BLK_DEV_BSG) += bsg.o +obj-$(CONFIG_BLK_DEV_BSGLIB) += bsg-lib.o obj-$(CONFIG_BLK_CGROUP) += blk-cgroup.o obj-$(CONFIG_BLK_DEV_THROTTLING) += blk-throttle.o obj-$(CONFIG_IOSCHED_NOOP) += noop-iosched.o diff --git a/block/blk-cgroup.c b/block/blk-cgroup.c index bcaf16e..b596e54 100644 --- a/block/blk-cgroup.c +++ b/block/blk-cgroup.c @@ -785,10 +785,10 @@ static int blkio_policy_parse_and_set(char *buf, { char *s[4], *p, *major_s = NULL, *minor_s = NULL; int ret; - unsigned long major, minor, temp; + unsigned long major, minor; int i = 0; dev_t dev; - u64 bps, iops; + u64 temp; memset(s, 0, sizeof(s)); @@ -826,20 +826,23 @@ static int blkio_policy_parse_and_set(char *buf, dev = MKDEV(major, minor); - ret = blkio_check_dev_num(dev); + ret = strict_strtoull(s[1], 10, &temp); if (ret) - return ret; + return -EINVAL; - newpn->dev = dev; + /* For rule removal, do not check for device presence. */ + if (temp) { + ret = blkio_check_dev_num(dev); + if (ret) + return ret; + } - if (s[1] == NULL) - return -EINVAL; + newpn->dev = dev; switch (plid) { case BLKIO_POLICY_PROP: - ret = strict_strtoul(s[1], 10, &temp); - if (ret || (temp < BLKIO_WEIGHT_MIN && temp > 0) || - temp > BLKIO_WEIGHT_MAX) + if ((temp < BLKIO_WEIGHT_MIN && temp > 0) || + temp > BLKIO_WEIGHT_MAX) return -EINVAL; newpn->plid = plid; @@ -850,26 +853,18 @@ static int blkio_policy_parse_and_set(char *buf, switch(fileid) { case BLKIO_THROTL_read_bps_device: case BLKIO_THROTL_write_bps_device: - ret = strict_strtoull(s[1], 10, &bps); - if (ret) - return -EINVAL; - newpn->plid = plid; newpn->fileid = fileid; - newpn->val.bps = bps; + newpn->val.bps = temp; break; case BLKIO_THROTL_read_iops_device: case BLKIO_THROTL_write_iops_device: - ret = strict_strtoull(s[1], 10, &iops); - if (ret) - return -EINVAL; - - if (iops > THROTL_IOPS_MAX) + if (temp > THROTL_IOPS_MAX) return -EINVAL; newpn->plid = plid; newpn->fileid = fileid; - newpn->val.iops = (unsigned int)iops; + newpn->val.iops = (unsigned int)temp; break; } break; diff --git a/block/blk-core.c b/block/blk-core.c index 97e9e54..79e41a7 100644 --- a/block/blk-core.c +++ b/block/blk-core.c @@ -346,9 +346,10 @@ void blk_put_queue(struct request_queue *q) EXPORT_SYMBOL(blk_put_queue); /* - * Note: If a driver supplied the queue lock, it should not zap that lock - * unexpectedly as some queue cleanup components like elevator_exit() and - * blk_throtl_exit() need queue lock. + * Note: If a driver supplied the queue lock, it is disconnected + * by this function. The actual state of the lock doesn't matter + * here as the request_queue isn't accessible after this point + * (QUEUE_FLAG_DEAD is set) and no other requests will be queued. */ void blk_cleanup_queue(struct request_queue *q) { @@ -365,10 +366,8 @@ void blk_cleanup_queue(struct request_queue *q) queue_flag_set_unlocked(QUEUE_FLAG_DEAD, q); mutex_unlock(&q->sysfs_lock); - if (q->elevator) - elevator_exit(q->elevator); - - blk_throtl_exit(q); + if (q->queue_lock != &q->__queue_lock) + q->queue_lock = &q->__queue_lock; blk_put_queue(q); } @@ -1165,7 +1164,7 @@ static bool bio_attempt_front_merge(struct request_queue *q, * true if merge was successful, otherwise false. */ static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, - struct bio *bio) + struct bio *bio, unsigned int *request_count) { struct blk_plug *plug; struct request *rq; @@ -1174,10 +1173,13 @@ static bool attempt_plug_merge(struct task_struct *tsk, struct request_queue *q, plug = tsk->plug; if (!plug) goto out; + *request_count = 0; list_for_each_entry_reverse(rq, &plug->list, queuelist) { int el_ret; + (*request_count)++; + if (rq->q != q) continue; @@ -1217,6 +1219,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) struct blk_plug *plug; int el_ret, rw_flags, where = ELEVATOR_INSERT_SORT; struct request *req; + unsigned int request_count = 0; /* * low level driver can indicate that it wants pages above a @@ -1235,7 +1238,7 @@ void blk_queue_bio(struct request_queue *q, struct bio *bio) * Check if we can merge with the plugged list before grabbing * any locks. */ - if (attempt_plug_merge(current, q, bio)) + if (attempt_plug_merge(current, q, bio, &request_count)) return; spin_lock_irq(q->queue_lock); @@ -1300,11 +1303,10 @@ get_rq: if (__rq->q != q) plug->should_sort = 1; } + if (request_count >= BLK_MAX_REQUEST_COUNT) + blk_flush_plug_list(plug, false); list_add_tail(&req->queuelist, &plug->list); - plug->count++; drive_stat_acct(req, 1); - if (plug->count >= BLK_MAX_REQUEST_COUNT) - blk_flush_plug_list(plug, false); } else { spin_lock_irq(q->queue_lock); add_acct_request(q, req, where); @@ -1675,6 +1677,7 @@ EXPORT_SYMBOL_GPL(blk_rq_check_limits); int blk_insert_cloned_request(struct request_queue *q, struct request *rq) { unsigned long flags; + int where = ELEVATOR_INSERT_BACK; if (blk_rq_check_limits(q, rq)) return -EIO; @@ -1691,7 +1694,10 @@ int blk_insert_cloned_request(struct request_queue *q, struct request *rq) */ BUG_ON(blk_queued_rq(rq)); - add_acct_request(q, rq, ELEVATOR_INSERT_BACK); + if (rq->cmd_flags & (REQ_FLUSH|REQ_FUA)) + where = ELEVATOR_INSERT_FLUSH; + + add_acct_request(q, rq, where); spin_unlock_irqrestore(q->queue_lock, flags); return 0; @@ -2248,7 +2254,7 @@ static bool blk_end_bidi_request(struct request *rq, int error, * %false - we are done with this request * %true - still buffers pending for this request **/ -static bool __blk_end_bidi_request(struct request *rq, int error, +bool __blk_end_bidi_request(struct request *rq, int error, unsigned int nr_bytes, unsigned int bidi_bytes) { if (blk_update_bidi_request(rq, error, nr_bytes, bidi_bytes)) @@ -2617,7 +2623,6 @@ void blk_start_plug(struct blk_plug *plug) INIT_LIST_HEAD(&plug->list); INIT_LIST_HEAD(&plug->cb_list); plug->should_sort = 0; - plug->count = 0; /* * If this is a nested plug, don't actually assign it. It will be @@ -2701,7 +2706,6 @@ void blk_flush_plug_list(struct blk_plug *plug, bool from_schedule) return; list_splice_init(&plug->list, &list); - plug->count = 0; if (plug->should_sort) { list_sort(NULL, &list, plug_rq_cmp); diff --git a/block/blk-flush.c b/block/blk-flush.c index bb21e4c..491eb30 100644 --- a/block/blk-flush.c +++ b/block/blk-flush.c @@ -95,11 +95,12 @@ static unsigned int blk_flush_policy(unsigned int fflags, struct request *rq) { unsigned int policy = 0; + if (blk_rq_sectors(rq)) + policy |= REQ_FSEQ_DATA; + if (fflags & REQ_FLUSH) { if (rq->cmd_flags & REQ_FLUSH) policy |= REQ_FSEQ_PREFLUSH; - if (blk_rq_sectors(rq)) - policy |= REQ_FSEQ_DATA; if (!(fflags & REQ_FUA) && (rq->cmd_flags & REQ_FUA)) policy |= REQ_FSEQ_POSTFLUSH; } @@ -122,7 +123,7 @@ static void blk_flush_restore_request(struct request *rq) /* make @rq a normal request */ rq->cmd_flags &= ~REQ_FLUSH_SEQ; - rq->end_io = NULL; + rq->end_io = rq->flush.saved_end_io; } /** @@ -300,9 +301,6 @@ void blk_insert_flush(struct request *rq) unsigned int fflags = q->flush_flags; /* may change, cache */ unsigned int policy = blk_flush_policy(fflags, rq); - BUG_ON(rq->end_io); - BUG_ON(!rq->bio || rq->bio != rq->biotail); - /* * @policy now records what operations need to be done. Adjust * REQ_FLUSH and FUA for the driver. @@ -312,6 +310,19 @@ void blk_insert_flush(struct request *rq) rq->cmd_flags &= ~REQ_FUA; /* + * An empty flush handed down from a stacking driver may + * translate into nothing if the underlying device does not + * advertise a write-back cache. In this case, simply + * complete the request. + */ + if (!policy) { + __blk_end_bidi_request(rq, 0, 0, 0); + return; + } + + BUG_ON(!rq->bio || rq->bio != rq->biotail); + + /* * If there's data but flush is not necessary, the request can be * processed directly without going through flush machinery. Queue * for normal execution. @@ -319,6 +330,7 @@ void blk_insert_flush(struct request *rq) if ((policy & REQ_FSEQ_DATA) && !(policy & (REQ_FSEQ_PREFLUSH | REQ_FSEQ_POSTFLUSH))) { list_add_tail(&rq->queuelist, &q->queue_head); + blk_run_queue_async(q); return; } @@ -329,6 +341,7 @@ void blk_insert_flush(struct request *rq) memset(&rq->flush, 0, sizeof(rq->flush)); INIT_LIST_HEAD(&rq->flush.list); rq->cmd_flags |= REQ_FLUSH_SEQ; + rq->flush.saved_end_io = rq->end_io; /* Usually NULL */ rq->end_io = flush_data_end_io; blk_flush_complete_seq(rq, REQ_FSEQ_ACTIONS & ~policy, 0); diff --git a/block/blk-softirq.c b/block/blk-softirq.c index 475fab8..1366a89 100644 --- a/block/blk-softirq.c +++ b/block/blk-softirq.c @@ -115,7 +115,7 @@ void __blk_complete_request(struct request *req) /* * Select completion CPU */ - if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) && req->cpu != -1) { + if (req->cpu != -1) { ccpu = req->cpu; if (!test_bit(QUEUE_FLAG_SAME_FORCE, &q->queue_flags)) { ccpu = blk_cpu_to_group(ccpu); @@ -124,6 +124,14 @@ void __blk_complete_request(struct request *req) } else ccpu = cpu; + /* + * If current CPU and requested CPU are in the same group, running + * softirq in current CPU. One might concern this is just like + * QUEUE_FLAG_SAME_FORCE, but actually not. blk_complete_request() is + * running in interrupt handler, and currently I/O controller doesn't + * support multiple interrupts, so current CPU is unique actually. This + * avoids IPI sending from current CPU to the first CPU of a group. + */ if (ccpu == cpu || ccpu == group_cpu) { struct list_head *list; do_local: diff --git a/block/blk-sysfs.c b/block/blk-sysfs.c index adc923e..a8eff5f 100644 --- a/block/blk-sysfs.c +++ b/block/blk-sysfs.c @@ -258,11 +258,13 @@ queue_rq_affinity_store(struct request_queue *q, const char *page, size_t count) ret = queue_var_store(&val, page, count); spin_lock_irq(q->queue_lock); - if (val) { + if (val == 2) { queue_flag_set(QUEUE_FLAG_SAME_COMP, q); - if (val == 2) - queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); - } else { + queue_flag_set(QUEUE_FLAG_SAME_FORCE, q); + } else if (val == 1) { + queue_flag_set(QUEUE_FLAG_SAME_COMP, q); + queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); + } else if (val == 0) { queue_flag_clear(QUEUE_FLAG_SAME_COMP, q); queue_flag_clear(QUEUE_FLAG_SAME_FORCE, q); } @@ -477,6 +479,11 @@ static void blk_release_queue(struct kobject *kobj) blk_sync_queue(q); + if (q->elevator) + elevator_exit(q->elevator); + + blk_throtl_exit(q); + if (rl->rq_pool) mempool_destroy(rl->rq_pool); diff --git a/block/blk-throttle.c b/block/blk-throttle.c index f6a7941..a19f58c 100644 --- a/block/blk-throttle.c +++ b/block/blk-throttle.c @@ -746,7 +746,7 @@ static bool tg_may_dispatch(struct throtl_data *td, struct throtl_grp *tg, static void throtl_charge_bio(struct throtl_grp *tg, struct bio *bio) { bool rw = bio_data_dir(bio); - bool sync = bio->bi_rw & REQ_SYNC; + bool sync = rw_is_sync(bio->bi_rw); /* Charge the bio to the group */ tg->bytes_disp[rw] += bio->bi_size; @@ -1150,7 +1150,7 @@ int blk_throtl_bio(struct request_queue *q, struct bio **biop) if (tg_no_rule_group(tg, rw)) { blkiocg_update_dispatch_stats(&tg->blkg, bio->bi_size, - rw, bio->bi_rw & REQ_SYNC); + rw, rw_is_sync(bio->bi_rw)); rcu_read_unlock(); return 0; } diff --git a/block/blk.h b/block/blk.h index d658628..20b900a 100644 --- a/block/blk.h +++ b/block/blk.h @@ -17,6 +17,8 @@ int blk_rq_append_bio(struct request_queue *q, struct request *rq, struct bio *bio); void blk_dequeue_request(struct request *rq); void __blk_queue_free_tags(struct request_queue *q); +bool __blk_end_bidi_request(struct request *rq, int error, + unsigned int nr_bytes, unsigned int bidi_bytes); void blk_rq_timed_out_timer(unsigned long data); void blk_delete_timer(struct request *); diff --git a/block/bsg-lib.c b/block/bsg-lib.c new file mode 100644 index 0000000..6690e6e --- /dev/null +++ b/block/bsg-lib.c @@ -0,0 +1,298 @@ +/* + * BSG helper library + * + * Copyright (C) 2008 James Smart, Emulex Corporation + * Copyright (C) 2011 Red Hat, Inc. All rights reserved. + * Copyright (C) 2011 Mike Christie + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with this program; if not, write to the Free Software + * Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + * + */ +#include <linux/slab.h> +#include <linux/blkdev.h> +#include <linux/delay.h> +#include <linux/scatterlist.h> +#include <linux/bsg-lib.h> +#include <linux/module.h> +#include <scsi/scsi_cmnd.h> + +/** + * bsg_destroy_job - routine to teardown/delete a bsg job + * @job: bsg_job that is to be torn down + */ +static void bsg_destroy_job(struct bsg_job *job) +{ + put_device(job->dev); /* release reference for the request */ + + kfree(job->request_payload.sg_list); + kfree(job->reply_payload.sg_list); + kfree(job); +} + +/** + * bsg_job_done - completion routine for bsg requests + * @job: bsg_job that is complete + * @result: job reply result + * @reply_payload_rcv_len: length of payload recvd + * + * The LLD should call this when the bsg job has completed. + */ +void bsg_job_done(struct bsg_job *job, int result, + unsigned int reply_payload_rcv_len) +{ + struct request *req = job->req; + struct request *rsp = req->next_rq; + int err; + + err = job->req->errors = result; + if (err < 0) + /* we're only returning the result field in the reply */ + job->req->sense_len = sizeof(u32); + else + job->req->sense_len = job->reply_len; + /* we assume all request payload was transferred, residual == 0 */ + req->resid_len = 0; + + if (rsp) { + WARN_ON(reply_payload_rcv_len > rsp->resid_len); + + /* set reply (bidi) residual */ + rsp->resid_len -= min(reply_payload_rcv_len, rsp->resid_len); + } + blk_complete_request(req); +} +EXPORT_SYMBOL_GPL(bsg_job_done); + +/** + * bsg_softirq_done - softirq done routine for destroying the bsg requests + * @rq: BSG request that holds the job to be destroyed + */ +static void bsg_softirq_done(struct request *rq) +{ + struct bsg_job *job = rq->special; + + blk_end_request_all(rq, rq->errors); + bsg_destroy_job(job); +} + +static int bsg_map_buffer(struct bsg_buffer *buf, struct request *req) +{ + size_t sz = (sizeof(struct scatterlist) * req->nr_phys_segments); + + BUG_ON(!req->nr_phys_segments); + + buf->sg_list = kzalloc(sz, GFP_KERNEL); + if (!buf->sg_list) + return -ENOMEM; + sg_init_table(buf->sg_list, req->nr_phys_segments); + buf->sg_cnt = blk_rq_map_sg(req->q, req, buf->sg_list); + buf->payload_len = blk_rq_bytes(req); + return 0; +} + +/** + * bsg_create_job - create the bsg_job structure for the bsg request + * @dev: device that is being sent the bsg request + * @req: BSG request that needs a job structure + */ +static int bsg_create_job(struct device *dev, struct request *req) +{ + struct request *rsp = req->next_rq; + struct request_queue *q = req->q; + struct bsg_job *job; + int ret; + + BUG_ON(req->special); + + job = kzalloc(sizeof(struct bsg_job) + q->bsg_job_size, GFP_KERNEL); + if (!job) + return -ENOMEM; + + req->special = job; + job->req = req; + if (q->bsg_job_size) + job->dd_data = (void *)&job[1]; + job->request = req->cmd; + job->request_len = req->cmd_len; + job->reply = req->sense; + job->reply_len = SCSI_SENSE_BUFFERSIZE; /* Size of sense buffer + * allocated */ + if (req->bio) { + ret = bsg_map_buffer(&job->request_payload, req); + if (ret) + goto failjob_rls_job; + } + if (rsp && rsp->bio) { + ret = bsg_map_buffer(&job->reply_payload, rsp); + if (ret) + goto failjob_rls_rqst_payload; + } + job->dev = dev; + /* take a reference for the request */ + get_device(job->dev); + return 0; + +failjob_rls_rqst_payload: + kfree(job->request_payload.sg_list); +failjob_rls_job: + kfree(job); + return -ENOMEM; +} + +/* + * bsg_goose_queue - restart queue in case it was stopped + * @q: request q to be restarted + */ +void bsg_goose_queue(struct request_queue *q) +{ + if (!q) + return; + + blk_run_queue_async(q); +} +EXPORT_SYMBOL_GPL(bsg_goose_queue); + +/** + * bsg_request_fn - generic handler for bsg requests + * @q: request queue to manage + * + * On error the create_bsg_job function should return a -Exyz error value + * that will be set to the req->errors. + * + * Drivers/subsys should pass this to the queue init function. + */ +void bsg_request_fn(struct request_queue *q) +{ + struct device *dev = q->queuedata; + struct request *req; + struct bsg_job *job; + int ret; + + if (!get_device(dev)) + return; + + while (1) { + req = blk_fetch_request(q); + if (!req) + break; + spin_unlock_irq(q->queue_lock); + + ret = bsg_create_job(dev, req); + if (ret) { + req->errors = ret; + blk_end_request_all(req, ret); + spin_lock_irq(q->queue_lock); + continue; + } + + job = req->special; + ret = q->bsg_job_fn(job); + spin_lock_irq(q->queue_lock); + if (ret) + break; + } + + spin_unlock_irq(q->queue_lock); + put_device(dev); + spin_lock_irq(q->queue_lock); +} +EXPORT_SYMBOL_GPL(bsg_request_fn); + +/** + * bsg_setup_queue - Create and add the bsg hooks so we can receive requests + * @dev: device to attach bsg device to + * @q: request queue setup by caller + * @name: device to give bsg device + * @job_fn: bsg job handler + * @dd_job_size: size of LLD data needed for each job + * + * The caller should have setup the reuqest queue with bsg_request_fn + * as the request_fn. + */ +int bsg_setup_queue(struct device *dev, struct request_queue *q, + char *name, bsg_job_fn *job_fn, int dd_job_size) +{ + int ret; + + q->queuedata = dev; + q->bsg_job_size = dd_job_size; + q->bsg_job_fn = job_fn; + queue_flag_set_unlocked(QUEUE_FLAG_BIDI, q); + blk_queue_softirq_done(q, bsg_softirq_done); + blk_queue_rq_timeout(q, BLK_DEFAULT_SG_TIMEOUT); + + ret = bsg_register_queue(q, dev, name, NULL); + if (ret) { + printk(KERN_ERR "%s: bsg interface failed to " + "initialize - register queue\n", dev->kobj.name); + return ret; + } + + return 0; +} +EXPORT_SYMBOL_GPL(bsg_setup_queue); + +/** + * bsg_remove_queue - Deletes the bsg dev from the q + * @q: the request_queue that is to be torn down. + * + * Notes: + * Before unregistering the queue empty any requests that are blocked + */ +void bsg_remove_queue(struct request_queue *q) +{ + struct request *req; /* block request */ + int counts; /* totals for request_list count and starved */ + + if (!q) + return; + + /* Stop taking in new requests */ + spin_lock_irq(q->queue_lock); + blk_stop_queue(q); + + /* drain all requests in the queue */ + while (1) { + /* need the lock to fetch a request + * this may fetch the same reqeust as the previous pass + */ + req = blk_fetch_request(q); + /* save requests in use and starved */ + counts = q->rq.count[0] + q->rq.count[1] + + q->rq.starved[0] + q->rq.starved[1]; + spin_unlock_irq(q->queue_lock); + /* any requests still outstanding? */ + if (counts == 0) + break; + + /* This may be the same req as the previous iteration, + * always send the blk_end_request_all after a prefetch. + * It is not okay to not end the request because the + * prefetch started the request. + */ + if (req) { + /* return -ENXIO to indicate that this queue is + * going away + */ + req->errors = -ENXIO; + blk_end_request_all(req, -ENXIO); + } + + msleep(200); /* allow bsg to possibly finish */ + spin_lock_irq(q->queue_lock); + } + bsg_unregister_queue(q); +} +EXPORT_SYMBOL_GPL(bsg_remove_queue); diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 1f96ad6..16ace89 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -130,6 +130,8 @@ struct cfq_queue { unsigned long slice_end; long slice_resid; + /* pending priority requests */ + int prio_pending; /* number of requests that are on the dispatch list or inside driver */ int dispatched; @@ -682,6 +684,9 @@ cfq_choose_req(struct cfq_data *cfqd, struct request *rq1, struct request *rq2, if (rq_is_sync(rq1) != rq_is_sync(rq2)) return rq_is_sync(rq1) ? rq1 : rq2; + if ((rq1->cmd_flags ^ rq2->cmd_flags) & REQ_PRIO) + return rq1->cmd_flags & REQ_PRIO ? rq1 : rq2; + s1 = blk_rq_pos(rq1); s2 = blk_rq_pos(rq2); @@ -1209,6 +1214,9 @@ static void cfq_destroy_cfqg(struct cfq_data *cfqd, struct cfq_group *cfqg) hlist_del_init(&cfqg->cfqd_node); + BUG_ON(cfqd->nr_blkcg_linked_grps <= 0); + cfqd->nr_blkcg_linked_grps--; + /* * Put the reference taken at the time of creation so that when all * queues are gone, group can be destroyed. @@ -1604,6 +1612,10 @@ static void cfq_remove_request(struct request *rq) cfqq->cfqd->rq_queued--; cfq_blkiocg_update_io_remove_stats(&(RQ_CFQG(rq))->blkg, rq_data_dir(rq), rq_is_sync(rq)); + if (rq->cmd_flags & REQ_PRIO) { + WARN_ON(!cfqq->prio_pending); + cfqq->prio_pending--; + } } static int cfq_merge(struct request_queue *q, struct request **req, @@ -3357,6 +3369,13 @@ cfq_should_preempt(struct cfq_data *cfqd, struct cfq_queue *new_cfqq, return true; /* + * So both queues are sync. Let the new request get disk time if + * it's a metadata request and the current queue is doing regular IO. + */ + if ((rq->cmd_flags & REQ_PRIO) && !cfqq->prio_pending) + return true; + + /* * Allow an RT request to pre-empt an ongoing non-RT cfqq timeslice. */ if (cfq_class_rt(new_cfqq) && !cfq_class_rt(cfqq)) @@ -3420,6 +3439,8 @@ cfq_rq_enqueued(struct cfq_data *cfqd, struct cfq_queue *cfqq, struct cfq_io_context *cic = RQ_CIC(rq); cfqd->rq_queued++; + if (rq->cmd_flags & REQ_PRIO) + cfqq->prio_pending++; cfq_update_io_thinktime(cfqd, cfqq, cic); cfq_update_io_seektime(cfqd, cfqq, rq); diff --git a/block/genhd.c b/block/genhd.c index 5cb51c5..e2f6790 100644 --- a/block/genhd.c +++ b/block/genhd.c @@ -1146,17 +1146,17 @@ static int diskstats_show(struct seq_file *seqf, void *v) cpu = part_stat_lock(); part_round_stats(cpu, hd); part_stat_unlock(); - seq_printf(seqf, "%4d %7d %s %lu %lu %llu " - "%u %lu %lu %llu %u %u %u %u\n", + seq_printf(seqf, "%4d %7d %s %lu %lu %lu " + "%u %lu %lu %lu %u %u %u %u\n", MAJOR(part_devt(hd)), MINOR(part_devt(hd)), disk_name(gp, hd->partno, buf), part_stat_read(hd, ios[READ]), part_stat_read(hd, merges[READ]), - (unsigned long long)part_stat_read(hd, sectors[READ]), + part_stat_read(hd, sectors[READ]), jiffies_to_msecs(part_stat_read(hd, ticks[READ])), part_stat_read(hd, ios[WRITE]), part_stat_read(hd, merges[WRITE]), - (unsigned long long)part_stat_read(hd, sectors[WRITE]), + part_stat_read(hd, sectors[WRITE]), jiffies_to_msecs(part_stat_read(hd, ticks[WRITE])), part_in_flight(hd), jiffies_to_msecs(part_stat_read(hd, io_ticks)), |