block: add support for IO CPU affinity

This patch adds support for controlling the IO completion CPU of either all requests on a queue, or on a per-request basis. We export a sysfs variable (rq_affinity) which, if set, migrates completions of requests to the CPU that originally submitted it. A bio helper (bio_set_completion_cpu()) is also added, so that queuers can ask for completion on that specific CPU. In testing, this has been show to cut the system time by as much as 20-40% on synthetic workloads where CPU affinity is desired. This requires a little help from the architecture, so it'll only work as designed for archs that are using the new generic smp helper infrastructure. Signed-off-by: Jens Axboe <jens.axboe@oracle.com>
author: Jens Axboe <jens.axboe@oracle.com> 2008-09-13 20:26:01 +0200
committer: Jens Axboe <jens.axboe@oracle.com> 2008-10-09 08:56:09 +0200
commit: c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832 (patch)
tree: ecc3d2517b3471ccc35d4cb4e3b48d4b57205061 /block/blk-core.c
parent: 18887ad910e56066233a07fd3cfb2fa11338b782 (diff)
download: op-kernel-dev-c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832.zip
op-kernel-dev-c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832.tar.gz
1 files changed, 23 insertions, 23 deletions
diff --git a/block/blk-core.c b/block/blk-core.c
index 9c6f818..5484838 100644
--- a/block/blk-core.c
+++ b/block/blk-core.c
@@ -110,7 +110,7 @@ void blk_rq_init(struct request_queue *q, struct request *rq)
 	memset(rq, 0, sizeof(*rq));
 
 	INIT_LIST_HEAD(&rq->queuelist);
-	INIT_LIST_HEAD(&rq->donelist);
+	rq->cpu = -1;
 	rq->q = q;
 	rq->sector = rq->hard_sector = (sector_t) -1;
 	INIT_HLIST_NODE(&rq->hash);
@@ -322,6 +322,21 @@ void blk_unplug(struct request_queue *q)
 }
 EXPORT_SYMBOL(blk_unplug);
 
+static void blk_invoke_request_fn(struct request_queue *q)
+{
+	/*
+	 * one level of recursion is ok and is much faster than kicking
+	 * the unplug handling
+	 */
+	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
+		q->request_fn(q);
+		queue_flag_clear(QUEUE_FLAG_REENTER, q);
+	} else {
+		queue_flag_set(QUEUE_FLAG_PLUGGED, q);
+		kblockd_schedule_work(q, &q->unplug_work);
+	}
+}
+
 /**
  * blk_start_queue - restart a previously stopped queue
  * @q:    The &struct request_queue in question
@@ -336,18 +351,7 @@ void blk_start_queue(struct request_queue *q)
 	WARN_ON(!irqs_disabled());
 
 	queue_flag_clear(QUEUE_FLAG_STOPPED, q);
-
-	/*
-	 * one level of recursion is ok and is much faster than kicking
-	 * the unplug handling
-	 */
-	if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-		q->request_fn(q);
-		queue_flag_clear(QUEUE_FLAG_REENTER, q);
-	} else {
-		blk_plug_device(q);
-		kblockd_schedule_work(q, &q->unplug_work);
-	}
+	blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(blk_start_queue);
 
@@ -405,15 +409,8 @@ void __blk_run_queue(struct request_queue *q)
 	 * Only recurse once to avoid overrunning the stack, let the unplug
 	 * handling reinvoke the handler shortly if we already got there.
 	 */
-	if (!elv_queue_empty(q)) {
-		if (!queue_flag_test_and_set(QUEUE_FLAG_REENTER, q)) {
-			q->request_fn(q);
-			queue_flag_clear(QUEUE_FLAG_REENTER, q);
-		} else {
-			blk_plug_device(q);
-			kblockd_schedule_work(q, &q->unplug_work);
-		}
-	}
+	if (!elv_queue_empty(q))
+		blk_invoke_request_fn(q);
 }
 EXPORT_SYMBOL(__blk_run_queue);
 
@@ -1056,6 +1053,7 @@ EXPORT_SYMBOL(blk_put_request);
 
 void init_request_from_bio(struct request *req, struct bio *bio)
 {
+	req->cpu = bio->bi_comp_cpu;
 	req->cmd_type = REQ_TYPE_FS;
 
 	/*
@@ -1198,13 +1196,15 @@ get_rq:
 	init_request_from_bio(req, bio);
 
 	spin_lock_irq(q->queue_lock);
+	if (test_bit(QUEUE_FLAG_SAME_COMP, &q->queue_flags) ||
+	    bio_flagged(bio, BIO_CPU_AFFINE))
+		req->cpu = blk_cpu_to_group(smp_processor_id());
 	if (elv_queue_empty(q))
 		blk_plug_device(q);
 	add_request(q, req);
 out:
 	if (sync)
 		__generic_unplug_device(q);
-
 	spin_unlock_irq(q->queue_lock);
 	return 0;
author	Jens Axboe <jens.axboe@oracle.com>	2008-09-13 20:26:01 +0200
committer	Jens Axboe <jens.axboe@oracle.com>	2008-10-09 08:56:09 +0200
commit	c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832 (patch)
tree	ecc3d2517b3471ccc35d4cb4e3b48d4b57205061 /block/blk-core.c
parent	18887ad910e56066233a07fd3cfb2fa11338b782 (diff)
download	op-kernel-dev-c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832.zip op-kernel-dev-c7c22e4d5c1fdebfac4dba76de7d0338c2b0d832.tar.gz