From 062a644d6121d5e2f51c0b2ca0cbc5155ebf845b Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Wed, 15 Sep 2010 17:06:33 -0400 Subject: blk-cgroup: Prepare the base for supporting more than one IO control policies o This patch prepares the base for introducing new IO control policies. Currently all the code is written knowing there is only one policy and that is proportional bandwidth. Creating infrastructure for newer policies to come in. o Also there were many functions which were generated using macro. It was very confusing. Got rid of those. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 1 + 1 file changed, 1 insertion(+) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index eb4086f..b9f8619 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -4013,6 +4013,7 @@ static struct blkio_policy_type blkio_policy_cfq = { .blkio_unlink_group_fn = cfq_unlink_blkio_group, .blkio_update_group_weight_fn = cfq_update_blkio_group_weight, }, + .plid = BLKIO_POLICY_PROP, }; #else static struct blkio_policy_type blkio_policy_cfq; -- cgit v1.1 From 749ef9f8423054e326f3a246327ed2db4b6d395f Mon Sep 17 00:00:00 2001 From: Corrado Zoccolo Date: Mon, 20 Sep 2010 15:24:50 +0200 Subject: cfq: improve fsync performance for small files Fsync performance for small files achieved by cfq on high-end disks is lower than what deadline can achieve, due to idling introduced between the sync write happening in process context and the journal commit. Moreover, when competing with a sequential reader, a process writing small files and fsync-ing them is starved. This patch fixes the two problems by: - marking journal commits as WRITE_SYNC, so that they get the REQ_NOIDLE flag set, - force all queues that have REQ_NOIDLE requests to be put in the noidle tree. Having the queue associated to the fsync-ing process and the one associated to journal commits in the noidle tree allows: - switching between them without idling, - fairness vs. competing idling queues, since they will be serviced only after the noidle tree expires its slice. Acked-by: Vivek Goyal Reviewed-by: Jeff Moyer Tested-by: Jeff Moyer Signed-off-by: Corrado Zoccolo Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 18 ++++-------------- 1 file changed, 4 insertions(+), 14 deletions(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index b9f8619..6845926 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -216,7 +216,6 @@ struct cfq_data { enum wl_type_t serving_type; unsigned long workload_expires; struct cfq_group *serving_group; - bool noidle_tree_requires_idle; /* * Each priority tree is sorted by next_request position. These @@ -2126,7 +2125,6 @@ static void choose_service_tree(struct cfq_data *cfqd, struct cfq_group *cfqg) slice = max_t(unsigned, slice, CFQ_MIN_TT); cfq_log(cfqd, "workload slice:%d", slice); cfqd->workload_expires = jiffies + slice; - cfqd->noidle_tree_requires_idle = false; } static struct cfq_group *cfq_get_next_cfqg(struct cfq_data *cfqd) @@ -3108,7 +3106,9 @@ cfq_update_idle_window(struct cfq_data *cfqd, struct cfq_queue *cfqq, if (cfqq->queued[0] + cfqq->queued[1] >= 4) cfq_mark_cfqq_deep(cfqq); - if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || + if (cfqq->next_rq && (cfqq->next_rq->cmd_flags & REQ_NOIDLE)) + enable_idle = 0; + else if (!atomic_read(&cic->ioc->nr_tasks) || !cfqd->cfq_slice_idle || (!cfq_cfqq_deep(cfqq) && CFQQ_SEEKY(cfqq))) enable_idle = 0; else if (sample_valid(cic->ttime_samples)) { @@ -3421,17 +3421,7 @@ static void cfq_completed_request(struct request_queue *q, struct request *rq) cfq_slice_expired(cfqd, 1); else if (sync && cfqq_empty && !cfq_close_cooperator(cfqd, cfqq)) { - cfqd->noidle_tree_requires_idle |= - !(rq->cmd_flags & REQ_NOIDLE); - /* - * Idling is enabled for SYNC_WORKLOAD. - * SYNC_NOIDLE_WORKLOAD idles at the end of the tree - * only if we processed at least one !REQ_NOIDLE request - */ - if (cfqd->serving_type == SYNC_WORKLOAD - || cfqd->noidle_tree_requires_idle - || cfqq->cfqg->nr_cfqq == 1) - cfq_arm_slice_timer(cfqd); + cfq_arm_slice_timer(cfqd); } } -- cgit v1.1 From fe0714377ee2ca161bf2afb7773e22f15f1786d4 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 1 Oct 2010 14:49:49 +0200 Subject: blkio: Recalculate the throttled bio dispatch time upon throttle limit change o Currently any cgroup throttle limit changes are processed asynchronousy and the change does not take affect till a new bio is dispatched from same group. o It might happen that a user sets a redicuously low limit on throttling. Say 1 bytes per second on reads. In such cases simple operations like mount a disk can wait for a very long time. o Once bio is throttled, there is no easy way to come out of that wait even if user increases the read limit later. o This patch fixes it. Now if a user changes the cgroup limits, we recalculate the bio dispatch time according to new limits. o Can't take queueu lock under blkcg_lock, hence after the change I wake up the dispatch thread again which recalculates the time. So there are some variables being synchronized across two threads without lock and I had to make use of barriers. Hoping I have used barriers correctly. Any review of memory barrier code especially will help. Signed-off-by: Vivek Goyal Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 6845926..86338d5 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -951,8 +951,8 @@ static inline struct cfq_group *cfqg_of_blkg(struct blkio_group *blkg) return NULL; } -void -cfq_update_blkio_group_weight(struct blkio_group *blkg, unsigned int weight) +void cfq_update_blkio_group_weight(void *key, struct blkio_group *blkg, + unsigned int weight) { cfqg_of_blkg(blkg)->weight = weight; } -- cgit v1.1 From b4627321e18582dcbdeb45d77df29d3177107c65 Mon Sep 17 00:00:00 2001 From: Vivek Goyal Date: Fri, 22 Oct 2010 09:48:43 +0200 Subject: cfq-iosched: Fix a gcc 4.5 warning and put some comments MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit - Andi encountedred following warning with gcc 4.5 linux/block/cfq-iosched.c: In function ‘cfq_dispatch_requests’: linux/block/cfq-iosched.c:2156:3: warning: array subscript is above array bounds - Warning happens due to following code. slice = group_slice * count / max_t(unsigned, cfqg->busy_queues_avg[cfqd->serving_prio], cfq_group_busy_queues_wl(cfqd->serving_prio, cfqd, cfqg)); gcc is complaining about cfqg->busy_queues_avg[] being indexed by CFQ prio classes (RT, BE and IDLE) while the array size is only 2. - At run time, we never access cfqg->busy_queues_avg[IDLE] and return from function before this code hits. - To fix warning increase the array size though it will remain unused. This patch also puts some comments to clarify some of the confusions. - I have taken Jens's patch and modified it a bit. - Compile tested with gcc 4.4 and boot tested. I don't have gcc 4.5 running, Andi can you please test it with gcc 4.5 to make sure it worked. Reported-by: Andi Kleen Signed-off-by: Vivek Goyal Acked-by: Jeff Moyer Signed-off-by: Jens Axboe --- block/cfq-iosched.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'block/cfq-iosched.c') diff --git a/block/cfq-iosched.c b/block/cfq-iosched.c index 86338d5..dfceb63 100644 --- a/block/cfq-iosched.c +++ b/block/cfq-iosched.c @@ -157,6 +157,7 @@ enum wl_prio_t { BE_WORKLOAD = 0, RT_WORKLOAD = 1, IDLE_WORKLOAD = 2, + CFQ_PRIO_NR, }; /* @@ -181,10 +182,19 @@ struct cfq_group { /* number of cfqq currently on this group */ int nr_cfqq; - /* Per group busy queus average. Useful for workload slice calc. */ - unsigned int busy_queues_avg[2]; /* - * rr lists of queues with requests, onle rr for each priority class. + * Per group busy queus average. Useful for workload slice calc. We + * create the array for each prio class but at run time it is used + * only for RT and BE class and slot for IDLE class remains unused. + * This is primarily done to avoid confusion and a gcc warning. + */ + unsigned int busy_queues_avg[CFQ_PRIO_NR]; + /* + * rr lists of queues with requests. We maintain service trees for + * RT and BE classes. These trees are subdivided in subclasses + * of SYNC, SYNC_NOIDLE and ASYNC based on workload type. For IDLE + * class there is no subclassification and all the cfq queues go on + * a single tree service_tree_idle. * Counts are embedded in the cfq_rb_root */ struct cfq_rb_root service_trees[2][3]; -- cgit v1.1