summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
author Tommaso Cucinotta <tommaso.cucinotta@sssup.it> 2016-08-14 16:27:07 +0200
committer Ingo Molnar <mingo@kernel.org> 2016-09-05 13:29:43 +0200
commit 8e1bc301aaf9f9a2d731bf8d50d549ac2dcfdab2 (patch)
tree 8798854958a7f7cdba71e79015f694798d6a5857
parent 126b3b6842cc848fc9880e7816e0a8d743be51f1 (diff)
download op-kernel-dev-8e1bc301aaf9f9a2d731bf8d50d549ac2dcfdab2.zip
op-kernel-dev-8e1bc301aaf9f9a2d731bf8d50d549ac2dcfdab2.tar.gz
sched/deadline: Make CPU heap faster avoiding real swaps on heapify
This change replaces heapify() operations that swap the item being fixed with its parent/child at every step (so that the item moves along the chain) with heapify() operations that just pull the parent/child chain by one position and store the item being fixed only once, at its final position. On a non-trivial heapify(), this performs roughly half as many stores as the swap-based version. This has been measured to achieve up to a 10% speed-up for cpudl_set() calls, using a randomly generated workload of 1K, 10K and 100K random heap insertions and deletions (75% cpudl_set() calls with is_valid=1 and 25% with is_valid=0) and randomly generated CPU IDs, with up to 256 CPUs, as measured on an Intel Core2 Duo.
Signed-off-by: Tommaso Cucinotta <tommaso.cucinotta@sssup.it>
Signed-off-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Luca Abeni <luca.abeni@unitn.it>
Reviewed-by: Juri Lelli <juri.lelli@arm.com>
Cc: Juri Lelli <juri.lelli@gmail.com>
Cc: Linus Torvalds <torvalds@linux-foundation.org>
Cc: Peter Zijlstra <peterz@infradead.org>
Cc: Thomas Gleixner <tglx@linutronix.de>
Cc: linux-dl@retis.sssup.it
Link: http://lkml.kernel.org/r/1471184828-12644-3-git-send-email-tommaso.cucinotta@sssup.it
Signed-off-by: Ingo Molnar <mingo@kernel.org>
-rw-r--r-- kernel/sched/cpudeadline.c 66
1 files changed, 45 insertions, 21 deletions
diff --git a/kernel/sched/cpudeadline.c b/kernel/sched/cpudeadline.c
index 0acb0d4..0ace75a 100644
--- a/kernel/sched/cpudeadline.c
+++ b/kernel/sched/cpudeadline.c
@@ -31,48 +31,72 @@ static inline int right_child(int i)
return (i << 1) + 2;
}
-static void cpudl_exchange(struct cpudl *cp, int a, int b)
-{
- int cpu_a = cp->elements[a].cpu, cpu_b = cp->elements[b].cpu;
-
- swap(cp->elements[a].cpu, cp->elements[b].cpu);
- swap(cp->elements[a].dl , cp->elements[b].dl );
-
- swap(cp->elements[cpu_a].idx, cp->elements[cpu_b].idx);
-}
-
static void cpudl_heapify_down(struct cpudl *cp, int idx)
{
int l, r, largest;
+ int orig_cpu = cp->elements[idx].cpu;
+ u64 orig_dl = cp->elements[idx].dl;
+
+ if (left_child(idx) >= cp->size)
+ return;
+
/* adapted from lib/prio_heap.c */
while(1) {
+ u64 largest_dl;
l = left_child(idx);
r = right_child(idx);
largest = idx;
+ largest_dl = orig_dl;
- if ((l < cp->size) && dl_time_before(cp->elements[idx].dl,
- cp->elements[l].dl))
+ if ((l < cp->size) && dl_time_before(orig_dl,
+ cp->elements[l].dl)) {
largest = l;
- if ((r < cp->size) && dl_time_before(cp->elements[largest].dl,
- cp->elements[r].dl))
+ largest_dl = cp->elements[l].dl;
+ }
+ if ((r < cp->size) && dl_time_before(largest_dl,
+ cp->elements[r].dl))
largest = r;
+
if (largest == idx)
break;
- /* Push idx down the heap one level and bump one up */
- cpudl_exchange(cp, largest, idx);
+ /* pull largest child onto idx */
+ cp->elements[idx].cpu = cp->elements[largest].cpu;
+ cp->elements[idx].dl = cp->elements[largest].dl;
+ cp->elements[cp->elements[idx].cpu].idx = idx;
idx = largest;
}
+ /* actual push down of saved original values orig_* */
+ cp->elements[idx].cpu = orig_cpu;
+ cp->elements[idx].dl = orig_dl;
+ cp->elements[cp->elements[idx].cpu].idx = idx;
}
static void cpudl_heapify_up(struct cpudl *cp, int idx)
{
- while (idx > 0 && dl_time_before(cp->elements[parent(idx)].dl,
- cp->elements[idx].dl)) {
- cpudl_exchange(cp, idx, parent(idx));
- idx = parent(idx);
- }
+ int p;
+
+ int orig_cpu = cp->elements[idx].cpu;
+ u64 orig_dl = cp->elements[idx].dl;
+
+ if (idx == 0)
+ return;
+
+ do {
+ p = parent(idx);
+ if (dl_time_before(orig_dl, cp->elements[p].dl))
+ break;
+ /* pull parent onto idx */
+ cp->elements[idx].cpu = cp->elements[p].cpu;
+ cp->elements[idx].dl = cp->elements[p].dl;
+ cp->elements[cp->elements[idx].cpu].idx = idx;
+ idx = p;
+ } while (idx != 0);
+ /* actual push up of saved original values orig_* */
+ cp->elements[idx].cpu = orig_cpu;
+ cp->elements[idx].dl = orig_dl;
+ cp->elements[cp->elements[idx].cpu].idx = idx;
}
static void cpudl_heapify(struct cpudl *cp, int idx)
OpenPOWER on IntegriCloud