From 8932a63d5edb02f714d50c26583152fe0a97a69c Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Thu, 19 Apr 2012 12:20:14 -0700 Subject: rcu: Reduce cache-miss initialization latencies for large systems Commit #0209f649 (rcu: limit rcu_node leaf-level fanout) set an upper limit of 16 on the leaf-level fanout for the rcu_node tree. This was needed to reduce lock contention that was induced by the synchronization of scheduling-clock interrupts, which was in turn needed to improve energy efficiency for moderate-sized lightly loaded servers. However, reducing the leaf-level fanout means that there are more leaf-level rcu_node structures in the tree, which in turn means that RCU's grace-period initialization incurs more cache misses. This is not a problem on moderate-sized servers with only a few tens of CPUs, but becomes a major source of real-time latency spikes on systems with many hundreds of CPUs. In addition, the workloads running on these large systems tend to be CPU-bound, which eliminates the energy-efficiency advantages of synchronizing scheduling-clock interrupts. Therefore, these systems need maximal values for the rcu_node leaf-level fanout. This commit addresses this problem by introducing a new kernel parameter named RCU_FANOUT_LEAF that directly controls the leaf-level fanout. This parameter defaults to 16 to handle the common case of a moderate sized lightly loaded servers, but may be set higher on larger systems. Reported-by: Mike Galbraith Reported-by: Dimitri Sivanich Signed-off-by: Paul E. McKenney --- kernel/rcutree.h | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) (limited to 'kernel/rcutree.h') diff --git a/kernel/rcutree.h b/kernel/rcutree.h index cdd1be0..a905c20 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -29,18 +29,14 @@ #include /* - * Define shape of hierarchy based on NR_CPUS and CONFIG_RCU_FANOUT. + * Define shape of hierarchy based on NR_CPUS, CONFIG_RCU_FANOUT, and + * CONFIG_RCU_FANOUT_LEAF. * In theory, it should be possible to add more levels straightforwardly. * In practice, this did work well going from three levels to four. * Of course, your mileage may vary. */ #define MAX_RCU_LVLS 4 -#if CONFIG_RCU_FANOUT > 16 -#define RCU_FANOUT_LEAF 16 -#else /* #if CONFIG_RCU_FANOUT > 16 */ -#define RCU_FANOUT_LEAF (CONFIG_RCU_FANOUT) -#endif /* #else #if CONFIG_RCU_FANOUT > 16 */ -#define RCU_FANOUT_1 (RCU_FANOUT_LEAF) +#define RCU_FANOUT_1 (CONFIG_RCU_FANOUT_LEAF) #define RCU_FANOUT_2 (RCU_FANOUT_1 * CONFIG_RCU_FANOUT) #define RCU_FANOUT_3 (RCU_FANOUT_2 * CONFIG_RCU_FANOUT) #define RCU_FANOUT_4 (RCU_FANOUT_3 * CONFIG_RCU_FANOUT) -- cgit v1.1 From 616c310e83b872024271c915c1b9ab505b9efad9 Mon Sep 17 00:00:00 2001 From: "Paul E. McKenney" Date: Tue, 27 Mar 2012 16:02:08 -0700 Subject: rcu: Move PREEMPT_RCU preemption to switch_to() invocation Currently, PREEMPT_RCU readers are enqueued upon entry to the scheduler. This is inefficient because enqueuing is required only if there is a context switch, and entry to the scheduler does not guarantee a context switch. The commit therefore moves the enqueuing to immediately precede the call to switch_to() from the scheduler. Signed-off-by: Paul E. McKenney Signed-off-by: Paul E. McKenney Tested-by: Linus Torvalds --- kernel/rcutree.h | 1 - 1 file changed, 1 deletion(-) (limited to 'kernel/rcutree.h') diff --git a/kernel/rcutree.h b/kernel/rcutree.h index cdd1be0..d6b70b08 100644 --- a/kernel/rcutree.h +++ b/kernel/rcutree.h @@ -423,7 +423,6 @@ DECLARE_PER_CPU(char, rcu_cpu_has_work); /* Forward declarations for rcutree_plugin.h */ static void rcu_bootup_announce(void); long rcu_batches_completed(void); -static void rcu_preempt_note_context_switch(int cpu); static int rcu_preempt_blocked_readers_cgp(struct rcu_node *rnp); #ifdef CONFIG_HOTPLUG_CPU static void rcu_report_unblock_qs_rnp(struct rcu_node *rnp, -- cgit v1.1