summaryrefslogtreecommitdiffstats
path: root/kernel/time
diff options
context:
space:
mode:
Diffstat (limited to 'kernel/time')
-rw-r--r--kernel/time/Kconfig51
-rw-r--r--kernel/time/sched_clock.c2
-rw-r--r--kernel/time/tick-sched.c71
-rw-r--r--kernel/time/timer_list.c41
4 files changed, 106 insertions, 59 deletions
diff --git a/kernel/time/Kconfig b/kernel/time/Kconfig
index 70f27e8..2b62fe8 100644
--- a/kernel/time/Kconfig
+++ b/kernel/time/Kconfig
@@ -105,7 +105,6 @@ config NO_HZ_FULL
select RCU_USER_QS
select RCU_NOCB_CPU
select VIRT_CPU_ACCOUNTING_GEN
- select CONTEXT_TRACKING_FORCE
select IRQ_WORK
help
Adaptively try to shutdown the tick whenever possible, even when
@@ -134,6 +133,56 @@ config NO_HZ_FULL_ALL
Note the boot CPU will still be kept outside the range to
handle the timekeeping duty.
+config NO_HZ_FULL_SYSIDLE
+ bool "Detect full-system idle state for full dynticks system"
+ depends on NO_HZ_FULL
+ default n
+ help
+ At least one CPU must keep the scheduling-clock tick running for
+ timekeeping purposes whenever there is a non-idle CPU, where
+ "non-idle" also includes dynticks CPUs as long as they are
+ running non-idle tasks. Because the underlying adaptive-tick
+ support cannot distinguish between all CPUs being idle and
+ all CPUs each running a single task in dynticks mode, the
+ underlying support simply ensures that there is always a CPU
+ handling the scheduling-clock tick, whether or not all CPUs
+ are idle. This Kconfig option enables scalable detection of
+ the all-CPUs-idle state, thus allowing the scheduling-clock
+ tick to be disabled when all CPUs are idle. Note that scalable
+ detection of the all-CPUs-idle state means that larger systems
+ will be slower to declare the all-CPUs-idle state.
+
+ Say Y if you would like to help debug all-CPUs-idle detection.
+
+ Say N if you are unsure.
+
+config NO_HZ_FULL_SYSIDLE_SMALL
+ int "Number of CPUs above which large-system approach is used"
+ depends on NO_HZ_FULL_SYSIDLE
+ range 1 NR_CPUS
+ default 8
+ help
+ The full-system idle detection mechanism takes a lazy approach
+ on large systems, as is required to attain decent scalability.
+ However, on smaller systems, scalability is not anywhere near as
+ large a concern as is energy efficiency. The sysidle subsystem
+ therefore uses a fast but non-scalable algorithm for small
+ systems and a lazier but scalable algorithm for large systems.
+ This Kconfig parameter defines the number of CPUs in the largest
+ system that will be considered to be "small".
+
+ The default value will be fine in most cases. Battery-powered
+ systems that (1) enable NO_HZ_FULL_SYSIDLE, (2) have larger
+ numbers of CPUs, and (3) are suffering from battery-lifetime
+ problems due to long sysidle latencies might wish to experiment
+ with larger values for this Kconfig parameter. On the other
+ hand, they might be even better served by disabling NO_HZ_FULL
+ entirely, given that NO_HZ_FULL is intended for HPC and
+ real-time workloads that at present do not tend to be run on
+ battery-powered systems.
+
+ Take the default if you are unsure.
+
config NO_HZ
bool "Old Idle dynticks config"
depends on !ARCH_USES_GETTIMEOFFSET && GENERIC_CLOCKEVENTS
diff --git a/kernel/time/sched_clock.c b/kernel/time/sched_clock.c
index a326f27..0b479a6 100644
--- a/kernel/time/sched_clock.c
+++ b/kernel/time/sched_clock.c
@@ -121,7 +121,7 @@ void __init setup_sched_clock(u32 (*read)(void), int bits, unsigned long rate)
BUG_ON(bits > 32);
WARN_ON(!irqs_disabled());
read_sched_clock = read;
- sched_clock_mask = (1 << bits) - 1;
+ sched_clock_mask = (1ULL << bits) - 1;
cd.rate = rate;
/* calculate the mult/shift to convert counter ticks to ns. */
diff --git a/kernel/time/tick-sched.c b/kernel/time/tick-sched.c
index 6960172..3612fc7 100644
--- a/kernel/time/tick-sched.c
+++ b/kernel/time/tick-sched.c
@@ -23,6 +23,7 @@
#include <linux/irq_work.h>
#include <linux/posix-timers.h>
#include <linux/perf_event.h>
+#include <linux/context_tracking.h>
#include <asm/irq_regs.h>
@@ -148,8 +149,8 @@ static void tick_sched_handle(struct tick_sched *ts, struct pt_regs *regs)
}
#ifdef CONFIG_NO_HZ_FULL
-static cpumask_var_t nohz_full_mask;
-bool have_nohz_full_mask;
+cpumask_var_t tick_nohz_full_mask;
+bool tick_nohz_full_running;
static bool can_stop_full_tick(void)
{
@@ -182,7 +183,8 @@ static bool can_stop_full_tick(void)
* Don't allow the user to think they can get
* full NO_HZ with this machine.
*/
- WARN_ONCE(1, "NO_HZ FULL will not work with unstable sched clock");
+ WARN_ONCE(tick_nohz_full_running,
+ "NO_HZ FULL will not work with unstable sched clock");
return false;
}
#endif
@@ -196,7 +198,7 @@ static void tick_nohz_restart_sched_tick(struct tick_sched *ts, ktime_t now);
* Re-evaluate the need for the tick on the current CPU
* and restart it if necessary.
*/
-void tick_nohz_full_check(void)
+void __tick_nohz_full_check(void)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
@@ -210,7 +212,7 @@ void tick_nohz_full_check(void)
static void nohz_full_kick_work_func(struct irq_work *work)
{
- tick_nohz_full_check();
+ __tick_nohz_full_check();
}
static DEFINE_PER_CPU(struct irq_work, nohz_full_kick_work) = {
@@ -229,7 +231,7 @@ void tick_nohz_full_kick(void)
static void nohz_full_kick_ipi(void *info)
{
- tick_nohz_full_check();
+ __tick_nohz_full_check();
}
/*
@@ -238,12 +240,13 @@ static void nohz_full_kick_ipi(void *info)
*/
void tick_nohz_full_kick_all(void)
{
- if (!have_nohz_full_mask)
+ if (!tick_nohz_full_running)
return;
preempt_disable();
- smp_call_function_many(nohz_full_mask,
+ smp_call_function_many(tick_nohz_full_mask,
nohz_full_kick_ipi, NULL, false);
+ tick_nohz_full_kick();
preempt_enable();
}
@@ -252,7 +255,7 @@ void tick_nohz_full_kick_all(void)
* It might need the tick due to per task/process properties:
* perf events, posix cpu timers, ...
*/
-void tick_nohz_task_switch(struct task_struct *tsk)
+void __tick_nohz_task_switch(struct task_struct *tsk)
{
unsigned long flags;
@@ -268,37 +271,29 @@ out:
local_irq_restore(flags);
}
-int tick_nohz_full_cpu(int cpu)
-{
- if (!have_nohz_full_mask)
- return 0;
-
- return cpumask_test_cpu(cpu, nohz_full_mask);
-}
-
/* Parse the boot-time nohz CPU list from the kernel parameters. */
static int __init tick_nohz_full_setup(char *str)
{
int cpu;
- alloc_bootmem_cpumask_var(&nohz_full_mask);
- if (cpulist_parse(str, nohz_full_mask) < 0) {
+ alloc_bootmem_cpumask_var(&tick_nohz_full_mask);
+ if (cpulist_parse(str, tick_nohz_full_mask) < 0) {
pr_warning("NOHZ: Incorrect nohz_full cpumask\n");
return 1;
}
cpu = smp_processor_id();
- if (cpumask_test_cpu(cpu, nohz_full_mask)) {
+ if (cpumask_test_cpu(cpu, tick_nohz_full_mask)) {
pr_warning("NO_HZ: Clearing %d from nohz_full range for timekeeping\n", cpu);
- cpumask_clear_cpu(cpu, nohz_full_mask);
+ cpumask_clear_cpu(cpu, tick_nohz_full_mask);
}
- have_nohz_full_mask = true;
+ tick_nohz_full_running = true;
return 1;
}
__setup("nohz_full=", tick_nohz_full_setup);
-static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
+static int tick_nohz_cpu_down_callback(struct notifier_block *nfb,
unsigned long action,
void *hcpu)
{
@@ -310,7 +305,7 @@ static int __cpuinit tick_nohz_cpu_down_callback(struct notifier_block *nfb,
* If we handle the timekeeping duty for full dynticks CPUs,
* we can't safely shutdown that CPU.
*/
- if (have_nohz_full_mask && tick_do_timer_cpu == cpu)
+ if (tick_nohz_full_running && tick_do_timer_cpu == cpu)
return NOTIFY_BAD;
break;
}
@@ -329,14 +324,14 @@ static int tick_nohz_init_all(void)
int err = -1;
#ifdef CONFIG_NO_HZ_FULL_ALL
- if (!alloc_cpumask_var(&nohz_full_mask, GFP_KERNEL)) {
+ if (!alloc_cpumask_var(&tick_nohz_full_mask, GFP_KERNEL)) {
pr_err("NO_HZ: Can't allocate full dynticks cpumask\n");
return err;
}
err = 0;
- cpumask_setall(nohz_full_mask);
- cpumask_clear_cpu(smp_processor_id(), nohz_full_mask);
- have_nohz_full_mask = true;
+ cpumask_setall(tick_nohz_full_mask);
+ cpumask_clear_cpu(smp_processor_id(), tick_nohz_full_mask);
+ tick_nohz_full_running = true;
#endif
return err;
}
@@ -345,17 +340,18 @@ void __init tick_nohz_init(void)
{
int cpu;
- if (!have_nohz_full_mask) {
+ if (!tick_nohz_full_running) {
if (tick_nohz_init_all() < 0)
return;
}
+ for_each_cpu(cpu, tick_nohz_full_mask)
+ context_tracking_cpu_set(cpu);
+
cpu_notifier(tick_nohz_cpu_down_callback, 0);
- cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), nohz_full_mask);
+ cpulist_scnprintf(nohz_full_buf, sizeof(nohz_full_buf), tick_nohz_full_mask);
pr_info("NO_HZ: Full dynticks CPUs: %s.\n", nohz_full_buf);
}
-#else
-#define have_nohz_full_mask (0)
#endif
/*
@@ -733,7 +729,7 @@ static bool can_stop_idle_tick(int cpu, struct tick_sched *ts)
return false;
}
- if (have_nohz_full_mask) {
+ if (tick_nohz_full_enabled()) {
/*
* Keep the tick alive to guarantee timekeeping progression
* if there are full dynticks CPUs around
@@ -827,13 +823,10 @@ void tick_nohz_irq_exit(void)
{
struct tick_sched *ts = &__get_cpu_var(tick_cpu_sched);
- if (ts->inidle) {
- /* Cancel the timer because CPU already waken up from the C-states*/
- menu_hrtimer_cancel();
+ if (ts->inidle)
__tick_nohz_idle_enter(ts);
- } else {
+ else
tick_nohz_full_stop_tick(ts);
- }
}
/**
@@ -931,8 +924,6 @@ void tick_nohz_idle_exit(void)
ts->inidle = 0;
- /* Cancel the timer because CPU already waken up from the C-states*/
- menu_hrtimer_cancel();
if (ts->idle_active || ts->tick_stopped)
now = ktime_get();
diff --git a/kernel/time/timer_list.c b/kernel/time/timer_list.c
index 3bdf283..61ed862 100644
--- a/kernel/time/timer_list.c
+++ b/kernel/time/timer_list.c
@@ -265,10 +265,9 @@ static inline void timer_list_header(struct seq_file *m, u64 now)
static int timer_list_show(struct seq_file *m, void *v)
{
struct timer_list_iter *iter = v;
- u64 now = ktime_to_ns(ktime_get());
if (iter->cpu == -1 && !iter->second_pass)
- timer_list_header(m, now);
+ timer_list_header(m, iter->now);
else if (!iter->second_pass)
print_cpu(m, iter->cpu, iter->now);
#ifdef CONFIG_GENERIC_CLOCKEVENTS
@@ -298,33 +297,41 @@ void sysrq_timer_list_show(void)
return;
}
-static void *timer_list_start(struct seq_file *file, loff_t *offset)
+static void *move_iter(struct timer_list_iter *iter, loff_t offset)
{
- struct timer_list_iter *iter = file->private;
-
- if (!*offset) {
- iter->cpu = -1;
- iter->now = ktime_to_ns(ktime_get());
- } else if (iter->cpu >= nr_cpu_ids) {
+ for (; offset; offset--) {
+ iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
+ if (iter->cpu >= nr_cpu_ids) {
#ifdef CONFIG_GENERIC_CLOCKEVENTS
- if (!iter->second_pass) {
- iter->cpu = -1;
- iter->second_pass = true;
- } else
- return NULL;
+ if (!iter->second_pass) {
+ iter->cpu = -1;
+ iter->second_pass = true;
+ } else
+ return NULL;
#else
- return NULL;
+ return NULL;
#endif
+ }
}
return iter;
}
+static void *timer_list_start(struct seq_file *file, loff_t *offset)
+{
+ struct timer_list_iter *iter = file->private;
+
+ if (!*offset)
+ iter->now = ktime_to_ns(ktime_get());
+ iter->cpu = -1;
+ iter->second_pass = false;
+ return move_iter(iter, *offset);
+}
+
static void *timer_list_next(struct seq_file *file, void *v, loff_t *offset)
{
struct timer_list_iter *iter = file->private;
- iter->cpu = cpumask_next(iter->cpu, cpu_online_mask);
++*offset;
- return timer_list_start(file, offset);
+ return move_iter(iter, 1);
}
static void timer_list_stop(struct seq_file *seq, void *v)
OpenPOWER on IntegriCloud