author    Linus Torvalds <torvalds@linux-foundation.org>    2018-10-23 10:28:21 +0100
committer Linus Torvalds <torvalds@linux-foundation.org>    2018-10-23 10:28:21 +0100
commit    12dd08fa954fb7c327382ead3bb9ac861f9b9b69 (patch)
tree      86467d314ad13181fc5b053ec7cd48ce4ee183fa /drivers/cpuidle/governors/menu.c
parent    72f86d080560d77069dd67b067a69578731320e4 (diff)
parent    cc19b05e3883efbbfd525747a86d0567b12c4d12 (diff)
Merge tag 'pm-4.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm
Pull power management updates from Rafael Wysocki:
 "These make hibernation on 32-bit x86 systems work in all of the cases in which it works on 64-bit x86 ones, update the menu cpuidle governor and the "polling" state to make them more efficient, add more hardware support to cpufreq drivers and fix issues with some of them, fix a bug in the conservative cpufreq governor, fix the operating performance points (OPP) framework and make it more stable, update the devfreq subsystem to take changes in the APIs used by it into account and clean up some things all over.

  Specifics:

   - Backport hibernation bug fixes from x86-64 to x86-32 and consolidate hibernation handling on x86 to allow 32-bit systems to work in all of the cases in which 64-bit ones work (Zhimin Gu, Chen Yu).
   - Fix hibernation documentation (Vladimir D. Seleznev).
   - Update the menu cpuidle governor to fix a couple of issues with it, make it more efficient in some cases and clean it up (Rafael Wysocki).
   - Rework the cpuidle polling state implementation to make it more efficient (Rafael Wysocki).
   - Clean up the cpuidle core somewhat (Fieah Lim).
   - Fix the cpufreq conservative governor to take policy limits into account properly in some cases (Rafael Wysocki).
   - Add support for retrieving guaranteed performance information to the ACPI CPPC library and make the intel_pstate driver use it to expose the CPU base frequency via sysfs on systems with the hardware-managed P-states (HWP) feature enabled (Srinivas Pandruvada).
   - Fix clang warning in the CPPC cpufreq driver (Nathan Chancellor).
   - Get rid of device_node.name printing from cpufreq (Rob Herring).
   - Remove unnecessary unlikely() from the cpufreq core (Igor Stoppa).
   - Add support for the r8a7744 SoC to the cpufreq-dt driver (Biju Das).
   - Update the dt-platdev cpufreq driver to allow RK3399 to have separate tunables per cluster (Dmitry Torokhov).
   - Fix the dma_alloc_coherent() usage in the tegra186 cpufreq driver (Christoph Hellwig).
   - Make the imx6q cpufreq driver read OCOTP through nvmem for imx6ul/imx6ull (Anson Huang).
   - Fix several bugs in the operating performance points (OPP) framework and make it more stable (Viresh Kumar, Dave Gerlach).
   - Update the devfreq subsystem to take changes in the APIs used by it into account, fix some issues with it and make it stop printing device_node.name directly (Bjorn Andersson, Enric Balletbo i Serra, Matthias Kaehlcke, Rob Herring, Vincent Donnefort, zhong jiang).
   - Prepare the generic power domains (genpd) framework for dealing with domains containing CPUs (Ulf Hansson).
   - Prevent sysfs attributes representing low-power S0 residency counters from being exposed if low-power S0 support is not indicated in ACPI FADT (Rajneesh Bhardwaj).
   - Get rid of custom CPU features macros for Intel CPUs from the intel_idle and RAPL drivers (Andy Shevchenko).
   - Update the tasks freezer to list tasks that refused to freeze and caused a system transition to a sleep state to be aborted (Todd Brandt).
   - Update the pm-graph set of tools to v5.2 (Todd Brandt).
   - Fix some issues in the cpupower utility (Anders Roxell, Prarit Bhargava)"

* tag 'pm-4.20-rc1' of git://git.kernel.org/pub/scm/linux/kernel/git/rafael/linux-pm: (73 commits)
  PM / Domains: Document flags for genpd
  PM / Domains: Deal with multiple states but no governor in genpd
  PM / Domains: Don't treat zero found compatible idle states as an error
  cpuidle: menu: Avoid computations when result will be discarded
  cpuidle: menu: Drop redundant comparison
  cpufreq: tegra186: don't pass GFP_DMA32 to dma_alloc_coherent()
  cpufreq: conservative: Take limits changes into account properly
  Documentation: intel_pstate: Add base_frequency information
  cpufreq: intel_pstate: Add base_frequency attribute
  ACPI / CPPC: Add support for guaranteed performance
  cpuidle: menu: Simplify checks related to the polling state
  PM / tools: sleepgraph and bootgraph: upgrade to v5.2
  PM / tools: sleepgraph: first batch of v5.2 changes
  cpupower: Fix coredump on VMWare
  cpupower: Fix AMD Family 0x17 msr_pstate size
  cpufreq: imx6q: read OCOTP through nvmem for imx6ul/imx6ull
  cpufreq: dt-platdev: allow RK3399 to have separate tunables per cluster
  cpuidle: poll_state: Revise loop termination condition
  cpuidle: menu: Move the latency_req == 0 special case check
  cpuidle: menu: Avoid computations for very close timers
  ...
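For orientation before the diff: the menu governor changes in this pull revolve around its idle-duration prediction, which scales the time until the next timer event by a per-bucket correction factor learned from past wakeups and then clamps the result with a "typical interval" detected from recent idle periods. Below is a minimal standalone sketch of the scaling step only, reusing the kernel's RESOLUTION and DECAY fixed-point constants; predict_idle_us() is a made-up name for illustration, not a kernel function.

    #include <stdint.h>

    #define RESOLUTION	1024
    #define DECAY		8

    /*
     * Sketch of the prediction step: next_timer_us is scaled by a
     * correction factor kept in RESOLUTION * DECAY fixed point, with
     * rounding to the closest integer the way DIV_ROUND_CLOSEST_ULL
     * behaves for unsigned operands.
     */
    static unsigned int predict_idle_us(unsigned int next_timer_us,
                                        unsigned int correction_factor)
    {
            uint64_t scaled = (uint64_t)next_timer_us * correction_factor;

            return (unsigned int)((scaled + (RESOLUTION * DECAY) / 2) /
                                  (RESOLUTION * DECAY));
    }

With the neutral factor RESOLUTION * DECAY, which the kernel uses as the initial value, the prediction simply equals next_timer_us; smaller factors shrink the estimate toward the early wakeups actually observed.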
Diffstat (limited to 'drivers/cpuidle/governors/menu.c')
-rw-r--r--  drivers/cpuidle/governors/menu.c | 114
1 file changed, 65 insertions, 49 deletions
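The "cpuidle: menu: Avoid computations when result will be discarded" change shows up in the get_typical_interval() hunks below: the function averages the last eight idle intervals while rejecting outliers, and the patch adds a running minimum so the whole computation can be skipped when even the shortest interval is at or above the already-computed prediction, since the caller takes the min() of the two anyway. A simplified standalone version of that early-out, with detect_typical_us() as a hypothetical stand-in for the kernel's data structures:

    #include <stdint.h>

    #define INTERVALS	8

    /*
     * Simplified model of the early-out added to get_typical_interval():
     * if every recorded interval is >= predicted_us, then
     * min(predicted_us, result) cannot change, so skip the averaging
     * work and return UINT32_MAX as "no estimate".
     */
    static uint32_t detect_typical_us(const uint32_t intervals[INTERVALS],
                                      uint32_t predicted_us)
    {
            uint32_t min = UINT32_MAX;
            uint64_t sum = 0;
            int i;

            for (i = 0; i < INTERVALS; i++) {
                    sum += intervals[i];
                    if (intervals[i] < min)
                            min = intervals[i];
            }

            if (min >= predicted_us)
                    return UINT32_MAX;	/* result would be discarded */

            /* The real code also rejects outliers and checks the variance. */
            return (uint32_t)(sum / INTERVALS);
    }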
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index e26a409..575a68f 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -124,7 +124,6 @@ struct menu_device {
int tick_wakeup;
unsigned int next_timer_us;
- unsigned int predicted_us;
unsigned int bucket;
unsigned int correction_factor[BUCKETS];
unsigned int intervals[INTERVALS];
@@ -197,10 +196,11 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
* of points is below a threshold. If it is... then use the
* average of these 8 points as the estimated value.
*/
-static unsigned int get_typical_interval(struct menu_device *data)
+static unsigned int get_typical_interval(struct menu_device *data,
+ unsigned int predicted_us)
{
int i, divisor;
- unsigned int max, thresh, avg;
+ unsigned int min, max, thresh, avg;
uint64_t sum, variance;
thresh = UINT_MAX; /* Discard outliers above this value */
@@ -208,6 +208,7 @@ static unsigned int get_typical_interval(struct menu_device *data)
again:
/* First calculate the average of past intervals */
+ min = UINT_MAX;
max = 0;
sum = 0;
divisor = 0;
@@ -218,8 +219,19 @@ again:
divisor++;
if (value > max)
max = value;
+
+ if (value < min)
+ min = value;
}
}
+
+ /*
+ * If the result of the computation is going to be discarded anyway,
+ * avoid the computation altogether.
+ */
+ if (min >= predicted_us)
+ return UINT_MAX;
+
if (divisor == INTERVALS)
avg = sum >> INTERVAL_SHIFT;
else
@@ -286,10 +298,9 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
struct menu_device *data = this_cpu_ptr(&menu_devices);
int latency_req = cpuidle_governor_latency_req(dev->cpu);
int i;
- int first_idx;
int idx;
unsigned int interactivity_req;
- unsigned int expected_interval;
+ unsigned int predicted_us;
unsigned long nr_iowaiters, cpu_load;
ktime_t delta_next;
@@ -298,50 +309,36 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
data->needs_update = 0;
}
- /* Special case when user has set very strict latency requirement */
- if (unlikely(latency_req == 0)) {
- *stop_tick = false;
- return 0;
- }
-
/* determine the expected residency time, round up */
data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
get_iowait_load(&nr_iowaiters, &cpu_load);
data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
+ if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
+ ((data->next_timer_us < drv->states[1].target_residency ||
+ latency_req < drv->states[1].exit_latency) &&
+ !drv->states[0].disabled && !dev->states_usage[0].disable)) {
+ /*
+ * In this case state[0] will be used no matter what, so return
+ * it right away and keep the tick running.
+ */
+ *stop_tick = false;
+ return 0;
+ }
+
/*
* Force the result of multiplication to be 64 bits even if both
* operands are 32 bits.
* Make sure to round up for half microseconds.
*/
- data->predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
+ predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
data->correction_factor[data->bucket],
RESOLUTION * DECAY);
-
- expected_interval = get_typical_interval(data);
- expected_interval = min(expected_interval, data->next_timer_us);
-
- first_idx = 0;
- if (drv->states[0].flags & CPUIDLE_FLAG_POLLING) {
- struct cpuidle_state *s = &drv->states[1];
- unsigned int polling_threshold;
-
- /*
- * Default to a physical idle state, not to busy polling, unless
- * a timer is going to trigger really really soon.
- */
- polling_threshold = max_t(unsigned int, 20, s->target_residency);
- if (data->next_timer_us > polling_threshold &&
- latency_req > s->exit_latency && !s->disabled &&
- !dev->states_usage[1].disable)
- first_idx = 1;
- }
-
/*
* Use the lowest expected idle interval to pick the idle state.
*/
- data->predicted_us = min(data->predicted_us, expected_interval);
+ predicted_us = min(predicted_us, get_typical_interval(data, predicted_us));
if (tick_nohz_tick_stopped()) {
/*
@@ -352,34 +349,46 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* the known time till the closest timer event for the idle
* state selection.
*/
- if (data->predicted_us < TICK_USEC)
- data->predicted_us = ktime_to_us(delta_next);
+ if (predicted_us < TICK_USEC)
+ predicted_us = ktime_to_us(delta_next);
} else {
/*
* Use the performance multiplier and the user-configurable
* latency_req to determine the maximum exit latency.
*/
- interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
+ interactivity_req = predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
if (latency_req > interactivity_req)
latency_req = interactivity_req;
}
- expected_interval = data->predicted_us;
/*
* Find the idle state with the lowest power while satisfying
* our constraints.
*/
idx = -1;
- for (i = first_idx; i < drv->state_count; i++) {
+ for (i = 0; i < drv->state_count; i++) {
struct cpuidle_state *s = &drv->states[i];
struct cpuidle_state_usage *su = &dev->states_usage[i];
if (s->disabled || su->disable)
continue;
+
if (idx == -1)
idx = i; /* first enabled state */
- if (s->target_residency > data->predicted_us) {
- if (data->predicted_us < TICK_USEC)
+
+ if (s->target_residency > predicted_us) {
+ /*
+ * Use a physical idle state, not busy polling, unless
+ * a timer is going to trigger soon enough.
+ */
+ if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
+ s->exit_latency <= latency_req &&
+ s->target_residency <= data->next_timer_us) {
+ predicted_us = s->target_residency;
+ idx = i;
+ break;
+ }
+ if (predicted_us < TICK_USEC)
break;
if (!tick_nohz_tick_stopped()) {
@@ -389,7 +398,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* tick in that case and let the governor run
* again in the next iteration of the loop.
*/
- expected_interval = drv->states[idx].target_residency;
+ predicted_us = drv->states[idx].target_residency;
break;
}
@@ -403,7 +412,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
s->target_residency <= ktime_to_us(delta_next))
idx = i;
- goto out;
+ return idx;
}
if (s->exit_latency > latency_req) {
/*
@@ -412,7 +421,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* expected idle duration so that the tick is retained
* as long as that target residency is low enough.
*/
- expected_interval = drv->states[idx].target_residency;
+ predicted_us = drv->states[idx].target_residency;
break;
}
idx = i;
@@ -426,7 +435,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
* expected idle duration is shorter than the tick period length.
*/
if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
- expected_interval < TICK_USEC) && !tick_nohz_tick_stopped()) {
+ predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
unsigned int delta_next_us = ktime_to_us(delta_next);
*stop_tick = false;
@@ -450,10 +459,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
}
}
-out:
- data->last_state_idx = idx;
-
- return data->last_state_idx;
+ return idx;
}
/**
@@ -512,9 +518,19 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
* duration predictor do a better job next time.
*/
measured_us = 9 * MAX_INTERESTING / 10;
+ } else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) &&
+ dev->poll_time_limit) {
+ /*
+ * The CPU exited the "polling" state due to a time limit, so
+ * the idle duration prediction leading to the selection of that
+ * state was inaccurate. If a better prediction had been made,
+ * the CPU might have been woken up from idle by the next timer.
+ * Assume that to be the case.
+ */
+ measured_us = data->next_timer_us;
} else {
/* measured value */
- measured_us = cpuidle_get_last_residency(dev);
+ measured_us = dev->last_residency;
/* Deduct exit latency */
if (measured_us > 2 * target->exit_latency)
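The menu_update() hunk above, truncated at the end of this page, adds the poll_time_limit feedback described in its comment: when the polling state was left because its time limit expired, the measured residency reflects the limit rather than a real wakeup, so the next-timer estimate is fed to the predictor instead. A rough restatement of that branch under invented names (struct feedback, pick_measured_us()), since the surrounding kernel context is cut off here:

    /*
     * Rough sketch of the measurement selection added to menu_update():
     * a polling state that timed out tells us nothing about when a
     * wakeup would actually have arrived, so train the predictor on
     * the timer-based bound instead of the observed residency.
     */
    struct feedback {
            unsigned int next_timer_us;	/* time until next timer event */
            int poll_timed_out;		/* polling hit its time limit */
            unsigned int measured_us;	/* observed idle residency */
    };

    static unsigned int pick_measured_us(const struct feedback *fb)
    {
            return fb->poll_timed_out ? fb->next_timer_us : fb->measured_us;
    }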