From 9fd0c40451468754754271ba0cbb63b6927911df Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Thu, 3 May 2018 10:09:10 +0100
Subject: cpupower: fix spelling mistake: "logilename" -> "logfilename"

Trivial fix to spelling mistake in dprintf message

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/power/cpupower/bench/parse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index 9b65f05..9ba8a44 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -104,7 +104,7 @@ FILE *prepare_output(const char *dirname)
 			dirname, time(NULL));
 	}
 
-	dprintf("logilename: %s\n", filename);
+	dprintf("logfilename: %s\n", filename);
 
 	output = fopen(filename, "w+");
 	if (output == NULL) {
-- 
cgit v1.1


From f9652d5cae04eb5e85303c087f5842d320499c65 Mon Sep 17 00:00:00 2001
From: Abhishek Goel <huntbag@linux.vnet.ibm.com>
Date: Mon, 28 May 2018 06:03:03 -0500
Subject: cpupower : Fix header name to read idle state name

The names of the idle states in the output of cpupower monitor command are
truncated to 4 characters. On POWER9, this creates ambiguity as the states
are named "stop0", "stop1", etc.

root:~# cpupower monitor
              |Idle_Stats
PKG |CORE|CPU | snoo | stop | stop | stop | stop | stop | stop
   0|   0|   0|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  1.90
   0|   0|   1|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00
   0|   0|   2|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00
   0|   0|   3|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00

This patch modifies the output to print the state name that results in a
legible output. The names will be printed with atmost 1 padding in left.

root:~# cpupower monitor
              | Idle_Stats
 PKG|CORE| CPU|snooze|stop0L| stop0|stop1L| stop1|stop2L| stop2
   0|   0|   0|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.72
   0|   0|   1|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00
   0|   0|   2|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00
   0|   0|   3|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00|  0.00

This patch does not affect the output for intel.
Output for intel before applying the patch:

root:~# cpupower monitor
    |Idle_Stats
CPU | POLL | C1-S | C1E- | C3-S | C6-S | C7s- | C8-S | C9-S | C10-
   0|  0.00|  0.14|  0.39|  0.35|  7.41|  0.00| 17.67|  1.01| 70.03
   2|  0.00|  0.19|  0.47|  0.10|  6.50|  0.00| 29.66|  2.17| 58.07
   1|  0.00|  0.11|  0.50|  1.50|  9.11|  0.18| 18.19|  0.40| 66.63
   3|  0.00|  0.67|  0.42|  0.03|  5.84|  0.00| 12.58|  0.77| 77.14

Output for intel after applying the patch:

root:~# cpupower monitor
    | Idle_Stats
 CPU| POLL | C1-S | C1E- | C3-S | C6-S | C7s- | C8-S | C9-S | C10-
   0|  0.03|  0.33|  1.01|  0.27|  3.03|  0.00| 19.18|  0.00| 71.24
   2|  0.00|  1.58|  0.58|  0.42|  8.55|  0.09| 21.11|  0.99| 63.32
   1|  0.00|  1.26|  0.88|  0.43|  9.00|  0.02|  7.78|  4.65| 71.91
   3|  0.00|  0.30|  0.42|  0.06| 13.62|  0.21| 30.29|  0.00| 52.45

Signed-off-by: Abhishek Goel <huntbag@linux.vnet.ibm.com>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 .../cpupower/utils/idle_monitor/cpuidle_sysfs.c    | 15 ++++++++++
 .../cpupower/utils/idle_monitor/cpupower-monitor.c | 35 ++++++++++++----------
 .../cpupower/utils/idle_monitor/cpupower-monitor.h |  9 ++++++
 3 files changed, 44 insertions(+), 15 deletions(-)

diff --git a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
index 5b3205f..5b8c495 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpuidle_sysfs.c
@@ -126,6 +126,20 @@ void fix_up_intel_idle_driver_name(char *tmp, int num)
 	}
 }
 
+#ifdef __powerpc__
+void map_power_idle_state_name(char *tmp)
+{
+	if (!strncmp(tmp, "stop0_lite", CSTATE_NAME_LEN))
+		strcpy(tmp, "stop0L");
+	else if (!strncmp(tmp, "stop1_lite", CSTATE_NAME_LEN))
+		strcpy(tmp, "stop1L");
+	else if (!strncmp(tmp, "stop2_lite", CSTATE_NAME_LEN))
+		strcpy(tmp, "stop2L");
+}
+#else
+void map_power_idle_state_name(char *tmp) { }
+#endif
+
 static struct cpuidle_monitor *cpuidle_register(void)
 {
 	int num;
@@ -145,6 +159,7 @@ static struct cpuidle_monitor *cpuidle_register(void)
 		if (tmp == NULL)
 			continue;
 
+		map_power_idle_state_name(tmp);
 		fix_up_intel_idle_driver_name(tmp, num);
 		strncpy(cpuidle_cstates[num].name, tmp, CSTATE_NAME_LEN - 1);
 		free(tmp);
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
index 05f953f..051da0a 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.c
@@ -70,36 +70,43 @@ void print_n_spaces(int n)
 		printf(" ");
 }
 
-/* size of s must be at least n + 1 */
+/*s is filled with left and right spaces
+ *to make its length atleast n+1
+ */
 int fill_string_with_spaces(char *s, int n)
 {
+	char *temp;
 	int len = strlen(s);
-	if (len > n)
+
+	if (len >= n)
 		return -1;
+
+	temp = malloc(sizeof(char) * (n+1));
 	for (; len < n; len++)
 		s[len] = ' ';
 	s[len] = '\0';
+	snprintf(temp, n+1, " %s", s);
+	strcpy(s, temp);
+	free(temp);
 	return 0;
 }
 
+#define MAX_COL_WIDTH 6
 void print_header(int topology_depth)
 {
 	int unsigned mon;
 	int state, need_len;
 	cstate_t s;
 	char buf[128] = "";
-	int percent_width = 4;
 
 	fill_string_with_spaces(buf, topology_depth * 5 - 1);
 	printf("%s|", buf);
 
 	for (mon = 0; mon < avail_monitors; mon++) {
-		need_len = monitors[mon]->hw_states_num * (percent_width + 3)
+		need_len = monitors[mon]->hw_states_num * (MAX_COL_WIDTH + 1)
 			- 1;
-		if (mon != 0) {
-			printf("|| ");
-			need_len--;
-		}
+		if (mon != 0)
+			printf("||");
 		sprintf(buf, "%s", monitors[mon]->name);
 		fill_string_with_spaces(buf, need_len);
 		printf("%s", buf);
@@ -107,23 +114,21 @@ void print_header(int topology_depth)
 	printf("\n");
 
 	if (topology_depth > 2)
-		printf("PKG |");
+		printf(" PKG|");
 	if (topology_depth > 1)
 		printf("CORE|");
 	if (topology_depth > 0)
-		printf("CPU |");
+		printf(" CPU|");
 
 	for (mon = 0; mon < avail_monitors; mon++) {
 		if (mon != 0)
-			printf("|| ");
-		else
-			printf(" ");
+			printf("||");
 		for (state = 0; state < monitors[mon]->hw_states_num; state++) {
 			if (state != 0)
-				printf(" | ");
+				printf("|");
 			s = monitors[mon]->hw_states[state];
 			sprintf(buf, "%s", s.name);
-			fill_string_with_spaces(buf, percent_width);
+			fill_string_with_spaces(buf, MAX_COL_WIDTH);
 			printf("%s", buf);
 		}
 		printf(" ");
diff --git a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
index 9e43f33..2ae50b4 100644
--- a/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
+++ b/tools/power/cpupower/utils/idle_monitor/cpupower-monitor.h
@@ -15,7 +15,16 @@
 
 #define MONITORS_MAX 20
 #define MONITOR_NAME_LEN 20
+
+/* CSTATE_NAME_LEN is limited by header field width defined
+ * in cpupower-monitor.c. Header field width is defined to be
+ * sum of percent width and two spaces for padding.
+ */
+#ifdef __powerpc__
+#define CSTATE_NAME_LEN 7
+#else
 #define CSTATE_NAME_LEN 5
+#endif
 #define CSTATE_DESC_LEN 60
 
 int cpu_count;
-- 
cgit v1.1


From ac28927659bec665be97fc2c2dfc059f1f913fbb Mon Sep 17 00:00:00 2001
From: Arnd Bergmann <arnd@arndb.de>
Date: Tue, 5 Jun 2018 13:44:24 +0200
Subject: cpufreq: kryo: allow building as a loadable module

Building the kryo cpufreq driver while QCOM_SMEM is a loadable module
results in a link error:

drivers/cpufreq/qcom-cpufreq-kryo.o: In function `qcom_cpufreq_kryo_probe':
qcom-cpufreq-kryo.c:(.text+0xbc): undefined reference to `qcom_smem_get'

The problem is that Kconfig ignores interprets the dependency as met
when the dependent symbol is a 'bool' one. By making it 'tristate',
it will be forced to be a module here, which builds successfully.

Fixes: 46e2856b8e18 (cpufreq: Add Kryo CPU scaling driver)
Signed-off-by: Arnd Bergmann <arnd@arndb.de>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/Kconfig.arm | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/Kconfig.arm b/drivers/cpufreq/Kconfig.arm
index c7ce928..52f5f1a 100644
--- a/drivers/cpufreq/Kconfig.arm
+++ b/drivers/cpufreq/Kconfig.arm
@@ -125,7 +125,7 @@ config ARM_OMAP2PLUS_CPUFREQ
 	default ARCH_OMAP2PLUS
 
 config ARM_QCOM_CPUFREQ_KRYO
-	bool "Qualcomm Kryo based CPUFreq"
+	tristate "Qualcomm Kryo based CPUFreq"
 	depends on ARM64
 	depends on QCOM_QFPROM
 	depends on QCOM_SMEM
-- 
cgit v1.1


From 08e9cc4032626792b49009547454bcef44c2e12b Mon Sep 17 00:00:00 2001
From: Colin Ian King <colin.king@canonical.com>
Date: Fri, 1 Jun 2018 14:05:12 +0100
Subject: cpufreq: ACPI: make function acpi_cpufreq_fast_switch() static

The acpi_cpufreq_fast_switch() function is local to the source and
does not need to be in global scope, so make it static.

Cleans up sparse warning:
drivers/cpufreq/acpi-cpufreq.c:468:14: warning: symbol
'acpi_cpufreq_fast_switch' was not declared. Should it be static?

Signed-off-by: Colin Ian King <colin.king@canonical.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/acpi-cpufreq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c
index 9449657..32ba4bc 100644
--- a/drivers/cpufreq/acpi-cpufreq.c
+++ b/drivers/cpufreq/acpi-cpufreq.c
@@ -465,8 +465,8 @@ static int acpi_cpufreq_target(struct cpufreq_policy *policy,
 	return result;
 }
 
-unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
-				      unsigned int target_freq)
+static unsigned int acpi_cpufreq_fast_switch(struct cpufreq_policy *policy,
+					     unsigned int target_freq)
 {
 	struct acpi_cpufreq_data *data = policy->driver_data;
 	struct acpi_processor_performance *perf;
-- 
cgit v1.1


From e5d295b06d69a1924665a16a4987be475addd00f Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Thu, 31 May 2018 17:21:43 -0500
Subject: cpufreq: ti-cpufreq: Fix an incorrect error return value

Commit 05829d9431df (cpufreq: ti-cpufreq: kfree opp_data when
failure) has fixed a memory leak in the failure path, however
the patch returned a positive value on get_cpu_device() failure
instead of the previous negative value. Fix this incorrect error
return value properly.

Fixes: 05829d9431df (cpufreq: ti-cpufreq: kfree opp_data when failure)
Cc: 4.14+ <stable@vger.kernel.org> # v4.14+
Signed-off-by: Suman Anna <s-anna@ti.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/ti-cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index 6ba709b..896caba 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -226,7 +226,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
 	opp_data->cpu_dev = get_cpu_device(0);
 	if (!opp_data->cpu_dev) {
 		pr_err("%s: Failed to get device for CPU0\n", __func__);
-		ret = ENODEV;
+		ret = -ENODEV;
 		goto free_opp_data;
 	}
 
-- 
cgit v1.1


From d7231f993ad4081da2c2784e2692617e2bd0551e Mon Sep 17 00:00:00 2001
From: Suman Anna <s-anna@ti.com>
Date: Thu, 31 May 2018 17:21:44 -0500
Subject: cpufreq: ti-cpufreq: Use devres managed API in probe()

The ti_cpufreq_probe() function uses regular kzalloc to allocate
the ti_cpufreq_data structure and kfree for freeing this memory
on failures. Simplify this code by using the devres managed
API.

Signed-off-by: Suman Anna <s-anna@ti.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/ti-cpufreq.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/cpufreq/ti-cpufreq.c b/drivers/cpufreq/ti-cpufreq.c
index 896caba..3f0e2a1 100644
--- a/drivers/cpufreq/ti-cpufreq.c
+++ b/drivers/cpufreq/ti-cpufreq.c
@@ -217,7 +217,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
 	if (!match)
 		return -ENODEV;
 
-	opp_data = kzalloc(sizeof(*opp_data), GFP_KERNEL);
+	opp_data = devm_kzalloc(&pdev->dev, sizeof(*opp_data), GFP_KERNEL);
 	if (!opp_data)
 		return -ENOMEM;
 
@@ -226,8 +226,7 @@ static int ti_cpufreq_probe(struct platform_device *pdev)
 	opp_data->cpu_dev = get_cpu_device(0);
 	if (!opp_data->cpu_dev) {
 		pr_err("%s: Failed to get device for CPU0\n", __func__);
-		ret = -ENODEV;
-		goto free_opp_data;
+		return -ENODEV;
 	}
 
 	opp_data->opp_node = dev_pm_opp_of_get_opp_desc_node(opp_data->cpu_dev);
@@ -285,8 +284,6 @@ register_cpufreq_dt:
 
 fail_put_node:
 	of_node_put(opp_data->opp_node);
-free_opp_data:
-	kfree(opp_data);
 
 	return ret;
 }
-- 
cgit v1.1


From e0efd5be63e821066b5e6325cf237eb41367552f Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 5 Jun 2018 14:42:39 -0700
Subject: cpufreq: intel_pstate: Add HWP boost utility and sched util hooks

Added two utility functions to HWP boost up gradually and boost down to
the default cached HWP request values.

Boost up:
Boost up updates HWP request minimum value in steps. This minimum value
can reach upto at HWP request maximum values depends on how frequently,
this boost up function is called. At max, boost up will take three steps
to reach the maximum, depending on the current HWP request levels and HWP
capabilities. For example, if the current settings are:
If P0 (Turbo max) = P1 (Guaranteed max) = min
        No boost at all.
If P0 (Turbo max) > P1 (Guaranteed max) = min
        Should result in one level boost only for P0.
If P0 (Turbo max) = P1 (Guaranteed max) > min
        Should result in two level boost:
                (min + p1)/2 and P1.
If P0 (Turbo max) > P1 (Guaranteed max) > min
        Should result in three level boost:
                (min + p1)/2, P1 and P0.
We don't set any level between P0 and P1 as there is no guarantee that
they will be honored.

Boost down:
After the system is idle for hold time of 3ms, the HWP request is reset
to the default value from HWP init or user modified one via sysfs.

Caching of HWP Request and Capabilities
Store the HWP request value last set using MSR_HWP_REQUEST and read
MSR_HWP_CAPABILITIES. This avoid reading of MSRs in the boost utility
functions.

These boost utility functions calculated limits are based on the latest
HWP request value, which can be modified by setpolicy() callback. So if
user space modifies the minimum perf value, that will be accounted for
every time the boost up is called. There will be case when there can be
contention with the user modified minimum perf, in that case user value
will gain precedence. For example just before HWP_REQUEST MSR is updated
from setpolicy() callback, the boost up function is called via scheduler
tick callback. Here the cached MSR value is already the latest and limits
are updated based on the latest user limits, but on return the MSR write
callback called from setpolicy() callback will update the HWP_REQUEST
value. This will be used till next time the boost up function is called.

In addition add a variable to control HWP dynamic boosting. When HWP
dynamic boost is active then set the HWP specific update util hook. The
contents in the utility hooks will be filled in the subsequent patches.

Reported-by: Mel Gorman <mgorman@techsingularity.net>
Tested-by: Giovanni Gherdovich <ggherdovich@suse.cz>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 100 +++++++++++++++++++++++++++++++++++++++--
 1 file changed, 97 insertions(+), 3 deletions(-)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 08960a5..3949e38 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -221,6 +221,9 @@ struct global_params {
  *			preference/bias
  * @epp_saved:		Saved EPP/EPB during system suspend or CPU offline
  *			operation
+ * @hwp_req_cached:	Cached value of the last HWP Request MSR
+ * @hwp_cap_cached:	Cached value of the last HWP Capabilities MSR
+ * @hwp_boost_min:	Last HWP boosted min performance
  *
  * This structure stores per CPU instance data for all CPUs.
  */
@@ -253,6 +256,9 @@ struct cpudata {
 	s16 epp_policy;
 	s16 epp_default;
 	s16 epp_saved;
+	u64 hwp_req_cached;
+	u64 hwp_cap_cached;
+	u32 hwp_boost_min;
 };
 
 static struct cpudata **all_cpu_data;
@@ -285,6 +291,7 @@ static struct pstate_funcs pstate_funcs __read_mostly;
 
 static int hwp_active __read_mostly;
 static bool per_cpu_limits __read_mostly;
+static bool hwp_boost __read_mostly;
 
 static struct cpufreq_driver *intel_pstate_driver __read_mostly;
 
@@ -689,6 +696,7 @@ static void intel_pstate_get_hwp_max(unsigned int cpu, int *phy_max,
 	u64 cap;
 
 	rdmsrl_on_cpu(cpu, MSR_HWP_CAPABILITIES, &cap);
+	WRITE_ONCE(all_cpu_data[cpu]->hwp_cap_cached, cap);
 	if (global.no_turbo)
 		*current_max = HWP_GUARANTEED_PERF(cap);
 	else
@@ -763,6 +771,7 @@ update_epp:
 		intel_pstate_set_epb(cpu, epp);
 	}
 skip_epp:
+	WRITE_ONCE(cpu_data->hwp_req_cached, value);
 	wrmsrl_on_cpu(cpu, MSR_HWP_REQUEST, value);
 }
 
@@ -1381,6 +1390,81 @@ static void intel_pstate_get_cpu_pstates(struct cpudata *cpu)
 	intel_pstate_set_min_pstate(cpu);
 }
 
+/*
+ * Long hold time will keep high perf limits for long time,
+ * which negatively impacts perf/watt for some workloads,
+ * like specpower. 3ms is based on experiements on some
+ * workoads.
+ */
+static int hwp_boost_hold_time_ns = 3 * NSEC_PER_MSEC;
+
+static inline void intel_pstate_hwp_boost_up(struct cpudata *cpu)
+{
+	u64 hwp_req = READ_ONCE(cpu->hwp_req_cached);
+	u32 max_limit = (hwp_req & 0xff00) >> 8;
+	u32 min_limit = (hwp_req & 0xff);
+	u32 boost_level1;
+
+	/*
+	 * Cases to consider (User changes via sysfs or boot time):
+	 * If, P0 (Turbo max) = P1 (Guaranteed max) = min:
+	 *	No boost, return.
+	 * If, P0 (Turbo max) > P1 (Guaranteed max) = min:
+	 *     Should result in one level boost only for P0.
+	 * If, P0 (Turbo max) = P1 (Guaranteed max) > min:
+	 *     Should result in two level boost:
+	 *         (min + p1)/2 and P1.
+	 * If, P0 (Turbo max) > P1 (Guaranteed max) > min:
+	 *     Should result in three level boost:
+	 *        (min + p1)/2, P1 and P0.
+	 */
+
+	/* If max and min are equal or already at max, nothing to boost */
+	if (max_limit == min_limit || cpu->hwp_boost_min >= max_limit)
+		return;
+
+	if (!cpu->hwp_boost_min)
+		cpu->hwp_boost_min = min_limit;
+
+	/* level at half way mark between min and guranteed */
+	boost_level1 = (HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) + min_limit) >> 1;
+
+	if (cpu->hwp_boost_min < boost_level1)
+		cpu->hwp_boost_min = boost_level1;
+	else if (cpu->hwp_boost_min < HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+		cpu->hwp_boost_min = HWP_GUARANTEED_PERF(cpu->hwp_cap_cached);
+	else if (cpu->hwp_boost_min == HWP_GUARANTEED_PERF(cpu->hwp_cap_cached) &&
+		 max_limit != HWP_GUARANTEED_PERF(cpu->hwp_cap_cached))
+		cpu->hwp_boost_min = max_limit;
+	else
+		return;
+
+	hwp_req = (hwp_req & ~GENMASK_ULL(7, 0)) | cpu->hwp_boost_min;
+	wrmsrl(MSR_HWP_REQUEST, hwp_req);
+	cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
+{
+	if (cpu->hwp_boost_min) {
+		bool expired;
+
+		/* Check if we are idle for hold time to boost down */
+		expired = time_after64(cpu->sample.time, cpu->last_update +
+				       hwp_boost_hold_time_ns);
+		if (expired) {
+			wrmsrl(MSR_HWP_REQUEST, cpu->hwp_req_cached);
+			cpu->hwp_boost_min = 0;
+		}
+	}
+	cpu->last_update = cpu->sample.time;
+}
+
+static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
+						u64 time, unsigned int flags)
+{
+}
+
 static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
 {
 	struct sample *sample = &cpu->sample;
@@ -1684,7 +1768,7 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 {
 	struct cpudata *cpu = all_cpu_data[cpu_num];
 
-	if (hwp_active)
+	if (hwp_active && !hwp_boost)
 		return;
 
 	if (cpu->update_util_set)
@@ -1693,7 +1777,9 @@ static void intel_pstate_set_update_util_hook(unsigned int cpu_num)
 	/* Prevent intel_pstate_update_util() from using stale data. */
 	cpu->sample.time = 0;
 	cpufreq_add_update_util_hook(cpu_num, &cpu->update_util,
-				     intel_pstate_update_util);
+				     (hwp_active ?
+				      intel_pstate_update_util_hwp :
+				      intel_pstate_update_util));
 	cpu->update_util_set = true;
 }
 
@@ -1805,8 +1891,16 @@ static int intel_pstate_set_policy(struct cpufreq_policy *policy)
 		intel_pstate_set_update_util_hook(policy->cpu);
 	}
 
-	if (hwp_active)
+	if (hwp_active) {
+		/*
+		 * When hwp_boost was active before and dynamically it
+		 * was turned off, in that case we need to clear the
+		 * update util hook.
+		 */
+		if (!hwp_boost)
+			intel_pstate_clear_update_util_hook(policy->cpu);
 		intel_pstate_hwp_set(policy->cpu);
+	}
 
 	mutex_unlock(&intel_pstate_limits_lock);
 
-- 
cgit v1.1


From 52ccc4314293272397b117f3cc6f0f368c81431c Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 5 Jun 2018 14:42:40 -0700
Subject: cpufreq: intel_pstate: HWP boost performance on IO wakeup

This change uses SCHED_CPUFREQ_IOWAIT flag to boost HWP performance.
Since SCHED_CPUFREQ_IOWAIT flag is set frequently, we don't start
boosting steps unless we see two consecutive flags in two ticks. This
avoids boosting due to IO because of regular system activities.

To avoid synchronization issues, the actual processing of the flag is
done on the local CPU callback.

Reported-by: Mel Gorman <mgorman@techsingularity.net>
Tested-by: Giovanni Gherdovich <ggherdovich@suse.cz>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 39 +++++++++++++++++++++++++++++++++++++++
 1 file changed, 39 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 3949e38..5b2b6b6 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -223,6 +223,8 @@ struct global_params {
  *			operation
  * @hwp_req_cached:	Cached value of the last HWP Request MSR
  * @hwp_cap_cached:	Cached value of the last HWP Capabilities MSR
+ * @last_io_update:	Last time when IO wake flag was set
+ * @sched_flags:	Store scheduler flags for possible cross CPU update
  * @hwp_boost_min:	Last HWP boosted min performance
  *
  * This structure stores per CPU instance data for all CPUs.
@@ -258,6 +260,8 @@ struct cpudata {
 	s16 epp_saved;
 	u64 hwp_req_cached;
 	u64 hwp_cap_cached;
+	u64 last_io_update;
+	unsigned int sched_flags;
 	u32 hwp_boost_min;
 };
 
@@ -1460,9 +1464,44 @@ static inline void intel_pstate_hwp_boost_down(struct cpudata *cpu)
 	cpu->last_update = cpu->sample.time;
 }
 
+static inline void intel_pstate_update_util_hwp_local(struct cpudata *cpu,
+						      u64 time)
+{
+	cpu->sample.time = time;
+
+	if (cpu->sched_flags & SCHED_CPUFREQ_IOWAIT) {
+		bool do_io = false;
+
+		cpu->sched_flags = 0;
+		/*
+		 * Set iowait_boost flag and update time. Since IO WAIT flag
+		 * is set all the time, we can't just conclude that there is
+		 * some IO bound activity is scheduled on this CPU with just
+		 * one occurrence. If we receive at least two in two
+		 * consecutive ticks, then we treat as boost candidate.
+		 */
+		if (time_before64(time, cpu->last_io_update + 2 * TICK_NSEC))
+			do_io = true;
+
+		cpu->last_io_update = time;
+
+		if (do_io)
+			intel_pstate_hwp_boost_up(cpu);
+
+	} else {
+		intel_pstate_hwp_boost_down(cpu);
+	}
+}
+
 static inline void intel_pstate_update_util_hwp(struct update_util_data *data,
 						u64 time, unsigned int flags)
 {
+	struct cpudata *cpu = container_of(data, struct cpudata, update_util);
+
+	cpu->sched_flags |= flags;
+
+	if (smp_processor_id() == cpu->cpu)
+		intel_pstate_update_util_hwp_local(cpu, time);
 }
 
 static inline void intel_pstate_calc_avg_perf(struct cpudata *cpu)
-- 
cgit v1.1


From aaaece3de9d7709d79004dd5d5aa7c9b366f0675 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 5 Jun 2018 14:42:41 -0700
Subject: cpufreq: intel_pstate: New sysfs entry to control HWP boost

A new attribute is added to intel_pstate sysfs to enable/disable
HWP dynamic performance boost.

Reported-by: Mel Gorman <mgorman@techsingularity.net>
Tested-by: Giovanni Gherdovich <ggherdovich@suse.cz>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 30 ++++++++++++++++++++++++++++++
 1 file changed, 30 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 5b2b6b6..70bf63b 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1033,6 +1033,30 @@ static ssize_t store_min_perf_pct(struct kobject *a, struct attribute *b,
 	return count;
 }
 
+static ssize_t show_hwp_dynamic_boost(struct kobject *kobj,
+				struct attribute *attr, char *buf)
+{
+	return sprintf(buf, "%u\n", hwp_boost);
+}
+
+static ssize_t store_hwp_dynamic_boost(struct kobject *a, struct attribute *b,
+				       const char *buf, size_t count)
+{
+	unsigned int input;
+	int ret;
+
+	ret = kstrtouint(buf, 10, &input);
+	if (ret)
+		return ret;
+
+	mutex_lock(&intel_pstate_driver_lock);
+	hwp_boost = !!input;
+	intel_pstate_update_policies();
+	mutex_unlock(&intel_pstate_driver_lock);
+
+	return count;
+}
+
 show_one(max_perf_pct, max_perf_pct);
 show_one(min_perf_pct, min_perf_pct);
 
@@ -1042,6 +1066,7 @@ define_one_global_rw(max_perf_pct);
 define_one_global_rw(min_perf_pct);
 define_one_global_ro(turbo_pct);
 define_one_global_ro(num_pstates);
+define_one_global_rw(hwp_dynamic_boost);
 
 static struct attribute *intel_pstate_attributes[] = {
 	&status.attr,
@@ -1082,6 +1107,11 @@ static void __init intel_pstate_sysfs_expose_params(void)
 	rc = sysfs_create_file(intel_pstate_kobject, &min_perf_pct.attr);
 	WARN_ON(rc);
 
+	if (hwp_active) {
+		rc = sysfs_create_file(intel_pstate_kobject,
+				       &hwp_dynamic_boost.attr);
+		WARN_ON(rc);
+	}
 }
 /************************** sysfs end ************************/
 
-- 
cgit v1.1


From 657c292ce1bb67b1e61cf927a2b6ea135fb700df Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 31 May 2018 12:59:55 +0200
Subject: PM / Domains: dt: Allow power-domain property to be a list of
 specifiers

To be able to describe topologies where devices are partitioned across
multiple power domains, let's extend the power-domain property to allow
being a list of PM domain specifiers.

Suggested-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Rob Herring <robh@kernel.org>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 .../devicetree/bindings/power/power_domain.txt        | 19 ++++++++++++++-----
 1 file changed, 14 insertions(+), 5 deletions(-)

diff --git a/Documentation/devicetree/bindings/power/power_domain.txt b/Documentation/devicetree/bindings/power/power_domain.txt
index 4733f76..9b387f8 100644
--- a/Documentation/devicetree/bindings/power/power_domain.txt
+++ b/Documentation/devicetree/bindings/power/power_domain.txt
@@ -111,8 +111,8 @@ Example 3:
 ==PM domain consumers==
 
 Required properties:
- - power-domains : A phandle and PM domain specifier as defined by bindings of
-                   the power controller specified by phandle.
+ - power-domains : A list of PM domain specifiers, as defined by bindings of
+		the power controller that is the PM domain provider.
 
 Example:
 
@@ -122,9 +122,18 @@ Example:
 		power-domains = <&power 0>;
 	};
 
-The node above defines a typical PM domain consumer device, which is located
-inside a PM domain with index 0 of a power controller represented by a node
-with the label "power".
+	leaky-device@12351000 {
+		compatible = "foo,i-leak-current";
+		reg = <0x12351000 0x1000>;
+		power-domains = <&power 0>, <&power 1> ;
+	};
+
+The first example above defines a typical PM domain consumer device, which is
+located inside a PM domain with index 0 of a power controller represented by a
+node with the label "power".
+In the second example the consumer device are partitioned across two PM domains,
+the first with index 0 and the second with index 1, of a power controller that
+is represented by a node with the label "power.
 
 Optional properties:
 - required-opps: This contains phandle to an OPP node in another device's OPP
-- 
cgit v1.1


From bcd931f298d4a5660a4ff6f6629831d917a916d8 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 31 May 2018 12:59:56 +0200
Subject: PM / Domains: Don't attach devices in genpd with multi PM domains

The power-domain DT property may now contain a list of PM domain
specifiers, which represents that a device are partitioned across multiple
PM domains. This leads to a new situation in genpd_dev_pm_attach(), as only
one PM domain can be attached per device.

To remain things simple for the most common configuration, when a single PM
domain is used, let's treat the multiple PM domain case as being specific.

In other words, let's change genpd_dev_pm_attach() to check for multiple PM
domains and prevent it from attach any PM domain for this case. Instead,
leave this to be managed separately, from following changes to genpd.

Suggested-by: Jon Hunter <jonathanh@nvidia.com>
Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 18 +++++++++++++-----
 1 file changed, 13 insertions(+), 5 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 6f403d6..908c447 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2229,10 +2229,10 @@ static void genpd_dev_pm_sync(struct device *dev)
  * attaches the device to retrieved pm_domain ops.
  *
  * Returns 1 on successfully attached PM domain, 0 when the device don't need a
- * PM domain or a negative error code in case of failures. Note that if a
- * power-domain exists for the device, but it cannot be found or turned on,
- * then return -EPROBE_DEFER to ensure that the device is not probed and to
- * re-try again later.
+ * PM domain or when multiple power-domains exists for it, else a negative error
+ * code. Note that if a power-domain exists for the device, but it cannot be
+ * found or turned on, then return -EPROBE_DEFER to ensure that the device is
+ * not probed and to re-try again later.
  */
 int genpd_dev_pm_attach(struct device *dev)
 {
@@ -2243,10 +2243,18 @@ int genpd_dev_pm_attach(struct device *dev)
 	if (!dev->of_node)
 		return 0;
 
+	/*
+	 * Devices with multiple PM domains must be attached separately, as we
+	 * can only attach one PM domain per device.
+	 */
+	if (of_count_phandle_with_args(dev->of_node, "power-domains",
+				       "#power-domain-cells") != 1)
+		return 0;
+
 	ret = of_parse_phandle_with_args(dev->of_node, "power-domains",
 					"#power-domain-cells", 0, &pd_args);
 	if (ret < 0)
-		return 0;
+		return ret;
 
 	mutex_lock(&gpd_list_lock);
 	pd = genpd_get_from_provider(&pd_args);
-- 
cgit v1.1


From 8cb1cbd644d5bba5b72eedd632f249c1c677b792 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 31 May 2018 12:59:57 +0200
Subject: PM / Domains: Split genpd_dev_pm_attach()

To extend genpd to deal with allowing multiple PM domains per device, some
of the code in genpd_dev_pm_attach() can be re-used. Let's prepare for this
by moving some of the code into a sub-function.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 60 +++++++++++++++++++++++++--------------------
 1 file changed, 33 insertions(+), 27 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 908c447..b1fcbf9 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2221,38 +2221,15 @@ static void genpd_dev_pm_sync(struct device *dev)
 	genpd_queue_power_off_work(pd);
 }
 
-/**
- * genpd_dev_pm_attach - Attach a device to its PM domain using DT.
- * @dev: Device to attach.
- *
- * Parse device's OF node to find a PM domain specifier. If such is found,
- * attaches the device to retrieved pm_domain ops.
- *
- * Returns 1 on successfully attached PM domain, 0 when the device don't need a
- * PM domain or when multiple power-domains exists for it, else a negative error
- * code. Note that if a power-domain exists for the device, but it cannot be
- * found or turned on, then return -EPROBE_DEFER to ensure that the device is
- * not probed and to re-try again later.
- */
-int genpd_dev_pm_attach(struct device *dev)
+static int __genpd_dev_pm_attach(struct device *dev, struct device_node *np,
+				 unsigned int index)
 {
 	struct of_phandle_args pd_args;
 	struct generic_pm_domain *pd;
 	int ret;
 
-	if (!dev->of_node)
-		return 0;
-
-	/*
-	 * Devices with multiple PM domains must be attached separately, as we
-	 * can only attach one PM domain per device.
-	 */
-	if (of_count_phandle_with_args(dev->of_node, "power-domains",
-				       "#power-domain-cells") != 1)
-		return 0;
-
-	ret = of_parse_phandle_with_args(dev->of_node, "power-domains",
-					"#power-domain-cells", 0, &pd_args);
+	ret = of_parse_phandle_with_args(np, "power-domains",
+				"#power-domain-cells", index, &pd_args);
 	if (ret < 0)
 		return ret;
 
@@ -2290,6 +2267,35 @@ int genpd_dev_pm_attach(struct device *dev)
 
 	return ret ? -EPROBE_DEFER : 1;
 }
+
+/**
+ * genpd_dev_pm_attach - Attach a device to its PM domain using DT.
+ * @dev: Device to attach.
+ *
+ * Parse device's OF node to find a PM domain specifier. If such is found,
+ * attaches the device to retrieved pm_domain ops.
+ *
+ * Returns 1 on successfully attached PM domain, 0 when the device don't need a
+ * PM domain or when multiple power-domains exists for it, else a negative error
+ * code. Note that if a power-domain exists for the device, but it cannot be
+ * found or turned on, then return -EPROBE_DEFER to ensure that the device is
+ * not probed and to re-try again later.
+ */
+int genpd_dev_pm_attach(struct device *dev)
+{
+	if (!dev->of_node)
+		return 0;
+
+	/*
+	 * Devices with multiple PM domains must be attached separately, as we
+	 * can only attach one PM domain per device.
+	 */
+	if (of_count_phandle_with_args(dev->of_node, "power-domains",
+				       "#power-domain-cells") != 1)
+		return 0;
+
+	return __genpd_dev_pm_attach(dev, dev->of_node, 0);
+}
 EXPORT_SYMBOL_GPL(genpd_dev_pm_attach);
 
 static const struct of_device_id idle_state_match[] = {
-- 
cgit v1.1


From 3c095f32a92be4d07f3172a777dab1aacdb6a728 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 31 May 2018 12:59:58 +0200
Subject: PM / Domains: Add support for multi PM domains per device to genpd

To support devices being partitioned across multiple PM domains, let's
begin with extending genpd to cope with these kind of configurations.

Therefore, add a new exported function genpd_dev_pm_attach_by_id(), which
is similar to the existing genpd_dev_pm_attach(), but with the difference
that it allows its callers to provide an index to the PM domain that it
wants to attach.

Note that, genpd_dev_pm_attach_by_id() shall only be called by the driver
core / PM core, similar to how the existing dev_pm_domain_attach() makes
use of genpd_dev_pm_attach(). However, this is implemented by following
changes on top.

Because, only one PM domain can be attached per device, genpd needs to
create a virtual device that it can attach/detach instead. More precisely,
let the new function genpd_dev_pm_attach_by_id() register a virtual struct
device via calling device_register(). Then let it attach this device to the
corresponding PM domain, rather than the one that is provided by the
caller. The actual attaching is done via re-using the existing genpd OF
functions.

At successful attachment, genpd_dev_pm_attach_by_id() returns the created
virtual device, which allows the caller to operate on it to deal with power
management. Following changes on top, provides more details in this
regards.

To deal with detaching of a PM domain for the multiple PM domains case,
let's also extend the existing genpd_dev_pm_detach() function, to cover the
cleanup of the created virtual device, via make it call device_unregister()
on it. In this way, there is no need to introduce a new function to deal
with detach for the multiple PM domain case, but instead the existing one
is re-used.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 80 +++++++++++++++++++++++++++++++++++++++++++++
 include/linux/pm_domain.h   |  8 +++++
 2 files changed, 88 insertions(+)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index b1fcbf9..4925af5 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2171,6 +2171,15 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 }
 EXPORT_SYMBOL_GPL(of_genpd_remove_last);
 
+static void genpd_release_dev(struct device *dev)
+{
+	kfree(dev);
+}
+
+static struct bus_type genpd_bus_type = {
+	.name		= "genpd",
+};
+
 /**
  * genpd_dev_pm_detach - Detach a device from its PM domain.
  * @dev: Device to detach.
@@ -2208,6 +2217,10 @@ static void genpd_dev_pm_detach(struct device *dev, bool power_off)
 
 	/* Check if PM domain can be powered off after removing this device. */
 	genpd_queue_power_off_work(pd);
+
+	/* Unregister the device if it was created by genpd. */
+	if (dev->bus == &genpd_bus_type)
+		device_unregister(dev);
 }
 
 static void genpd_dev_pm_sync(struct device *dev)
@@ -2298,6 +2311,67 @@ int genpd_dev_pm_attach(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(genpd_dev_pm_attach);
 
+/**
+ * genpd_dev_pm_attach_by_id - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @index: The index of the PM domain.
+ *
+ * Parse device's OF node to find a PM domain specifier at the provided @index.
+ * If such is found, creates a virtual device and attaches it to the retrieved
+ * pm_domain ops. To deal with detaching of the virtual device, the ->detach()
+ * callback in the struct dev_pm_domain are assigned to genpd_dev_pm_detach().
+ *
+ * Returns the created virtual device if successfully attached PM domain, NULL
+ * when the device don't need a PM domain, else an ERR_PTR() in case of
+ * failures. If a power-domain exists for the device, but cannot be found or
+ * turned on, then ERR_PTR(-EPROBE_DEFER) is returned to ensure that the device
+ * is not probed and to re-try again later.
+ */
+struct device *genpd_dev_pm_attach_by_id(struct device *dev,
+					 unsigned int index)
+{
+	struct device *genpd_dev;
+	int num_domains;
+	int ret;
+
+	if (!dev->of_node)
+		return NULL;
+
+	/* Deal only with devices using multiple PM domains. */
+	num_domains = of_count_phandle_with_args(dev->of_node, "power-domains",
+						 "#power-domain-cells");
+	if (num_domains < 2 || index >= num_domains)
+		return NULL;
+
+	/* Allocate and register device on the genpd bus. */
+	genpd_dev = kzalloc(sizeof(*genpd_dev), GFP_KERNEL);
+	if (!genpd_dev)
+		return ERR_PTR(-ENOMEM);
+
+	dev_set_name(genpd_dev, "genpd:%u:%s", index, dev_name(dev));
+	genpd_dev->bus = &genpd_bus_type;
+	genpd_dev->release = genpd_release_dev;
+
+	ret = device_register(genpd_dev);
+	if (ret) {
+		kfree(genpd_dev);
+		return ERR_PTR(ret);
+	}
+
+	/* Try to attach the device to the PM domain at the specified index. */
+	ret = __genpd_dev_pm_attach(genpd_dev, dev->of_node, index);
+	if (ret < 1) {
+		device_unregister(genpd_dev);
+		return ret ? ERR_PTR(ret) : NULL;
+	}
+
+	pm_runtime_set_active(genpd_dev);
+	pm_runtime_enable(genpd_dev);
+
+	return genpd_dev;
+}
+EXPORT_SYMBOL_GPL(genpd_dev_pm_attach_by_id);
+
 static const struct of_device_id idle_state_match[] = {
 	{ .compatible = "domain-idle-state", },
 	{ }
@@ -2457,6 +2531,12 @@ unlock:
 }
 EXPORT_SYMBOL_GPL(of_genpd_opp_to_performance_state);
 
+static int __init genpd_bus_init(void)
+{
+	return bus_register(&genpd_bus_type);
+}
+core_initcall(genpd_bus_init);
+
 #endif /* CONFIG_PM_GENERIC_DOMAINS_OF */
 
 
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 42e0d64..82458e8 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -237,6 +237,8 @@ unsigned int of_genpd_opp_to_performance_state(struct device *dev,
 				struct device_node *opp_node);
 
 int genpd_dev_pm_attach(struct device *dev);
+struct device *genpd_dev_pm_attach_by_id(struct device *dev,
+					 unsigned int index);
 #else /* !CONFIG_PM_GENERIC_DOMAINS_OF */
 static inline int of_genpd_add_provider_simple(struct device_node *np,
 					struct generic_pm_domain *genpd)
@@ -282,6 +284,12 @@ static inline int genpd_dev_pm_attach(struct device *dev)
 	return 0;
 }
 
+static inline struct device *genpd_dev_pm_attach_by_id(struct device *dev,
+						       unsigned int index)
+{
+	return NULL;
+}
+
 static inline
 struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 {
-- 
cgit v1.1


From 82e12d9e0bd59f3d24be9c735258e2e98e4f54f6 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Thu, 31 May 2018 12:59:59 +0200
Subject: PM / Domains: Add dev_pm_domain_attach_by_id() to manage multi PM
 domains

The existing dev_pm_domain_attach() function, allows a single PM domain to
be attached per device. To be able to support devices that are partitioned
across multiple PM domains, let's introduce a new interface,
dev_pm_domain_attach_by_id().

The dev_pm_domain_attach_by_id() returns a new allocated struct device with
the corresponding attached PM domain. This enables for example a driver to
operate on the new device from a power management point of view. The driver
may then also benefit from using the received device, to set up so called
device-links towards its original device. Depending on the situation, these
links may then be dynamically changed.

The new interface is typically called by drivers during their probe phase,
in case they manages devices which uses multiple PM domains. If that is the
case, the driver also becomes responsible of managing the detaching of the
PM domains, which typically should be done at the remove phase. Detaching
is done by calling the existing dev_pm_domain_detach() function and for
each of the received devices from dev_pm_domain_attach_by_id().

Note, currently its only genpd that supports multiple PM domains per
device, but dev_pm_domain_attach_by_id() can easily by extended to cover
other PM domain types, if/when needed.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Acked-by: Jon Hunter <jonathanh@nvidia.com>
Tested-by: Jon Hunter <jonathanh@nvidia.com>
Reviewed-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/common.c | 43 ++++++++++++++++++++++++++++++++++++++++---
 include/linux/pm_domain.h   |  7 +++++++
 2 files changed, 47 insertions(+), 3 deletions(-)

diff --git a/drivers/base/power/common.c b/drivers/base/power/common.c
index 7ae62b6..df41b47 100644
--- a/drivers/base/power/common.c
+++ b/drivers/base/power/common.c
@@ -117,13 +117,50 @@ int dev_pm_domain_attach(struct device *dev, bool power_on)
 EXPORT_SYMBOL_GPL(dev_pm_domain_attach);
 
 /**
+ * dev_pm_domain_attach_by_id - Associate a device with one of its PM domains.
+ * @dev: The device used to lookup the PM domain.
+ * @index: The index of the PM domain.
+ *
+ * As @dev may only be attached to a single PM domain, the backend PM domain
+ * provider creates a virtual device to attach instead. If attachment succeeds,
+ * the ->detach() callback in the struct dev_pm_domain are assigned by the
+ * corresponding backend attach function, as to deal with detaching of the
+ * created virtual device.
+ *
+ * This function should typically be invoked by a driver during the probe phase,
+ * in case its device requires power management through multiple PM domains. The
+ * driver may benefit from using the received device, to configure device-links
+ * towards its original device. Depending on the use-case and if needed, the
+ * links may be dynamically changed by the driver, which allows it to control
+ * the power to the PM domains independently from each other.
+ *
+ * Callers must ensure proper synchronization of this function with power
+ * management callbacks.
+ *
+ * Returns the virtual created device when successfully attached to its PM
+ * domain, NULL in case @dev don't need a PM domain, else an ERR_PTR().
+ * Note that, to detach the returned virtual device, the driver shall call
+ * dev_pm_domain_detach() on it, typically during the remove phase.
+ */
+struct device *dev_pm_domain_attach_by_id(struct device *dev,
+					  unsigned int index)
+{
+	if (dev->pm_domain)
+		return ERR_PTR(-EEXIST);
+
+	return genpd_dev_pm_attach_by_id(dev, index);
+}
+EXPORT_SYMBOL_GPL(dev_pm_domain_attach_by_id);
+
+/**
  * dev_pm_domain_detach - Detach a device from its PM domain.
  * @dev: Device to detach.
  * @power_off: Used to indicate whether we should power off the device.
  *
- * This functions will reverse the actions from dev_pm_domain_attach() and thus
- * try to detach the @dev from its PM domain. Typically it should be invoked
- * from subsystem level code during the remove phase.
+ * This functions will reverse the actions from dev_pm_domain_attach() and
+ * dev_pm_domain_attach_by_id(), thus it detaches @dev from its PM domain.
+ * Typically it should be invoked during the remove phase, either from
+ * subsystem level code or from drivers.
  *
  * Callers must ensure proper synchronization of this function with power
  * management callbacks.
diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 82458e8..9206a4f 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -299,6 +299,8 @@ struct generic_pm_domain *of_genpd_remove_last(struct device_node *np)
 
 #ifdef CONFIG_PM
 int dev_pm_domain_attach(struct device *dev, bool power_on);
+struct device *dev_pm_domain_attach_by_id(struct device *dev,
+					  unsigned int index);
 void dev_pm_domain_detach(struct device *dev, bool power_off);
 void dev_pm_domain_set(struct device *dev, struct dev_pm_domain *pd);
 #else
@@ -306,6 +308,11 @@ static inline int dev_pm_domain_attach(struct device *dev, bool power_on)
 {
 	return 0;
 }
+static inline struct device *dev_pm_domain_attach_by_id(struct device *dev,
+							unsigned int index)
+{
+	return NULL;
+}
 static inline void dev_pm_domain_detach(struct device *dev, bool power_off) {}
 static inline void dev_pm_domain_set(struct device *dev,
 				     struct dev_pm_domain *pd) {}
-- 
cgit v1.1


From 2d5ed61ce9820a1fe7b076cc45c169524d767746 Mon Sep 17 00:00:00 2001
From: Ravi Chandra Sadineni <ravisadineni@chromium.org>
Date: Fri, 1 Jun 2018 19:32:15 -0700
Subject: PM / wakeup: Export wakeup_count instead of event_count via sysfs

Currently we export event_count instead of wakeup_count via the
per-device wakeup_count sysfs attribute. Change it to wakeup_count
to make it more meaningful.

wakeup_count increments only when events_check_enabled is set,
that is whenever writes the current wakeup count to
/sys/power/wakeup_count.  Also events_check_enabled is cleared on
every resume. User space is expected to write to this just before
suspend.  This way pm_wakeup_event(), when called from IRQs handles,
will increment wakeup_count only if we are in system-wide
suspend-resume cycle and should give a fair approximation of how many
times a device may have triggered a wakeup from system suspend.

event_count on the other hand will increment every time
pm_wakeup_event() is called irrespective of whether we are in a
suspend-resume cycle and some drivers call it on every interrupt
which makes it less useful for system wakeup tracking.

Signed-off-by: Ravi Chandra Sadineni <ravisadineni@chromium.org>
Acked-by: Pavel Machek <pavel@ucw.cz>
[ rjw: Subject & changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/sysfs.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/base/power/sysfs.c b/drivers/base/power/sysfs.c
index 0f651ef..d713738 100644
--- a/drivers/base/power/sysfs.c
+++ b/drivers/base/power/sysfs.c
@@ -353,7 +353,7 @@ static ssize_t wakeup_count_show(struct device *dev,
 
 	spin_lock_irq(&dev->power.lock);
 	if (dev->power.wakeup) {
-		count = dev->power.wakeup->event_count;
+		count = dev->power.wakeup->wakeup_count;
 		enabled = true;
 	}
 	spin_unlock_irq(&dev->power.lock);
-- 
cgit v1.1


From 41ab43c9c89e06ff08a4750d1b09e227ea97894f Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Tue, 5 Jun 2018 14:42:42 -0700
Subject: cpufreq: intel_pstate: enable boost for Skylake Xeon

Enable HWP boost on Skylake server and workstations.

Reported-by: Mel Gorman <mgorman@techsingularity.net>
Tested-by: Giovanni Gherdovich <ggherdovich@suse.cz>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 70bf63b..352d5b2 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -1794,6 +1794,12 @@ static const struct x86_cpu_id intel_pstate_cpu_ee_disable_ids[] = {
 	{}
 };
 
+static const struct x86_cpu_id intel_pstate_hwp_boost_ids[] = {
+	ICPU(INTEL_FAM6_SKYLAKE_X, core_funcs),
+	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP, core_funcs),
+	{}
+};
+
 static int intel_pstate_init_cpu(unsigned int cpunum)
 {
 	struct cpudata *cpu;
@@ -1824,6 +1830,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum)
 			intel_pstate_disable_ee(cpunum);
 
 		intel_pstate_hwp_enable(cpu);
+
+		id = x86_match_cpu(intel_pstate_hwp_boost_ids);
+		if (id)
+			hwp_boost = true;
 	}
 
 	intel_pstate_get_cpu_pstates(cpu);
-- 
cgit v1.1


From 7592019634f8473f0b0973ce79297183077bdbc2 Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Fri, 8 Jun 2018 09:07:33 +0800
Subject: cpufreq: governors: Fix long idle detection logic in load calculation

According to current code implementation, detecting the long
idle period is done by checking if the interval between two
adjacent utilization update handlers is long enough. Although
this mechanism can detect if the idle period is long enough
(no utilization hooks invoked during idle period), it might
not cover a corner case: if the task has occupied the CPU
for too long which causes no context switches during that
period, then no utilization handler will be launched until this
high prio task is scheduled out. As a result, the idle_periods
field might be calculated incorrectly because it regards the
100% load as 0% and makes the conservative governor who uses
this field confusing.

Change the detection to compare the idle_time with sampling_rate
directly.

Reported-by: Artem S. Tashkinov <t.artem@mailcity.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq_governor.c | 12 +++++-------
 1 file changed, 5 insertions(+), 7 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c
index 871bf9c..1d50e97 100644
--- a/drivers/cpufreq/cpufreq_governor.c
+++ b/drivers/cpufreq/cpufreq_governor.c
@@ -165,7 +165,7 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 			 * calls, so the previous load value can be used then.
 			 */
 			load = j_cdbs->prev_load;
-		} else if (unlikely(time_elapsed > 2 * sampling_rate &&
+		} else if (unlikely((int)idle_time > 2 * sampling_rate &&
 				    j_cdbs->prev_load)) {
 			/*
 			 * If the CPU had gone completely idle and a task has
@@ -185,10 +185,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 			 * clear prev_load to guarantee that the load will be
 			 * computed again next time.
 			 *
-			 * Detecting this situation is easy: the governor's
-			 * utilization update handler would not have run during
-			 * CPU-idle periods.  Hence, an unusually large
-			 * 'time_elapsed' (as compared to the sampling rate)
+			 * Detecting this situation is easy: an unusually large
+			 * 'idle_time' (as compared to the sampling rate)
 			 * indicates this scenario.
 			 */
 			load = j_cdbs->prev_load;
@@ -217,8 +215,8 @@ unsigned int dbs_update(struct cpufreq_policy *policy)
 			j_cdbs->prev_load = load;
 		}
 
-		if (time_elapsed > 2 * sampling_rate) {
-			unsigned int periods = time_elapsed / sampling_rate;
+		if (unlikely((int)idle_time > 2 * sampling_rate)) {
+			unsigned int periods = idle_time / sampling_rate;
 
 			if (periods < idle_periods)
 				idle_periods = periods;
-- 
cgit v1.1


From 0aa9abd4c212fc1cd111cc0a9fc571f0d86e63cf Mon Sep 17 00:00:00 2001
From: =?UTF-8?q?S=C3=A9bastien=20Szymanski?=
 <sebastien.szymanski@armadeus.com>
Date: Tue, 22 May 2018 08:28:51 +0200
Subject: cpufreq: imx6q: check speed grades for i.MX6ULL
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Check the max speed supported from the fuses for i.MX6ULL and update the
operating points table accordingly.

Signed-off-by: Sébastien Szymanski <sebastien.szymanski@armadeus.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Tested-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Stefan Agner <stefan@agner.ch>
Reviewed-by: Fabio Estevam <fabio.estevam@nxp.com>
Acked-by: Shawn Guo <shawnguo@kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/imx6q-cpufreq.c | 29 +++++++++++++++++++++++------
 1 file changed, 23 insertions(+), 6 deletions(-)

diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index 83cf631..f094687 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -266,6 +266,8 @@ put_node:
 }
 
 #define OCOTP_CFG3_6UL_SPEED_696MHZ	0x2
+#define OCOTP_CFG3_6ULL_SPEED_792MHZ	0x2
+#define OCOTP_CFG3_6ULL_SPEED_900MHZ	0x3
 
 static void imx6ul_opp_check_speed_grading(struct device *dev)
 {
@@ -287,16 +289,30 @@ static void imx6ul_opp_check_speed_grading(struct device *dev)
 	 * Speed GRADING[1:0] defines the max speed of ARM:
 	 * 2b'00: Reserved;
 	 * 2b'01: 528000000Hz;
-	 * 2b'10: 696000000Hz;
-	 * 2b'11: Reserved;
+	 * 2b'10: 696000000Hz on i.MX6UL, 792000000Hz on i.MX6ULL;
+	 * 2b'11: 900000000Hz on i.MX6ULL only;
 	 * We need to set the max speed of ARM according to fuse map.
 	 */
 	val = readl_relaxed(base + OCOTP_CFG3);
 	val >>= OCOTP_CFG3_SPEED_SHIFT;
 	val &= 0x3;
-	if (val != OCOTP_CFG3_6UL_SPEED_696MHZ)
-		if (dev_pm_opp_disable(dev, 696000000))
-			dev_warn(dev, "failed to disable 696MHz OPP\n");
+
+	if (of_machine_is_compatible("fsl,imx6ul")) {
+		if (val != OCOTP_CFG3_6UL_SPEED_696MHZ)
+			if (dev_pm_opp_disable(dev, 696000000))
+				dev_warn(dev, "failed to disable 696MHz OPP\n");
+	}
+
+	if (of_machine_is_compatible("fsl,imx6ull")) {
+		if (val != OCOTP_CFG3_6ULL_SPEED_792MHZ)
+			if (dev_pm_opp_disable(dev, 792000000))
+				dev_warn(dev, "failed to disable 792MHz OPP\n");
+
+		if (val != OCOTP_CFG3_6ULL_SPEED_900MHZ)
+			if (dev_pm_opp_disable(dev, 900000000))
+				dev_warn(dev, "failed to disable 900MHz OPP\n");
+	}
+
 	iounmap(base);
 put_node:
 	of_node_put(np);
@@ -356,7 +372,8 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 		goto put_reg;
 	}
 
-	if (of_machine_is_compatible("fsl,imx6ul"))
+	if (of_machine_is_compatible("fsl,imx6ul") ||
+	    of_machine_is_compatible("fsl,imx6ull"))
 		imx6ul_opp_check_speed_grading(cpu_dev);
 	else
 		imx6q_opp_check_speed_grading(cpu_dev);
-- 
cgit v1.1


From b06c0b2f087ab498d51d50f5ae353133b602f614 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 12 Jun 2018 10:24:13 +0200
Subject: Revert "PM / runtime: Fixup reference counting of device link
 suppliers at probe"

Revert commit 1e8378619841 (PM / runtime: Fixup reference counting of
device link suppliers at probe), as it has introduced a regression
and the condition it was designed to address should be covered by the
existing code.

Reported-by: Marek Szyprowski <m.szyprowski@samsung.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/dd.c            |  3 ++-
 drivers/base/power/runtime.c | 27 ++++++++++++++++++++++++---
 include/linux/pm_runtime.h   |  6 ++++--
 3 files changed, 30 insertions(+), 6 deletions(-)

diff --git a/drivers/base/dd.c b/drivers/base/dd.c
index a41c91b..10454fe 100644
--- a/drivers/base/dd.c
+++ b/drivers/base/dd.c
@@ -580,7 +580,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev)
 	pr_debug("bus: '%s': %s: matched device %s with driver %s\n",
 		 drv->bus->name, __func__, dev_name(dev), drv->name);
 
-	pm_runtime_resume_suppliers(dev);
+	pm_runtime_get_suppliers(dev);
 	if (dev->parent)
 		pm_runtime_get_sync(dev->parent);
 
@@ -591,6 +591,7 @@ int driver_probe_device(struct device_driver *drv, struct device *dev)
 	if (dev->parent)
 		pm_runtime_put(dev->parent);
 
+	pm_runtime_put_suppliers(dev);
 	return ret;
 }
 
diff --git a/drivers/base/power/runtime.c b/drivers/base/power/runtime.c
index c6030f1..beb85c3 100644
--- a/drivers/base/power/runtime.c
+++ b/drivers/base/power/runtime.c
@@ -1563,16 +1563,37 @@ void pm_runtime_clean_up_links(struct device *dev)
 }
 
 /**
- * pm_runtime_resume_suppliers - Resume supplier devices.
+ * pm_runtime_get_suppliers - Resume and reference-count supplier devices.
  * @dev: Consumer device.
  */
-void pm_runtime_resume_suppliers(struct device *dev)
+void pm_runtime_get_suppliers(struct device *dev)
 {
+	struct device_link *link;
 	int idx;
 
 	idx = device_links_read_lock();
 
-	rpm_get_suppliers(dev);
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+		if (link->flags & DL_FLAG_PM_RUNTIME)
+			pm_runtime_get_sync(link->supplier);
+
+	device_links_read_unlock(idx);
+}
+
+/**
+ * pm_runtime_put_suppliers - Drop references to supplier devices.
+ * @dev: Consumer device.
+ */
+void pm_runtime_put_suppliers(struct device *dev)
+{
+	struct device_link *link;
+	int idx;
+
+	idx = device_links_read_lock();
+
+	list_for_each_entry_rcu(link, &dev->links.suppliers, c_node)
+		if (link->flags & DL_FLAG_PM_RUNTIME)
+			pm_runtime_put(link->supplier);
 
 	device_links_read_unlock(idx);
 }
diff --git a/include/linux/pm_runtime.h b/include/linux/pm_runtime.h
index db5dbbf..f0fc470 100644
--- a/include/linux/pm_runtime.h
+++ b/include/linux/pm_runtime.h
@@ -56,7 +56,8 @@ extern void pm_runtime_update_max_time_suspended(struct device *dev,
 						 s64 delta_ns);
 extern void pm_runtime_set_memalloc_noio(struct device *dev, bool enable);
 extern void pm_runtime_clean_up_links(struct device *dev);
-extern void pm_runtime_resume_suppliers(struct device *dev);
+extern void pm_runtime_get_suppliers(struct device *dev);
+extern void pm_runtime_put_suppliers(struct device *dev);
 extern void pm_runtime_new_link(struct device *dev);
 extern void pm_runtime_drop_link(struct device *dev);
 
@@ -172,7 +173,8 @@ static inline unsigned long pm_runtime_autosuspend_expiration(
 static inline void pm_runtime_set_memalloc_noio(struct device *dev,
 						bool enable){}
 static inline void pm_runtime_clean_up_links(struct device *dev) {}
-static inline void pm_runtime_resume_suppliers(struct device *dev) {}
+static inline void pm_runtime_get_suppliers(struct device *dev) {}
+static inline void pm_runtime_put_suppliers(struct device *dev) {}
 static inline void pm_runtime_new_link(struct device *dev) {}
 static inline void pm_runtime_drop_link(struct device *dev) {}
 
-- 
cgit v1.1