From 8a7e2d2ea080d10a189a1d611344b0330468ebc3 Mon Sep 17 00:00:00 2001
From: Anders Roxell <anders.roxell@linaro.org>
Date: Tue, 28 Aug 2018 11:31:18 +0200
Subject: cpupower: remove stringop-truncation waring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The strncpy doesn't null terminate the string because the size is too
short by one byte.

parse.c: In function ‘prepare_default_config’:
parse.c:148:2: warning: ‘strncpy’ output truncated before terminating
    nul copying 8 bytes from a string of the same length
    [-Wstringop-truncation]
  strncpy(config->governor, "ondemand", 8);
  ^~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~

The normal method of passing the length of the destination buffer works
correctly here.

Fixes: 7fe2f6399a84 ("cpupowerutils - cpufrequtils extended with quite some features")
Signed-off-by: Anders Roxell <anders.roxell@linaro.org>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/power/cpupower/bench/parse.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/cpupower/bench/parse.c b/tools/power/cpupower/bench/parse.c
index 9ba8a44..84caee3 100644
--- a/tools/power/cpupower/bench/parse.c
+++ b/tools/power/cpupower/bench/parse.c
@@ -145,7 +145,7 @@ struct config *prepare_default_config()
 	config->cpu = 0;
 	config->prio = SCHED_HIGH;
 	config->verbose = 0;
-	strncpy(config->governor, "ondemand", 8);
+	strncpy(config->governor, "ondemand", sizeof(config->governor));
 
 	config->output = stdout;
 
-- 
cgit v1.1


From a4a008e53c9e5ac8d6070bd1b2eddfc1ae0d2338 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 31 Aug 2018 11:22:29 +0300
Subject: intel_idle: Get rid of custom ICPU() macro

Replace custom grown macro with generic INTEL_CPU_FAM6() one.

No functional change intended.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Acked-by: Jacob Pan <jacob.jun.pan@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/idle/intel_idle.c | 75 +++++++++++++++++++++++------------------------
 1 file changed, 36 insertions(+), 39 deletions(-)

diff --git a/drivers/idle/intel_idle.c b/drivers/idle/intel_idle.c
index b2ccce5..791b8a3 100644
--- a/drivers/idle/intel_idle.c
+++ b/drivers/idle/intel_idle.c
@@ -1066,46 +1066,43 @@ static const struct idle_cpu idle_cpu_dnv = {
 	.disable_promotion_to_c1e = true,
 };
 
-#define ICPU(model, cpu) \
-	{ X86_VENDOR_INTEL, 6, model, X86_FEATURE_ANY, (unsigned long)&cpu }
-
 static const struct x86_cpu_id intel_idle_ids[] __initconst = {
-	ICPU(INTEL_FAM6_NEHALEM_EP,		idle_cpu_nehalem),
-	ICPU(INTEL_FAM6_NEHALEM,		idle_cpu_nehalem),
-	ICPU(INTEL_FAM6_NEHALEM_G,		idle_cpu_nehalem),
-	ICPU(INTEL_FAM6_WESTMERE,		idle_cpu_nehalem),
-	ICPU(INTEL_FAM6_WESTMERE_EP,		idle_cpu_nehalem),
-	ICPU(INTEL_FAM6_NEHALEM_EX,		idle_cpu_nehalem),
-	ICPU(INTEL_FAM6_ATOM_PINEVIEW,		idle_cpu_atom),
-	ICPU(INTEL_FAM6_ATOM_LINCROFT,		idle_cpu_lincroft),
-	ICPU(INTEL_FAM6_WESTMERE_EX,		idle_cpu_nehalem),
-	ICPU(INTEL_FAM6_SANDYBRIDGE,		idle_cpu_snb),
-	ICPU(INTEL_FAM6_SANDYBRIDGE_X,		idle_cpu_snb),
-	ICPU(INTEL_FAM6_ATOM_CEDARVIEW,		idle_cpu_atom),
-	ICPU(INTEL_FAM6_ATOM_SILVERMONT1,	idle_cpu_byt),
-	ICPU(INTEL_FAM6_ATOM_MERRIFIELD,	idle_cpu_tangier),
-	ICPU(INTEL_FAM6_ATOM_AIRMONT,		idle_cpu_cht),
-	ICPU(INTEL_FAM6_IVYBRIDGE,		idle_cpu_ivb),
-	ICPU(INTEL_FAM6_IVYBRIDGE_X,		idle_cpu_ivt),
-	ICPU(INTEL_FAM6_HASWELL_CORE,		idle_cpu_hsw),
-	ICPU(INTEL_FAM6_HASWELL_X,		idle_cpu_hsw),
-	ICPU(INTEL_FAM6_HASWELL_ULT,		idle_cpu_hsw),
-	ICPU(INTEL_FAM6_HASWELL_GT3E,		idle_cpu_hsw),
-	ICPU(INTEL_FAM6_ATOM_SILVERMONT2,	idle_cpu_avn),
-	ICPU(INTEL_FAM6_BROADWELL_CORE,		idle_cpu_bdw),
-	ICPU(INTEL_FAM6_BROADWELL_GT3E,		idle_cpu_bdw),
-	ICPU(INTEL_FAM6_BROADWELL_X,		idle_cpu_bdw),
-	ICPU(INTEL_FAM6_BROADWELL_XEON_D,	idle_cpu_bdw),
-	ICPU(INTEL_FAM6_SKYLAKE_MOBILE,		idle_cpu_skl),
-	ICPU(INTEL_FAM6_SKYLAKE_DESKTOP,	idle_cpu_skl),
-	ICPU(INTEL_FAM6_KABYLAKE_MOBILE,	idle_cpu_skl),
-	ICPU(INTEL_FAM6_KABYLAKE_DESKTOP,	idle_cpu_skl),
-	ICPU(INTEL_FAM6_SKYLAKE_X,		idle_cpu_skx),
-	ICPU(INTEL_FAM6_XEON_PHI_KNL,		idle_cpu_knl),
-	ICPU(INTEL_FAM6_XEON_PHI_KNM,		idle_cpu_knl),
-	ICPU(INTEL_FAM6_ATOM_GOLDMONT,		idle_cpu_bxt),
-	ICPU(INTEL_FAM6_ATOM_GEMINI_LAKE,	idle_cpu_bxt),
-	ICPU(INTEL_FAM6_ATOM_DENVERTON,		idle_cpu_dnv),
+	INTEL_CPU_FAM6(NEHALEM_EP,		idle_cpu_nehalem),
+	INTEL_CPU_FAM6(NEHALEM,			idle_cpu_nehalem),
+	INTEL_CPU_FAM6(NEHALEM_G,		idle_cpu_nehalem),
+	INTEL_CPU_FAM6(WESTMERE,		idle_cpu_nehalem),
+	INTEL_CPU_FAM6(WESTMERE_EP,		idle_cpu_nehalem),
+	INTEL_CPU_FAM6(NEHALEM_EX,		idle_cpu_nehalem),
+	INTEL_CPU_FAM6(ATOM_PINEVIEW,		idle_cpu_atom),
+	INTEL_CPU_FAM6(ATOM_LINCROFT,		idle_cpu_lincroft),
+	INTEL_CPU_FAM6(WESTMERE_EX,		idle_cpu_nehalem),
+	INTEL_CPU_FAM6(SANDYBRIDGE,		idle_cpu_snb),
+	INTEL_CPU_FAM6(SANDYBRIDGE_X,		idle_cpu_snb),
+	INTEL_CPU_FAM6(ATOM_CEDARVIEW,		idle_cpu_atom),
+	INTEL_CPU_FAM6(ATOM_SILVERMONT1,	idle_cpu_byt),
+	INTEL_CPU_FAM6(ATOM_MERRIFIELD,		idle_cpu_tangier),
+	INTEL_CPU_FAM6(ATOM_AIRMONT,		idle_cpu_cht),
+	INTEL_CPU_FAM6(IVYBRIDGE,		idle_cpu_ivb),
+	INTEL_CPU_FAM6(IVYBRIDGE_X,		idle_cpu_ivt),
+	INTEL_CPU_FAM6(HASWELL_CORE,		idle_cpu_hsw),
+	INTEL_CPU_FAM6(HASWELL_X,		idle_cpu_hsw),
+	INTEL_CPU_FAM6(HASWELL_ULT,		idle_cpu_hsw),
+	INTEL_CPU_FAM6(HASWELL_GT3E,		idle_cpu_hsw),
+	INTEL_CPU_FAM6(ATOM_SILVERMONT2,	idle_cpu_avn),
+	INTEL_CPU_FAM6(BROADWELL_CORE,		idle_cpu_bdw),
+	INTEL_CPU_FAM6(BROADWELL_GT3E,		idle_cpu_bdw),
+	INTEL_CPU_FAM6(BROADWELL_X,		idle_cpu_bdw),
+	INTEL_CPU_FAM6(BROADWELL_XEON_D,	idle_cpu_bdw),
+	INTEL_CPU_FAM6(SKYLAKE_MOBILE,		idle_cpu_skl),
+	INTEL_CPU_FAM6(SKYLAKE_DESKTOP,		idle_cpu_skl),
+	INTEL_CPU_FAM6(KABYLAKE_MOBILE,		idle_cpu_skl),
+	INTEL_CPU_FAM6(KABYLAKE_DESKTOP,	idle_cpu_skl),
+	INTEL_CPU_FAM6(SKYLAKE_X,		idle_cpu_skx),
+	INTEL_CPU_FAM6(XEON_PHI_KNL,		idle_cpu_knl),
+	INTEL_CPU_FAM6(XEON_PHI_KNM,		idle_cpu_knl),
+	INTEL_CPU_FAM6(ATOM_GOLDMONT,		idle_cpu_bxt),
+	INTEL_CPU_FAM6(ATOM_GEMINI_LAKE,	idle_cpu_bxt),
+	INTEL_CPU_FAM6(ATOM_DENVERTON,		idle_cpu_dnv),
 	{}
 };
 
-- 
cgit v1.1


From 17ed15183c2453ba2f03a813fd8b1594bd9baf98 Mon Sep 17 00:00:00 2001
From: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Date: Fri, 31 Aug 2018 11:25:13 +0300
Subject: powercap: RAPL: Get rid of custom RAPL_CPU() macro

Replace custom grown macro with generic INTEL_CPU_FAM6() one.

No functional change intended.

Signed-off-by: Andy Shevchenko <andriy.shevchenko@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/powercap/intel_rapl.c | 73 +++++++++++++++++++------------------------
 1 file changed, 33 insertions(+), 40 deletions(-)

diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c
index 295d8dc..bb92874b 100644
--- a/drivers/powercap/intel_rapl.c
+++ b/drivers/powercap/intel_rapl.c
@@ -1133,47 +1133,40 @@ static const struct rapl_defaults rapl_defaults_cht = {
 	.compute_time_window = rapl_compute_time_window_atom,
 };
 
-#define RAPL_CPU(_model, _ops) {			\
-		.vendor = X86_VENDOR_INTEL,		\
-		.family = 6,				\
-		.model = _model,			\
-		.driver_data = (kernel_ulong_t)&_ops,	\
-		}
-
 static const struct x86_cpu_id rapl_ids[] __initconst = {
-	RAPL_CPU(INTEL_FAM6_SANDYBRIDGE,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_SANDYBRIDGE_X,	rapl_defaults_core),
-
-	RAPL_CPU(INTEL_FAM6_IVYBRIDGE,		rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_IVYBRIDGE_X,	rapl_defaults_core),
-
-	RAPL_CPU(INTEL_FAM6_HASWELL_CORE,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_HASWELL_ULT,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_HASWELL_GT3E,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_HASWELL_X,		rapl_defaults_hsw_server),
-
-	RAPL_CPU(INTEL_FAM6_BROADWELL_CORE,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_BROADWELL_GT3E,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_BROADWELL_XEON_D,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_BROADWELL_X,	rapl_defaults_hsw_server),
-
-	RAPL_CPU(INTEL_FAM6_SKYLAKE_DESKTOP,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_SKYLAKE_MOBILE,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_SKYLAKE_X,		rapl_defaults_hsw_server),
-	RAPL_CPU(INTEL_FAM6_KABYLAKE_MOBILE,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_KABYLAKE_DESKTOP,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_CANNONLAKE_MOBILE,	rapl_defaults_core),
-
-	RAPL_CPU(INTEL_FAM6_ATOM_SILVERMONT1,	rapl_defaults_byt),
-	RAPL_CPU(INTEL_FAM6_ATOM_AIRMONT,	rapl_defaults_cht),
-	RAPL_CPU(INTEL_FAM6_ATOM_MERRIFIELD,	rapl_defaults_tng),
-	RAPL_CPU(INTEL_FAM6_ATOM_MOOREFIELD,	rapl_defaults_ann),
-	RAPL_CPU(INTEL_FAM6_ATOM_GOLDMONT,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_ATOM_GEMINI_LAKE,	rapl_defaults_core),
-	RAPL_CPU(INTEL_FAM6_ATOM_DENVERTON,	rapl_defaults_core),
-
-	RAPL_CPU(INTEL_FAM6_XEON_PHI_KNL,	rapl_defaults_hsw_server),
-	RAPL_CPU(INTEL_FAM6_XEON_PHI_KNM,	rapl_defaults_hsw_server),
+	INTEL_CPU_FAM6(SANDYBRIDGE,		rapl_defaults_core),
+	INTEL_CPU_FAM6(SANDYBRIDGE_X,		rapl_defaults_core),
+
+	INTEL_CPU_FAM6(IVYBRIDGE,		rapl_defaults_core),
+	INTEL_CPU_FAM6(IVYBRIDGE_X,		rapl_defaults_core),
+
+	INTEL_CPU_FAM6(HASWELL_CORE,		rapl_defaults_core),
+	INTEL_CPU_FAM6(HASWELL_ULT,		rapl_defaults_core),
+	INTEL_CPU_FAM6(HASWELL_GT3E,		rapl_defaults_core),
+	INTEL_CPU_FAM6(HASWELL_X,		rapl_defaults_hsw_server),
+
+	INTEL_CPU_FAM6(BROADWELL_CORE,		rapl_defaults_core),
+	INTEL_CPU_FAM6(BROADWELL_GT3E,		rapl_defaults_core),
+	INTEL_CPU_FAM6(BROADWELL_XEON_D,	rapl_defaults_core),
+	INTEL_CPU_FAM6(BROADWELL_X,		rapl_defaults_hsw_server),
+
+	INTEL_CPU_FAM6(SKYLAKE_DESKTOP,		rapl_defaults_core),
+	INTEL_CPU_FAM6(SKYLAKE_MOBILE,		rapl_defaults_core),
+	INTEL_CPU_FAM6(SKYLAKE_X,		rapl_defaults_hsw_server),
+	INTEL_CPU_FAM6(KABYLAKE_MOBILE,		rapl_defaults_core),
+	INTEL_CPU_FAM6(KABYLAKE_DESKTOP,	rapl_defaults_core),
+	INTEL_CPU_FAM6(CANNONLAKE_MOBILE,	rapl_defaults_core),
+
+	INTEL_CPU_FAM6(ATOM_SILVERMONT1,	rapl_defaults_byt),
+	INTEL_CPU_FAM6(ATOM_AIRMONT,		rapl_defaults_cht),
+	INTEL_CPU_FAM6(ATOM_MERRIFIELD,		rapl_defaults_tng),
+	INTEL_CPU_FAM6(ATOM_MOOREFIELD,		rapl_defaults_ann),
+	INTEL_CPU_FAM6(ATOM_GOLDMONT,		rapl_defaults_core),
+	INTEL_CPU_FAM6(ATOM_GEMINI_LAKE,	rapl_defaults_core),
+	INTEL_CPU_FAM6(ATOM_DENVERTON,		rapl_defaults_core),
+
+	INTEL_CPU_FAM6(XEON_PHI_KNL,		rapl_defaults_hsw_server),
+	INTEL_CPU_FAM6(XEON_PHI_KNM,		rapl_defaults_hsw_server),
 	{}
 };
 MODULE_DEVICE_TABLE(x86cpu, rapl_ids);
-- 
cgit v1.1


From 0e7ea2f3b0e09333e54fb8322f85a45cb01fdd61 Mon Sep 17 00:00:00 2001
From: Igor Stoppa <igor.stoppa@gmail.com>
Date: Fri, 7 Sep 2018 19:09:55 +0300
Subject: cpufreq: remove unnecessary unlikely()

WARN_ON() already contains an unlikely(), so it's not necessary to wrap it
into another.

Signed-off-by: Igor Stoppa <igor.stoppa@huawei.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c
index f53fb41..7aa3dca 100644
--- a/drivers/cpufreq/cpufreq.c
+++ b/drivers/cpufreq/cpufreq.c
@@ -403,7 +403,7 @@ EXPORT_SYMBOL_GPL(cpufreq_freq_transition_begin);
 void cpufreq_freq_transition_end(struct cpufreq_policy *policy,
 		struct cpufreq_freqs *freqs, int transition_failed)
 {
-	if (unlikely(WARN_ON(!policy->transition_ongoing)))
+	if (WARN_ON(!policy->transition_ongoing))
 		return;
 
 	cpufreq_notify_post_transition(policy, freqs, transition_failed);
-- 
cgit v1.1


From 3bb756449b2ddca8e059d011af2e732c857c61d7 Mon Sep 17 00:00:00 2001
From: "Vladimir D. Seleznev" <vseleznv@altlinux.org>
Date: Sat, 1 Sep 2018 07:51:15 +0300
Subject: PM / hibernate: Documentation: fix image_size default value

This commit updates the default value of /sys/power/image_size in
the documentation.

Since ac5c24ec1e983313ef0015258fba6f630e54e7cn the `image_size' value is
set to about 2/5 of RAM, according to kernel/power/snapshot.c:

	image_size = ((totalram_pages * 2) / 5) * PAGE_SIZE;

but this change was not reflected everywhere in the documentation.

Signed-off-by: Vladimir D. Seleznev <vseleznv@altlinux.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/ABI/testing/sysfs-power | 2 +-
 Documentation/power/swsusp.txt        | 2 +-
 2 files changed, 2 insertions(+), 2 deletions(-)

diff --git a/Documentation/ABI/testing/sysfs-power b/Documentation/ABI/testing/sysfs-power
index 2f813d6..18b7dc9 100644
--- a/Documentation/ABI/testing/sysfs-power
+++ b/Documentation/ABI/testing/sysfs-power
@@ -99,7 +99,7 @@ Description:
 		this file, the suspend image will be as small as possible.
 
 		Reading from this file will display the current image size
-		limit, which is set to 500 MB by default.
+		limit, which is set to around 2/5 of available RAM by default.
 
 What:		/sys/power/pm_trace
 Date:		August 2006
diff --git a/Documentation/power/swsusp.txt b/Documentation/power/swsusp.txt
index cc87adf..236d1fb 100644
--- a/Documentation/power/swsusp.txt
+++ b/Documentation/power/swsusp.txt
@@ -56,7 +56,7 @@ If you want to limit the suspend image size to N bytes, do
 
 echo N > /sys/power/image_size
 
-before suspend (it is limited to 500 MB by default).
+before suspend (it is limited to around 2/5 of available RAM by default).
 
 . The resume process checks for the presence of the resume device,
 if found, it then checks the contents for the hibernation image signature.
-- 
cgit v1.1


From 8412dbd642585107e29b20920876de98e7df5819 Mon Sep 17 00:00:00 2001
From: Todd Brandt <todd.e.brandt@linux.intel.com>
Date: Wed, 22 Aug 2018 18:37:11 -0700
Subject: PM / sleep: Show freezing tasks that caused a suspend abort

For debug purposes it would be nice to see which tasks
caused a suspend abort, i.e. which tasks were still
in the process of freezing when a wakeup event occurred.
This patch adds the info to pm_debug_messages.

Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 kernel/power/process.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/kernel/power/process.c b/kernel/power/process.c
index 7381d49..4b6a54d 100644
--- a/kernel/power/process.c
+++ b/kernel/power/process.c
@@ -96,7 +96,7 @@ static int try_to_freeze_tasks(bool user_only)
 		if (wq_busy)
 			show_workqueue_state();
 
-		if (!wakeup) {
+		if (!wakeup || pm_debug_messages_on) {
 			read_lock(&tasklist_lock);
 			for_each_process_thread(g, p) {
 				if (p != current && !freezer_should_skip(p)
-- 
cgit v1.1


From 51b177637b5c54061bbd12f228a26755f466cbb0 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 27 Aug 2018 20:52:15 -0500
Subject: cpufreq: Convert to using %pOFn instead of device_node.name

In preparation to remove the node name pointer from struct device_node,
convert printf users to use the %pOFn format specifier.

Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/s5pv210-cpufreq.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/s5pv210-cpufreq.c b/drivers/cpufreq/s5pv210-cpufreq.c
index 5d31c2d..dbecd76 100644
--- a/drivers/cpufreq/s5pv210-cpufreq.c
+++ b/drivers/cpufreq/s5pv210-cpufreq.c
@@ -611,8 +611,8 @@ static int s5pv210_cpufreq_probe(struct platform_device *pdev)
 	for_each_compatible_node(np, NULL, "samsung,s5pv210-dmc") {
 		id = of_alias_get_id(np, "dmc");
 		if (id < 0 || id >= ARRAY_SIZE(dmc_base)) {
-			pr_err("%s: failed to get alias of dmc node '%s'\n",
-				__func__, np->name);
+			pr_err("%s: failed to get alias of dmc node '%pOFn'\n",
+				__func__, np);
 			of_node_put(np);
 			return id;
 		}
-- 
cgit v1.1


From d1e1303173d7fe3c107a85805a62c13e760f2da4 Mon Sep 17 00:00:00 2001
From: Biju Das <biju.das@bp.renesas.com>
Date: Tue, 11 Sep 2018 11:12:51 +0100
Subject: cpufreq: dt: Add support for r8a7744

Add the compatible strings for supporting the generic cpufreq driver on
the Renesas RZ/G1N (R8A7744) SoC.

Signed-off-by: Biju Das <biju.das@bp.renesas.com>
Reviewed-by: Fabrizio Castro <fabrizio.castro@bp.renesas.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Reviewed-by: Simon Horman <horms+renesas@verge.net.au>
Reviewed-by: Geert Uytterhoeven <geert+renesas@glider.be>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq-dt-platdev.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index fe14c57..805f8a0 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -58,6 +58,7 @@ static const struct of_device_id whitelist[] __initconst = {
 	{ .compatible = "renesas,r8a73a4", },
 	{ .compatible = "renesas,r8a7740", },
 	{ .compatible = "renesas,r8a7743", },
+	{ .compatible = "renesas,r8a7744", },
 	{ .compatible = "renesas,r8a7745", },
 	{ .compatible = "renesas,r8a7778", },
 	{ .compatible = "renesas,r8a7779", },
-- 
cgit v1.1


From 6a5f95b5a4f4ff29e4071bc5b95f8f3a2aef046b Mon Sep 17 00:00:00 2001
From: Fieah Lim <kw@fieahl.im>
Date: Tue, 11 Sep 2018 03:59:01 +0800
Subject: cpuidle: Remove unnecessary wrapper cpuidle_get_last_residency()

cpuidle_get_last_residency() is just a wrapper for retrieving
the last_residency member of struct cpuidle_device.  It's also
weirdly the only wrapper function for accessing cpuidle_* struct
member (by my best guess is it could be a leftover from v2.x).

Anyhow, since the only two users (the ladder and menu governors)
can access dev->last_residency directly, and it's more intuitive to
do it that way, let's just get rid of the wrapper.

This patch tidies up CPU idle code a bit without functional changes.

Signed-off-by: Fieah Lim <kw@fieahl.im>
[ rjw: Changelog cleanup ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/ladder.c |  2 +-
 drivers/cpuidle/governors/menu.c   |  2 +-
 include/linux/cpuidle.h            | 10 ----------
 3 files changed, 2 insertions(+), 12 deletions(-)

diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 704880a..f0dddc6 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -80,7 +80,7 @@ static int ladder_select_state(struct cpuidle_driver *drv,
 
 	last_state = &ldev->states[last_idx];
 
-	last_residency = cpuidle_get_last_residency(dev) - drv->states[last_idx].exit_latency;
+	last_residency = dev->last_residency - drv->states[last_idx].exit_latency;
 
 	/* consider promotion */
 	if (last_idx < drv->state_count - 1 &&
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index e26a409..021b08d 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -514,7 +514,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		measured_us = 9 * MAX_INTERESTING / 10;
 	} else {
 		/* measured value */
-		measured_us = cpuidle_get_last_residency(dev);
+		measured_us = dev->last_residency;
 
 		/* Deduct exit latency */
 		if (measured_us > 2 * target->exit_latency)
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index 4325d6f..d262f62 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -99,16 +99,6 @@ struct cpuidle_device {
 DECLARE_PER_CPU(struct cpuidle_device *, cpuidle_devices);
 DECLARE_PER_CPU(struct cpuidle_device, cpuidle_dev);
 
-/**
- * cpuidle_get_last_residency - retrieves the last state's residency time
- * @dev: the target CPU
- */
-static inline int cpuidle_get_last_residency(struct cpuidle_device *dev)
-{
-	return dev->last_residency;
-}
-
-
 /****************************
  * CPUIDLE DRIVER INTERFACE *
  ****************************/
-- 
cgit v1.1


From 7037b43e0076cce1c07c72540e219fb5db7ea01f Mon Sep 17 00:00:00 2001
From: Fieah Lim <kw@fieahl.im>
Date: Tue, 11 Sep 2018 05:47:25 +0800
Subject: cpuidle: enter_state: Don't needlessly calculate diff time

Currently, ktime_us_delta() is invoked unconditionally to compute the
idle residency of the CPU, but it only makes sense to do that if a
valid idle state has been entered, so move the ktime_us_delta()
invocation after the entered_state >= 0 check.

While at it, merge two comment blocks in there into one and drop
a space between type casting of diff.

This patch has no functional changes.

Signed-off-by: Fieah Lim <kw@fieahl.im>
[ rjw: Changelog cleanup, comment format fix ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/cpuidle.c | 16 ++++++++--------
 1 file changed, 8 insertions(+), 8 deletions(-)

diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index 6df894d..4a97446 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -247,17 +247,17 @@ int cpuidle_enter_state(struct cpuidle_device *dev, struct cpuidle_driver *drv,
 	if (!cpuidle_state_is_coupled(drv, index))
 		local_irq_enable();
 
-	diff = ktime_us_delta(time_end, time_start);
-	if (diff > INT_MAX)
-		diff = INT_MAX;
-
-	dev->last_residency = (int) diff;
-
 	if (entered_state >= 0) {
-		/* Update cpuidle counters */
-		/* This can be moved to within driver enter routine
+		/*
+		 * Update cpuidle counters
+		 * This can be moved to within driver enter routine,
 		 * but that results in multiple copies of same code.
 		 */
+		diff = ktime_us_delta(time_end, time_start);
+		if (diff > INT_MAX)
+			diff = INT_MAX;
+
+		dev->last_residency = (int)diff;
 		dev->states_usage[entered_state].time += dev->last_residency;
 		dev->states_usage[entered_state].usage++;
 	} else {
-- 
cgit v1.1


From 2fbb8670b4ff4454f1c0de510f788d737edc4b90 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 11 Sep 2018 11:14:34 +0530
Subject: OPP: Free OPP table properly on performance state irregularities

The OPP table was freed, but not the individual OPPs which is done from
_dev_pm_opp_remove_table(). Fix it by calling _dev_pm_opp_remove_table()
as well.

Cc: 4.18 <stable@vger.kernel.org> # v4.18
Fixes: 3ba98324e81a ("PM / OPP: Get performance state using genpd helper")
Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/of.c | 1 +
 1 file changed, 1 insertion(+)

diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 7af0dde..20988c4 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -425,6 +425,7 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
 		dev_err(dev, "Not all nodes have performance state set (%d: %d)\n",
 			count, pstate_count);
 		ret = -ENOENT;
+		_dev_pm_opp_remove_table(opp_table, dev, false);
 		goto put_opp_table;
 	}
 
-- 
cgit v1.1


From 404b1369ea26f598b96ae4d3183262a879703cfe Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 13 Sep 2018 13:09:27 +0530
Subject: OPP: Don't try to remove all OPP tables on failure

dev_pm_opp_of_cpumask_add_table() creates the OPP table for all CPUs
present in the cpumask and on errors it should revert all changes it has
done.

It actually is doing a bit more than that. On errors, it tries to free
all the OPP tables, even the one it hasn't created yet. This may also
end up freeing the OPP tables which were created from separate path,
like dev_pm_opp_set_supported_hw().

Reported-and-tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/cpu.c | 8 ++++++--
 drivers/opp/of.c  | 4 ++--
 drivers/opp/opp.h | 2 +-
 3 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/drivers/opp/cpu.c b/drivers/opp/cpu.c
index 0c09107..2eb5e2e 100644
--- a/drivers/opp/cpu.c
+++ b/drivers/opp/cpu.c
@@ -108,7 +108,8 @@ void dev_pm_opp_free_cpufreq_table(struct device *dev,
 EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
 #endif	/* CONFIG_CPU_FREQ */
 
-void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of)
+void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of,
+				      int last_cpu)
 {
 	struct device *cpu_dev;
 	int cpu;
@@ -116,6 +117,9 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of)
 	WARN_ON(cpumask_empty(cpumask));
 
 	for_each_cpu(cpu, cpumask) {
+		if (cpu == last_cpu)
+			break;
+
 		cpu_dev = get_cpu_device(cpu);
 		if (!cpu_dev) {
 			pr_err("%s: failed to get cpu%d device\n", __func__,
@@ -140,7 +144,7 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of)
  */
 void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask)
 {
-	_dev_pm_opp_cpumask_remove_table(cpumask, false);
+	_dev_pm_opp_cpumask_remove_table(cpumask, false, -1);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_cpumask_remove_table);
 
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 20988c4..8622258 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -592,7 +592,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_indexed);
  */
 void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
 {
-	_dev_pm_opp_cpumask_remove_table(cpumask, true);
+	_dev_pm_opp_cpumask_remove_table(cpumask, true, -1);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
 
@@ -627,7 +627,7 @@ int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
 				 __func__, cpu, ret);
 
 			/* Free all other OPPs */
-			dev_pm_opp_of_cpumask_remove_table(cpumask);
+			_dev_pm_opp_cpumask_remove_table(cpumask, true, cpu);
 			break;
 		}
 	}
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index 7c540fd..a9d22aa 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -196,7 +196,7 @@ struct dev_pm_opp *_opp_allocate(struct opp_table *opp_table);
 void _opp_free(struct dev_pm_opp *opp);
 int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table, bool rate_not_available);
 int _opp_add_v1(struct opp_table *opp_table, struct device *dev, unsigned long freq, long u_volt, bool dynamic);
-void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of);
+void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of, int last_cpu);
 struct opp_table *_add_opp_table(struct device *dev);
 
 #ifdef CONFIG_OF
-- 
cgit v1.1


From 3d2556992a878a2210d3be498416aee39e0c32aa Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 3 Aug 2018 07:05:21 +0530
Subject: OPP: Protect dev_list with opp_table lock

The dev_list needs to be protected with a lock, else we may have
simultaneous access (addition/removal) to it and that would be racy.
Extend scope of the opp_table lock to protect dev_list as well.

Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 21 +++++++++++++++++++--
 drivers/opp/cpu.c  |  2 ++
 drivers/opp/opp.h  |  2 +-
 3 files changed, 22 insertions(+), 3 deletions(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 31ff03d..9f8aa31 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -48,9 +48,14 @@ static struct opp_device *_find_opp_dev(const struct device *dev,
 static struct opp_table *_find_opp_table_unlocked(struct device *dev)
 {
 	struct opp_table *opp_table;
+	bool found;
 
 	list_for_each_entry(opp_table, &opp_tables, node) {
-		if (_find_opp_dev(dev, opp_table)) {
+		mutex_lock(&opp_table->lock);
+		found = !!_find_opp_dev(dev, opp_table);
+		mutex_unlock(&opp_table->lock);
+
+		if (found) {
 			_get_opp_table_kref(opp_table);
 
 			return opp_table;
@@ -766,6 +771,8 @@ struct opp_device *_add_opp_dev(const struct device *dev,
 
 	/* Initialize opp-dev */
 	opp_dev->dev = dev;
+
+	mutex_lock(&opp_table->lock);
 	list_add(&opp_dev->node, &opp_table->dev_list);
 
 	/* Create debugfs entries for the opp_table */
@@ -773,6 +780,7 @@ struct opp_device *_add_opp_dev(const struct device *dev,
 	if (ret)
 		dev_err(dev, "%s: Failed to register opp debugfs (%d)\n",
 			__func__, ret);
+	mutex_unlock(&opp_table->lock);
 
 	return opp_dev;
 }
@@ -791,6 +799,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev)
 	if (!opp_table)
 		return NULL;
 
+	mutex_init(&opp_table->lock);
 	INIT_LIST_HEAD(&opp_table->dev_list);
 
 	opp_dev = _add_opp_dev(dev, opp_table);
@@ -812,7 +821,6 @@ static struct opp_table *_allocate_opp_table(struct device *dev)
 
 	BLOCKING_INIT_NOTIFIER_HEAD(&opp_table->head);
 	INIT_LIST_HEAD(&opp_table->opp_list);
-	mutex_init(&opp_table->lock);
 	kref_init(&opp_table->kref);
 
 	/* Secure the device table modification */
@@ -854,6 +862,10 @@ static void _opp_table_kref_release(struct kref *kref)
 	if (!IS_ERR(opp_table->clk))
 		clk_put(opp_table->clk);
 
+	/*
+	 * No need to take opp_table->lock here as we are guaranteed that no
+	 * references to the OPP table are taken at this point.
+	 */
 	opp_dev = list_first_entry(&opp_table->dev_list, struct opp_device,
 				   node);
 
@@ -1716,6 +1728,9 @@ void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev,
 {
 	struct dev_pm_opp *opp, *tmp;
 
+	/* Protect dev_list */
+	mutex_lock(&opp_table->lock);
+
 	/* Find if opp_table manages a single device */
 	if (list_is_singular(&opp_table->dev_list)) {
 		/* Free static OPPs */
@@ -1733,6 +1748,8 @@ void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev,
 	} else {
 		_remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table);
 	}
+
+	mutex_unlock(&opp_table->lock);
 }
 
 void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all)
diff --git a/drivers/opp/cpu.c b/drivers/opp/cpu.c
index 2eb5e2e..36586f6 100644
--- a/drivers/opp/cpu.c
+++ b/drivers/opp/cpu.c
@@ -226,8 +226,10 @@ int dev_pm_opp_get_sharing_cpus(struct device *cpu_dev, struct cpumask *cpumask)
 	cpumask_clear(cpumask);
 
 	if (opp_table->shared_opp == OPP_TABLE_ACCESS_SHARED) {
+		mutex_lock(&opp_table->lock);
 		list_for_each_entry(opp_dev, &opp_table->dev_list, node)
 			cpumask_set_cpu(opp_dev->dev->id, cpumask);
+		mutex_unlock(&opp_table->lock);
 	} else {
 		cpumask_set_cpu(cpu_dev->id, cpumask);
 	}
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index a9d22aa..88e9f47 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -126,7 +126,7 @@ enum opp_table_access {
  * @dev_list:	list of devices that share these OPPs
  * @opp_list:	table of opps
  * @kref:	for reference count of the table.
- * @lock:	mutex protecting the opp_list.
+ * @lock:	mutex protecting the opp_list and dev_list.
  * @np:		struct device_node pointer for opp's DT node.
  * @clock_latency_ns_max: Max clock latency in nanoseconds.
  * @shared_opp: OPP is shared between multiple devices.
-- 
cgit v1.1


From eb7c8743d6cf489e30091e6656fd4d3306621e9a Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 5 Sep 2018 16:17:14 +0530
Subject: OPP: Pass index to _of_init_opp_table()

This is a preparatory patch required for the next commit which will
start using OPP table's node pointer in _of_init_opp_table(), which
requires the index in order to read the OPP table's phandle.

This commit adds the index argument in the call chains in order to get
it delivered to _of_init_opp_table().

Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c     | 19 +++++++++++++++----
 drivers/opp/of.c       | 12 +++++++-----
 drivers/opp/opp.h      |  4 ++--
 include/linux/pm_opp.h |  6 ++++++
 4 files changed, 30 insertions(+), 11 deletions(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 9f8aa31..332748a 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -785,7 +785,7 @@ struct opp_device *_add_opp_dev(const struct device *dev,
 	return opp_dev;
 }
 
-static struct opp_table *_allocate_opp_table(struct device *dev)
+static struct opp_table *_allocate_opp_table(struct device *dev, int index)
 {
 	struct opp_table *opp_table;
 	struct opp_device *opp_dev;
@@ -808,7 +808,7 @@ static struct opp_table *_allocate_opp_table(struct device *dev)
 		return NULL;
 	}
 
-	_of_init_opp_table(opp_table, dev);
+	_of_init_opp_table(opp_table, dev, index);
 
 	/* Find clk for the device */
 	opp_table->clk = clk_get(dev, NULL);
@@ -833,7 +833,7 @@ void _get_opp_table_kref(struct opp_table *opp_table)
 	kref_get(&opp_table->kref);
 }
 
-struct opp_table *dev_pm_opp_get_opp_table(struct device *dev)
+static struct opp_table *_opp_get_opp_table(struct device *dev, int index)
 {
 	struct opp_table *opp_table;
 
@@ -844,15 +844,26 @@ struct opp_table *dev_pm_opp_get_opp_table(struct device *dev)
 	if (!IS_ERR(opp_table))
 		goto unlock;
 
-	opp_table = _allocate_opp_table(dev);
+	opp_table = _allocate_opp_table(dev, index);
 
 unlock:
 	mutex_unlock(&opp_table_lock);
 
 	return opp_table;
 }
+
+struct opp_table *dev_pm_opp_get_opp_table(struct device *dev)
+{
+	return _opp_get_opp_table(dev, 0);
+}
 EXPORT_SYMBOL_GPL(dev_pm_opp_get_opp_table);
 
+struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev,
+						   int index)
+{
+	return _opp_get_opp_table(dev, index);
+}
+
 static void _opp_table_kref_release(struct kref *kref)
 {
 	struct opp_table *opp_table = container_of(kref, struct opp_table, kref);
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 8622258..1a9e124 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -52,7 +52,8 @@ static struct opp_table *_managed_opp(const struct device_node *np)
 	return managed_table;
 }
 
-void _of_init_opp_table(struct opp_table *opp_table, struct device *dev)
+void _of_init_opp_table(struct opp_table *opp_table, struct device *dev,
+			int index)
 {
 	struct device_node *np;
 
@@ -378,7 +379,8 @@ free_opp:
 }
 
 /* Initializes OPP tables based on new bindings */
-static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
+static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
+				int index)
 {
 	struct device_node *np;
 	struct opp_table *opp_table;
@@ -393,7 +395,7 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np)
 		goto put_opp_table;
 	}
 
-	opp_table = dev_pm_opp_get_opp_table(dev);
+	opp_table = dev_pm_opp_get_opp_table_indexed(dev, index);
 	if (!opp_table)
 		return -ENOMEM;
 
@@ -526,7 +528,7 @@ int dev_pm_opp_of_add_table(struct device *dev)
 		return _of_add_opp_table_v1(dev);
 	}
 
-	ret = _of_add_opp_table_v2(dev, opp_np);
+	ret = _of_add_opp_table_v2(dev, opp_np, 0);
 	of_node_put(opp_np);
 
 	return ret;
@@ -574,7 +576,7 @@ again:
 		return -ENODEV;
 	}
 
-	ret = _of_add_opp_table_v2(dev, opp_np);
+	ret = _of_add_opp_table_v2(dev, opp_np, index);
 	of_node_put(opp_np);
 
 	return ret;
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index 88e9f47..b235e76 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -200,9 +200,9 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of, in
 struct opp_table *_add_opp_table(struct device *dev);
 
 #ifdef CONFIG_OF
-void _of_init_opp_table(struct opp_table *opp_table, struct device *dev);
+void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index);
 #else
-static inline void _of_init_opp_table(struct opp_table *opp_table, struct device *dev) {}
+static inline void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index) {}
 #endif
 
 #ifdef CONFIG_DEBUG_FS
diff --git a/include/linux/pm_opp.h b/include/linux/pm_opp.h
index 099b319..5d399eee 100644
--- a/include/linux/pm_opp.h
+++ b/include/linux/pm_opp.h
@@ -79,6 +79,7 @@ struct dev_pm_set_opp_data {
 #if defined(CONFIG_PM_OPP)
 
 struct opp_table *dev_pm_opp_get_opp_table(struct device *dev);
+struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev, int index);
 void dev_pm_opp_put_opp_table(struct opp_table *opp_table);
 
 unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp);
@@ -136,6 +137,11 @@ static inline struct opp_table *dev_pm_opp_get_opp_table(struct device *dev)
 	return ERR_PTR(-ENOTSUPP);
 }
 
+static inline struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev, int index)
+{
+	return ERR_PTR(-ENOTSUPP);
+}
+
 static inline void dev_pm_opp_put_opp_table(struct opp_table *opp_table) {}
 
 static inline unsigned long dev_pm_opp_get_voltage(struct dev_pm_opp *opp)
-- 
cgit v1.1


From f06ed90e7051a3a50c2172001c86ff9645e5c2ba Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 14 Jun 2018 12:04:43 +0530
Subject: OPP: Parse OPP table's DT properties from _of_init_opp_table()

Parse the DT properties present in the OPP table from
_of_init_opp_table(), which is a dedicated routine for DT parsing.

Minor relocation of helpers is required for this.

It is possible now for _managed_opp() to return a partially initialized
OPP table if the OPP table is created via the helpers like
dev_pm_opp_set_supported_hw() and we need another flag to indicate if
the static OPP are already parsed or not to make sure we don't
incorrectly skip initializing the static OPPs.

Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/of.c  | 79 +++++++++++++++++++++++++++++++++----------------------
 drivers/opp/opp.h |  2 ++
 2 files changed, 50 insertions(+), 31 deletions(-)

diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 1a9e124..4a19f76 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -23,6 +23,24 @@
 
 #include "opp.h"
 
+/*
+ * Returns opp descriptor node for a device node, caller must
+ * do of_node_put().
+ */
+static struct device_node *_opp_of_get_opp_desc_node(struct device_node *np,
+						     int index)
+{
+	/* "operating-points-v2" can be an array for power domain providers */
+	return of_parse_phandle(np, "operating-points-v2", index);
+}
+
+/* Returns opp descriptor node for a device, caller must do of_node_put() */
+struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev)
+{
+	return _opp_of_get_opp_desc_node(dev->of_node, 0);
+}
+EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node);
+
 static struct opp_table *_managed_opp(const struct device_node *np)
 {
 	struct opp_table *opp_table, *managed_table = NULL;
@@ -55,22 +73,37 @@ static struct opp_table *_managed_opp(const struct device_node *np)
 void _of_init_opp_table(struct opp_table *opp_table, struct device *dev,
 			int index)
 {
-	struct device_node *np;
+	struct device_node *np, *opp_np;
+	u32 val;
 
 	/*
 	 * Only required for backward compatibility with v1 bindings, but isn't
 	 * harmful for other cases. And so we do it unconditionally.
 	 */
 	np = of_node_get(dev->of_node);
-	if (np) {
-		u32 val;
-
-		if (!of_property_read_u32(np, "clock-latency", &val))
-			opp_table->clock_latency_ns_max = val;
-		of_property_read_u32(np, "voltage-tolerance",
-				     &opp_table->voltage_tolerance_v1);
-		of_node_put(np);
-	}
+	if (!np)
+		return;
+
+	if (!of_property_read_u32(np, "clock-latency", &val))
+		opp_table->clock_latency_ns_max = val;
+	of_property_read_u32(np, "voltage-tolerance",
+			     &opp_table->voltage_tolerance_v1);
+
+	/* Get OPP table node */
+	opp_np = _opp_of_get_opp_desc_node(np, index);
+	of_node_put(np);
+
+	if (!opp_np)
+		return;
+
+	if (of_property_read_bool(opp_np, "opp-shared"))
+		opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED;
+	else
+		opp_table->shared_opp = OPP_TABLE_ACCESS_EXCLUSIVE;
+
+	opp_table->np = opp_np;
+
+	of_node_put(opp_np);
 }
 
 static bool _opp_is_supported(struct device *dev, struct opp_table *opp_table,
@@ -250,22 +283,6 @@ void dev_pm_opp_of_remove_table(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
 
-/* Returns opp descriptor node for a device node, caller must
- * do of_node_put() */
-static struct device_node *_opp_of_get_opp_desc_node(struct device_node *np,
-						     int index)
-{
-	/* "operating-points-v2" can be an array for power domain providers */
-	return of_parse_phandle(np, "operating-points-v2", index);
-}
-
-/* Returns opp descriptor node for a device, caller must do of_node_put() */
-struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev)
-{
-	return _opp_of_get_opp_desc_node(dev->of_node, 0);
-}
-EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node);
-
 /**
  * _opp_add_static_v2() - Allocate static OPPs (As per 'v2' DT bindings)
  * @opp_table:	OPP table
@@ -392,6 +409,9 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
 		/* OPPs are already managed */
 		if (!_add_opp_dev(dev, opp_table))
 			ret = -ENOMEM;
+		else if (!opp_table->parsed_static_opps)
+			goto initialize_static_opps;
+
 		goto put_opp_table;
 	}
 
@@ -399,6 +419,7 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
 	if (!opp_table)
 		return -ENOMEM;
 
+initialize_static_opps:
 	/* We have opp-table node now, iterate over it and add OPPs */
 	for_each_available_child_of_node(opp_np, np) {
 		count++;
@@ -434,11 +455,7 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
 	if (pstate_count)
 		opp_table->genpd_performance_state = true;
 
-	opp_table->np = opp_np;
-	if (of_property_read_bool(opp_np, "opp-shared"))
-		opp_table->shared_opp = OPP_TABLE_ACCESS_SHARED;
-	else
-		opp_table->shared_opp = OPP_TABLE_ACCESS_EXCLUSIVE;
+	opp_table->parsed_static_opps = true;
 
 put_opp_table:
 	dev_pm_opp_put_opp_table(opp_table);
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index b235e76..b04c2b5 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -129,6 +129,7 @@ enum opp_table_access {
  * @lock:	mutex protecting the opp_list and dev_list.
  * @np:		struct device_node pointer for opp's DT node.
  * @clock_latency_ns_max: Max clock latency in nanoseconds.
+ * @parsed_static_opps: True if OPPs are initialized from DT.
  * @shared_opp: OPP is shared between multiple devices.
  * @suspend_opp: Pointer to OPP to be used during device suspend.
  * @supported_hw: Array of version number to support.
@@ -164,6 +165,7 @@ struct opp_table {
 	/* For backward compatibility with v1 bindings */
 	unsigned int voltage_tolerance_v1;
 
+	bool parsed_static_opps;
 	enum opp_table_access shared_opp;
 	struct dev_pm_opp *suspend_opp;
 
-- 
cgit v1.1


From 0ad8c623907c27f4b8572d36c4ba73ea103e3108 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 2 Aug 2018 14:23:09 +0530
Subject: OPP: Don't take OPP table's kref for static OPPs

The reference count is only required to be incremented for every call
that may lead to adding the OPP table. For static OPPs the same should
be done from the parent routine which adds all static OPPs together and
so only one refcount for all static OPPs.

Update code to reflect that.

The refcount is incremented every time a dynamic OPP is created (as that
can lead to creating the OPP table) and the same is dropped when the OPP
is removed.

Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 332748a..2a69762 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -919,7 +919,6 @@ static void _opp_kref_release(struct kref *kref)
 	kfree(opp);
 
 	mutex_unlock(&opp_table->lock);
-	dev_pm_opp_put_opp_table(opp_table);
 }
 
 void dev_pm_opp_get(struct dev_pm_opp *opp)
@@ -963,11 +962,15 @@ void dev_pm_opp_remove(struct device *dev, unsigned long freq)
 
 	if (found) {
 		dev_pm_opp_put(opp);
+
+		/* Drop the reference taken by dev_pm_opp_add() */
+		dev_pm_opp_put_opp_table(opp_table);
 	} else {
 		dev_warn(dev, "%s: Couldn't find OPP with freq: %lu\n",
 			 __func__, freq);
 	}
 
+	/* Drop the reference taken by _find_opp_table() */
 	dev_pm_opp_put_opp_table(opp_table);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
@@ -1085,9 +1088,6 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp,
 	new_opp->opp_table = opp_table;
 	kref_init(&new_opp->kref);
 
-	/* Get a reference to the OPP table */
-	_get_opp_table_kref(opp_table);
-
 	ret = opp_debug_create_one(new_opp, opp_table);
 	if (ret)
 		dev_err(dev, "%s: Failed to register opp to debugfs (%d)\n",
@@ -1566,8 +1566,9 @@ int dev_pm_opp_add(struct device *dev, unsigned long freq, unsigned long u_volt)
 		return -ENOMEM;
 
 	ret = _opp_add_v1(opp_table, dev, freq, u_volt, true);
+	if (ret)
+		dev_pm_opp_put_opp_table(opp_table);
 
-	dev_pm_opp_put_opp_table(opp_table);
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_add);
-- 
cgit v1.1


From d0e8ae6c26da7b9436775dfd9768d7a821ed47b7 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Tue, 11 Sep 2018 11:14:11 +0530
Subject: OPP: Create separate kref for static OPPs list

The static OPPs don't always get freed with the OPP table, it can happen
before that as well. For example, if the OPP table is first created
using helpers like dev_pm_opp_set_supported_hw() and the OPPs are
created at a later point. Now when the OPPs are removed, the OPP table
stays until the time dev_pm_opp_put_supported_hw() is called.

Later patches will streamline the freeing of OPP table and that requires
the static OPPs to get freed with help of a separate kernel reference.
This patch prepares for that by creating a separate kref for static OPPs
list.

Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 33 ++++++++++++++++++++++++++++++++-
 drivers/opp/of.c   |  7 +++++++
 drivers/opp/opp.h  |  3 +++
 3 files changed, 42 insertions(+), 1 deletion(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 2a69762..b555121 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -892,6 +892,33 @@ static void _opp_table_kref_release(struct kref *kref)
 	mutex_unlock(&opp_table_lock);
 }
 
+void _opp_remove_all_static(struct opp_table *opp_table)
+{
+	struct dev_pm_opp *opp, *tmp;
+
+	list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) {
+		if (!opp->dynamic)
+			dev_pm_opp_put(opp);
+	}
+
+	opp_table->parsed_static_opps = false;
+}
+
+static void _opp_table_list_kref_release(struct kref *kref)
+{
+	struct opp_table *opp_table = container_of(kref, struct opp_table,
+						   list_kref);
+
+	_opp_remove_all_static(opp_table);
+	mutex_unlock(&opp_table_lock);
+}
+
+void _put_opp_list_kref(struct opp_table *opp_table)
+{
+	kref_put_mutex(&opp_table->list_kref, _opp_table_list_kref_release,
+		       &opp_table_lock);
+}
+
 void dev_pm_opp_put_opp_table(struct opp_table *opp_table)
 {
 	kref_put_mutex(&opp_table->kref, _opp_table_kref_release,
@@ -1746,8 +1773,11 @@ void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev,
 	/* Find if opp_table manages a single device */
 	if (list_is_singular(&opp_table->dev_list)) {
 		/* Free static OPPs */
+		_put_opp_list_kref(opp_table);
+
+		/* Free dynamic OPPs */
 		list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) {
-			if (remove_all || !opp->dynamic)
+			if (remove_all)
 				dev_pm_opp_put(opp);
 		}
 
@@ -1758,6 +1788,7 @@ void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev,
 		if (opp_table->genpd_performance_state)
 			dev_pm_genpd_set_performance_state(dev, 0);
 	} else {
+		_put_opp_list_kref(opp_table);
 		_remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table);
 	}
 
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 4a19f76..aaa4bab 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -411,6 +411,8 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
 			ret = -ENOMEM;
 		else if (!opp_table->parsed_static_opps)
 			goto initialize_static_opps;
+		else
+			kref_get(&opp_table->list_kref);
 
 		goto put_opp_table;
 	}
@@ -420,6 +422,8 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
 		return -ENOMEM;
 
 initialize_static_opps:
+	kref_init(&opp_table->list_kref);
+
 	/* We have opp-table node now, iterate over it and add OPPs */
 	for_each_available_child_of_node(opp_np, np) {
 		count++;
@@ -437,6 +441,7 @@ initialize_static_opps:
 	/* There should be one of more OPP defined */
 	if (WARN_ON(!count)) {
 		ret = -ENOENT;
+		_put_opp_list_kref(opp_table);
 		goto put_opp_table;
 	}
 
@@ -491,6 +496,8 @@ static int _of_add_opp_table_v1(struct device *dev)
 	if (!opp_table)
 		return -ENOMEM;
 
+	kref_init(&opp_table->list_kref);
+
 	val = prop->value;
 	while (nr) {
 		unsigned long freq = be32_to_cpup(val++) * 1000;
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index b04c2b5..9274116 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -126,6 +126,7 @@ enum opp_table_access {
  * @dev_list:	list of devices that share these OPPs
  * @opp_list:	table of opps
  * @kref:	for reference count of the table.
+ * @list_kref:	for reference count of the OPP list.
  * @lock:	mutex protecting the opp_list and dev_list.
  * @np:		struct device_node pointer for opp's DT node.
  * @clock_latency_ns_max: Max clock latency in nanoseconds.
@@ -157,6 +158,7 @@ struct opp_table {
 	struct list_head dev_list;
 	struct list_head opp_list;
 	struct kref kref;
+	struct kref list_kref;
 	struct mutex lock;
 
 	struct device_node *np;
@@ -200,6 +202,7 @@ int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *o
 int _opp_add_v1(struct opp_table *opp_table, struct device *dev, unsigned long freq, long u_volt, bool dynamic);
 void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of, int last_cpu);
 struct opp_table *_add_opp_table(struct device *dev);
+void _put_opp_list_kref(struct opp_table *opp_table);
 
 #ifdef CONFIG_OF
 void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index);
-- 
cgit v1.1


From 883071c4bd9a6e45bf0b5e45575d110eaeb2e6d0 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 12 Sep 2018 12:25:22 +0530
Subject: cpufreq: mvebu: Remove OPPs using dev_pm_opp_remove()

dev_pm_opp_cpumask_remove_table() is going to change in the next commit
and will not remove dynamic OPPs automatically. They must be removed
with a call to dev_pm_opp_remove().

Reviewed-by: Gregory CLEMENT <gregory.clement@bootlin.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/mvebu-cpufreq.c | 9 ++-------
 1 file changed, 2 insertions(+), 7 deletions(-)

diff --git a/drivers/cpufreq/mvebu-cpufreq.c b/drivers/cpufreq/mvebu-cpufreq.c
index 31513bd..6d33a63 100644
--- a/drivers/cpufreq/mvebu-cpufreq.c
+++ b/drivers/cpufreq/mvebu-cpufreq.c
@@ -84,9 +84,10 @@ static int __init armada_xp_pmsu_cpufreq_init(void)
 
 		ret = dev_pm_opp_add(cpu_dev, clk_get_rate(clk) / 2, 0);
 		if (ret) {
+			dev_pm_opp_remove(cpu_dev, clk_get_rate(clk));
 			clk_put(clk);
 			dev_err(cpu_dev, "Failed to register OPPs\n");
-			goto opp_register_failed;
+			return ret;
 		}
 
 		ret = dev_pm_opp_set_sharing_cpus(cpu_dev,
@@ -99,11 +100,5 @@ static int __init armada_xp_pmsu_cpufreq_init(void)
 
 	platform_device_register_simple("cpufreq-dt", -1, NULL, 0);
 	return 0;
-
-opp_register_failed:
-	/* As registering has failed remove all the opp for all cpus */
-	dev_pm_opp_cpumask_remove_table(cpu_possible_mask);
-
-	return ret;
 }
 device_initcall(armada_xp_pmsu_cpufreq_init);
-- 
cgit v1.1


From 2a4eb7358aba6beff7fa23f028c733310756e525 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Thu, 13 Sep 2018 13:14:36 +0530
Subject: OPP: Don't remove dynamic OPPs from _dev_pm_opp_remove_table()

Only one platform was depending on this feature and it is already
updated now. Stop removing dynamic OPPs from _dev_pm_opp_remove_table().
This simplifies lot of paths and removes unnecessary parameters.

Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 20 +++++---------------
 drivers/opp/cpu.c  |  9 +++------
 drivers/opp/of.c   | 12 ++++++------
 drivers/opp/opp.h  |  6 +++---
 4 files changed, 17 insertions(+), 30 deletions(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index b555121..2319ad4 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -1759,14 +1759,10 @@ int dev_pm_opp_unregister_notifier(struct device *dev,
 EXPORT_SYMBOL(dev_pm_opp_unregister_notifier);
 
 /*
- * Free OPPs either created using static entries present in DT or even the
- * dynamically added entries based on remove_all param.
+ * Free OPPs either created using static entries present in DT.
  */
-void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev,
-			      bool remove_all)
+void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev)
 {
-	struct dev_pm_opp *opp, *tmp;
-
 	/* Protect dev_list */
 	mutex_lock(&opp_table->lock);
 
@@ -1775,12 +1771,6 @@ void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev,
 		/* Free static OPPs */
 		_put_opp_list_kref(opp_table);
 
-		/* Free dynamic OPPs */
-		list_for_each_entry_safe(opp, tmp, &opp_table->opp_list, node) {
-			if (remove_all)
-				dev_pm_opp_put(opp);
-		}
-
 		/*
 		 * The OPP table is getting removed, drop the performance state
 		 * constraints.
@@ -1795,7 +1785,7 @@ void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev,
 	mutex_unlock(&opp_table->lock);
 }
 
-void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all)
+void _dev_pm_opp_find_and_remove_table(struct device *dev)
 {
 	struct opp_table *opp_table;
 
@@ -1812,7 +1802,7 @@ void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all)
 		return;
 	}
 
-	_dev_pm_opp_remove_table(opp_table, dev, remove_all);
+	_dev_pm_opp_remove_table(opp_table, dev);
 
 	dev_pm_opp_put_opp_table(opp_table);
 }
@@ -1826,6 +1816,6 @@ void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all)
  */
 void dev_pm_opp_remove_table(struct device *dev)
 {
-	_dev_pm_opp_find_and_remove_table(dev, true);
+	_dev_pm_opp_find_and_remove_table(dev);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_remove_table);
diff --git a/drivers/opp/cpu.c b/drivers/opp/cpu.c
index 36586f6..ab6d07e 100644
--- a/drivers/opp/cpu.c
+++ b/drivers/opp/cpu.c
@@ -108,7 +108,7 @@ void dev_pm_opp_free_cpufreq_table(struct device *dev,
 EXPORT_SYMBOL_GPL(dev_pm_opp_free_cpufreq_table);
 #endif	/* CONFIG_CPU_FREQ */
 
-void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of,
+void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask,
 				      int last_cpu)
 {
 	struct device *cpu_dev;
@@ -127,10 +127,7 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of,
 			continue;
 		}
 
-		if (of)
-			dev_pm_opp_of_remove_table(cpu_dev);
-		else
-			dev_pm_opp_remove_table(cpu_dev);
+		_dev_pm_opp_find_and_remove_table(cpu_dev);
 	}
 }
 
@@ -144,7 +141,7 @@ void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of,
  */
 void dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask)
 {
-	_dev_pm_opp_cpumask_remove_table(cpumask, false, -1);
+	_dev_pm_opp_cpumask_remove_table(cpumask, -1);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_cpumask_remove_table);
 
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index aaa4bab..861cc75 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -279,7 +279,7 @@ free_microvolt:
  */
 void dev_pm_opp_of_remove_table(struct device *dev)
 {
-	_dev_pm_opp_find_and_remove_table(dev, false);
+	_dev_pm_opp_find_and_remove_table(dev);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
 
@@ -432,7 +432,7 @@ initialize_static_opps:
 		if (ret) {
 			dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
 				ret);
-			_dev_pm_opp_remove_table(opp_table, dev, false);
+			_dev_pm_opp_remove_table(opp_table, dev);
 			of_node_put(np);
 			goto put_opp_table;
 		}
@@ -453,7 +453,7 @@ initialize_static_opps:
 		dev_err(dev, "Not all nodes have performance state set (%d: %d)\n",
 			count, pstate_count);
 		ret = -ENOENT;
-		_dev_pm_opp_remove_table(opp_table, dev, false);
+		_dev_pm_opp_remove_table(opp_table, dev);
 		goto put_opp_table;
 	}
 
@@ -507,7 +507,7 @@ static int _of_add_opp_table_v1(struct device *dev)
 		if (ret) {
 			dev_err(dev, "%s: Failed to add OPP %ld (%d)\n",
 				__func__, freq, ret);
-			_dev_pm_opp_remove_table(opp_table, dev, false);
+			_dev_pm_opp_remove_table(opp_table, dev);
 			break;
 		}
 		nr -= 2;
@@ -618,7 +618,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table_indexed);
  */
 void dev_pm_opp_of_cpumask_remove_table(const struct cpumask *cpumask)
 {
-	_dev_pm_opp_cpumask_remove_table(cpumask, true, -1);
+	_dev_pm_opp_cpumask_remove_table(cpumask, -1);
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
 
@@ -653,7 +653,7 @@ int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
 				 __func__, cpu, ret);
 
 			/* Free all other OPPs */
-			_dev_pm_opp_cpumask_remove_table(cpumask, true, cpu);
+			_dev_pm_opp_cpumask_remove_table(cpumask, cpu);
 			break;
 		}
 	}
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index 9274116..98dd7d3 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -194,13 +194,13 @@ void _get_opp_table_kref(struct opp_table *opp_table);
 int _get_opp_count(struct opp_table *opp_table);
 struct opp_table *_find_opp_table(struct device *dev);
 struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table);
-void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev, bool remove_all);
-void _dev_pm_opp_find_and_remove_table(struct device *dev, bool remove_all);
+void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev);
+void _dev_pm_opp_find_and_remove_table(struct device *dev);
 struct dev_pm_opp *_opp_allocate(struct opp_table *opp_table);
 void _opp_free(struct dev_pm_opp *opp);
 int _opp_add(struct device *dev, struct dev_pm_opp *new_opp, struct opp_table *opp_table, bool rate_not_available);
 int _opp_add_v1(struct opp_table *opp_table, struct device *dev, unsigned long freq, long u_volt, bool dynamic);
-void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, bool of, int last_cpu);
+void _dev_pm_opp_cpumask_remove_table(const struct cpumask *cpumask, int last_cpu);
 struct opp_table *_add_opp_table(struct device *dev);
 void _put_opp_list_kref(struct opp_table *opp_table);
 
-- 
cgit v1.1


From cdd6ed90cdb6c2fd982909501f0a109274147fb4 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 12 Sep 2018 12:35:19 +0530
Subject: OPP: Use a single mechanism to free the OPP table

Currently there are two separate ways to free the OPP table based on how
it is created in the first place.

We call _dev_pm_opp_remove_table() to free the static and/or dynamic
OPP, OPP list devices, etc. This is done for the case where the OPP
table is added while initializing the OPPs, like via the path
dev_pm_opp_of_add_table().

We also call dev_pm_opp_put_opp_table() in some cases which eventually
frees the OPP table structure once the reference count reaches 0. This
is used by the first case as well as other cases like
dev_pm_opp_set_regulators() where the OPPs aren't necessarily
initialized at this point.

This whole thing is a bit unclear and messy and obstruct any further
cleanup/fixup of OPP core.

This patch tries to streamline this by keeping a single path for OPP
table destruction, i.e. dev_pm_opp_put_opp_table().

All the cleanup happens in _opp_table_kref_release() now after the
reference count reaches 0. _dev_pm_opp_remove_table() is removed as it
isn't required anymore.

We don't drop the reference to the OPP table after creating it from
_of_add_opp_table_v{1|2}() anymore and the same is dropped only when we
try to remove them.

Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 54 ++++++++++++++++--------------------------------------
 drivers/opp/of.c   | 32 ++++++++++++++++++--------------
 drivers/opp/opp.h  |  2 +-
 3 files changed, 35 insertions(+), 53 deletions(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index 2319ad4..d3e33fd 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -867,23 +867,24 @@ struct opp_table *dev_pm_opp_get_opp_table_indexed(struct device *dev,
 static void _opp_table_kref_release(struct kref *kref)
 {
 	struct opp_table *opp_table = container_of(kref, struct opp_table, kref);
-	struct opp_device *opp_dev;
+	struct opp_device *opp_dev, *temp;
 
 	/* Release clk */
 	if (!IS_ERR(opp_table->clk))
 		clk_put(opp_table->clk);
 
-	/*
-	 * No need to take opp_table->lock here as we are guaranteed that no
-	 * references to the OPP table are taken at this point.
-	 */
-	opp_dev = list_first_entry(&opp_table->dev_list, struct opp_device,
-				   node);
+	WARN_ON(!list_empty(&opp_table->opp_list));
 
-	_remove_opp_dev(opp_dev, opp_table);
+	list_for_each_entry_safe(opp_dev, temp, &opp_table->dev_list, node) {
+		/*
+		 * The OPP table is getting removed, drop the performance state
+		 * constraints.
+		 */
+		if (opp_table->genpd_performance_state)
+			dev_pm_genpd_set_performance_state((struct device *)(opp_dev->dev), 0);
 
-	/* dev_list must be empty now */
-	WARN_ON(!list_empty(&opp_table->dev_list));
+		_remove_opp_dev(opp_dev, opp_table);
+	}
 
 	mutex_destroy(&opp_table->lock);
 	list_del(&opp_table->node);
@@ -1758,33 +1759,6 @@ int dev_pm_opp_unregister_notifier(struct device *dev,
 }
 EXPORT_SYMBOL(dev_pm_opp_unregister_notifier);
 
-/*
- * Free OPPs either created using static entries present in DT.
- */
-void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev)
-{
-	/* Protect dev_list */
-	mutex_lock(&opp_table->lock);
-
-	/* Find if opp_table manages a single device */
-	if (list_is_singular(&opp_table->dev_list)) {
-		/* Free static OPPs */
-		_put_opp_list_kref(opp_table);
-
-		/*
-		 * The OPP table is getting removed, drop the performance state
-		 * constraints.
-		 */
-		if (opp_table->genpd_performance_state)
-			dev_pm_genpd_set_performance_state(dev, 0);
-	} else {
-		_put_opp_list_kref(opp_table);
-		_remove_opp_dev(_find_opp_dev(dev, opp_table), opp_table);
-	}
-
-	mutex_unlock(&opp_table->lock);
-}
-
 void _dev_pm_opp_find_and_remove_table(struct device *dev)
 {
 	struct opp_table *opp_table;
@@ -1802,8 +1776,12 @@ void _dev_pm_opp_find_and_remove_table(struct device *dev)
 		return;
 	}
 
-	_dev_pm_opp_remove_table(opp_table, dev);
+	_put_opp_list_kref(opp_table);
+
+	/* Drop reference taken by _find_opp_table() */
+	dev_pm_opp_put_opp_table(opp_table);
 
+	/* Drop reference taken while the OPP table was added */
 	dev_pm_opp_put_opp_table(opp_table);
 }
 
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 861cc75..ae0436e 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -407,14 +407,17 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
 	opp_table = _managed_opp(opp_np);
 	if (opp_table) {
 		/* OPPs are already managed */
-		if (!_add_opp_dev(dev, opp_table))
+		if (!_add_opp_dev(dev, opp_table)) {
 			ret = -ENOMEM;
-		else if (!opp_table->parsed_static_opps)
-			goto initialize_static_opps;
-		else
+			goto put_opp_table;
+		}
+
+		if (opp_table->parsed_static_opps) {
 			kref_get(&opp_table->list_kref);
+			return 0;
+		}
 
-		goto put_opp_table;
+		goto initialize_static_opps;
 	}
 
 	opp_table = dev_pm_opp_get_opp_table_indexed(dev, index);
@@ -432,17 +435,15 @@ initialize_static_opps:
 		if (ret) {
 			dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
 				ret);
-			_dev_pm_opp_remove_table(opp_table, dev);
 			of_node_put(np);
-			goto put_opp_table;
+			goto put_list_kref;
 		}
 	}
 
 	/* There should be one of more OPP defined */
 	if (WARN_ON(!count)) {
 		ret = -ENOENT;
-		_put_opp_list_kref(opp_table);
-		goto put_opp_table;
+		goto put_list_kref;
 	}
 
 	list_for_each_entry(opp, &opp_table->opp_list, node)
@@ -453,8 +454,7 @@ initialize_static_opps:
 		dev_err(dev, "Not all nodes have performance state set (%d: %d)\n",
 			count, pstate_count);
 		ret = -ENOENT;
-		_dev_pm_opp_remove_table(opp_table, dev);
-		goto put_opp_table;
+		goto put_list_kref;
 	}
 
 	if (pstate_count)
@@ -462,6 +462,10 @@ initialize_static_opps:
 
 	opp_table->parsed_static_opps = true;
 
+	return 0;
+
+put_list_kref:
+	_put_opp_list_kref(opp_table);
 put_opp_table:
 	dev_pm_opp_put_opp_table(opp_table);
 
@@ -507,13 +511,13 @@ static int _of_add_opp_table_v1(struct device *dev)
 		if (ret) {
 			dev_err(dev, "%s: Failed to add OPP %ld (%d)\n",
 				__func__, freq, ret);
-			_dev_pm_opp_remove_table(opp_table, dev);
-			break;
+			_put_opp_list_kref(opp_table);
+			dev_pm_opp_put_opp_table(opp_table);
+			return ret;
 		}
 		nr -= 2;
 	}
 
-	dev_pm_opp_put_opp_table(opp_table);
 	return ret;
 }
 
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index 98dd7d3..f9fbb755 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -190,11 +190,11 @@ struct opp_table {
 
 /* Routines internal to opp core */
 void dev_pm_opp_get(struct dev_pm_opp *opp);
+void _opp_remove_all_static(struct opp_table *opp_table);
 void _get_opp_table_kref(struct opp_table *opp_table);
 int _get_opp_count(struct opp_table *opp_table);
 struct opp_table *_find_opp_table(struct device *dev);
 struct opp_device *_add_opp_dev(const struct device *dev, struct opp_table *opp_table);
-void _dev_pm_opp_remove_table(struct opp_table *opp_table, struct device *dev);
 void _dev_pm_opp_find_and_remove_table(struct device *dev);
 struct dev_pm_opp *_opp_allocate(struct opp_table *opp_table);
 void _opp_free(struct dev_pm_opp *opp);
-- 
cgit v1.1


From 283d55e68d8a0f302057f57dcbd4d2e000c2ac85 Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Fri, 7 Sep 2018 09:01:54 +0530
Subject: OPP: Prevent creating multiple OPP tables for devices sharing OPP
 nodes

When two or more devices are sharing their clock and voltage rails, they
share the same OPP table. But there are some corner cases where the OPP
core incorrectly creates separate OPP tables for them.

For example, CPU 0 and 1 share clock/voltage rails. The platform
specific code calls dev_pm_opp_set_regulators() for CPU0 and the OPP
core creates an OPP table for it (the individual OPPs aren't initialized
as of now). The same is repeated for CPU1 then. Because
_opp_get_opp_table() doesn't compare DT node pointers currently, it
fails to find the link between CPU0 and CPU1 and so creates a new OPP
table.

Fix this by calling _managed_opp() from _opp_get_opp_table().
_managed_opp() gain an additional argument (index) to get the right node
pointer. This resulted in simplifying code in _of_add_opp_table_v2() as
well.

Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 25 ++++++++++++++++++++++---
 drivers/opp/of.c   | 35 +++++++++++++----------------------
 drivers/opp/opp.h  |  2 ++
 3 files changed, 37 insertions(+), 25 deletions(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index d3e33fd..cdf918a 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -759,8 +759,8 @@ static void _remove_opp_dev(struct opp_device *opp_dev,
 	kfree(opp_dev);
 }
 
-struct opp_device *_add_opp_dev(const struct device *dev,
-				struct opp_table *opp_table)
+static struct opp_device *_add_opp_dev_unlocked(const struct device *dev,
+						struct opp_table *opp_table)
 {
 	struct opp_device *opp_dev;
 	int ret;
@@ -772,7 +772,6 @@ struct opp_device *_add_opp_dev(const struct device *dev,
 	/* Initialize opp-dev */
 	opp_dev->dev = dev;
 
-	mutex_lock(&opp_table->lock);
 	list_add(&opp_dev->node, &opp_table->dev_list);
 
 	/* Create debugfs entries for the opp_table */
@@ -780,6 +779,17 @@ struct opp_device *_add_opp_dev(const struct device *dev,
 	if (ret)
 		dev_err(dev, "%s: Failed to register opp debugfs (%d)\n",
 			__func__, ret);
+
+	return opp_dev;
+}
+
+struct opp_device *_add_opp_dev(const struct device *dev,
+				struct opp_table *opp_table)
+{
+	struct opp_device *opp_dev;
+
+	mutex_lock(&opp_table->lock);
+	opp_dev = _add_opp_dev_unlocked(dev, opp_table);
 	mutex_unlock(&opp_table->lock);
 
 	return opp_dev;
@@ -844,6 +854,15 @@ static struct opp_table *_opp_get_opp_table(struct device *dev, int index)
 	if (!IS_ERR(opp_table))
 		goto unlock;
 
+	opp_table = _managed_opp(dev, index);
+	if (opp_table) {
+		if (!_add_opp_dev_unlocked(dev, opp_table)) {
+			dev_pm_opp_put_opp_table(opp_table);
+			opp_table = NULL;
+		}
+		goto unlock;
+	}
+
 	opp_table = _allocate_opp_table(dev, index);
 
 unlock:
diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index ae0436e..1ddef52 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -41,11 +41,14 @@ struct device_node *dev_pm_opp_of_get_opp_desc_node(struct device *dev)
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_get_opp_desc_node);
 
-static struct opp_table *_managed_opp(const struct device_node *np)
+struct opp_table *_managed_opp(struct device *dev, int index)
 {
 	struct opp_table *opp_table, *managed_table = NULL;
+	struct device_node *np;
 
-	mutex_lock(&opp_table_lock);
+	np = _opp_of_get_opp_desc_node(dev->of_node, index);
+	if (!np)
+		return NULL;
 
 	list_for_each_entry(opp_table, &opp_tables, node) {
 		if (opp_table->np == np) {
@@ -65,7 +68,7 @@ static struct opp_table *_managed_opp(const struct device_node *np)
 		}
 	}
 
-	mutex_unlock(&opp_table_lock);
+	of_node_put(np);
 
 	return managed_table;
 }
@@ -401,30 +404,19 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
 {
 	struct device_node *np;
 	struct opp_table *opp_table;
-	int ret = 0, count = 0, pstate_count = 0;
+	int ret, count = 0, pstate_count = 0;
 	struct dev_pm_opp *opp;
 
-	opp_table = _managed_opp(opp_np);
-	if (opp_table) {
-		/* OPPs are already managed */
-		if (!_add_opp_dev(dev, opp_table)) {
-			ret = -ENOMEM;
-			goto put_opp_table;
-		}
-
-		if (opp_table->parsed_static_opps) {
-			kref_get(&opp_table->list_kref);
-			return 0;
-		}
-
-		goto initialize_static_opps;
-	}
-
 	opp_table = dev_pm_opp_get_opp_table_indexed(dev, index);
 	if (!opp_table)
 		return -ENOMEM;
 
-initialize_static_opps:
+	/* OPP table is already initialized for the device */
+	if (opp_table->parsed_static_opps) {
+		kref_get(&opp_table->list_kref);
+		return 0;
+	}
+
 	kref_init(&opp_table->list_kref);
 
 	/* We have opp-table node now, iterate over it and add OPPs */
@@ -466,7 +458,6 @@ initialize_static_opps:
 
 put_list_kref:
 	_put_opp_list_kref(opp_table);
-put_opp_table:
 	dev_pm_opp_put_opp_table(opp_table);
 
 	return ret;
diff --git a/drivers/opp/opp.h b/drivers/opp/opp.h
index f9fbb755..9c6544b 100644
--- a/drivers/opp/opp.h
+++ b/drivers/opp/opp.h
@@ -206,8 +206,10 @@ void _put_opp_list_kref(struct opp_table *opp_table);
 
 #ifdef CONFIG_OF
 void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index);
+struct opp_table *_managed_opp(struct device *dev, int index);
 #else
 static inline void _of_init_opp_table(struct opp_table *opp_table, struct device *dev, int index) {}
+static inline struct opp_table *_managed_opp(struct device *dev, int index) { return NULL; }
 #endif
 
 #ifdef CONFIG_DEBUG_FS
-- 
cgit v1.1


From 5ed4cecd75e90232a19afa502cf477925854561e Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 12 Sep 2018 11:21:17 +0530
Subject: OPP: Pass OPP table to _of_add_opp_table_v{1|2}()

Both _of_add_opp_table_v1() and _of_add_opp_table_v2() contain similar
code to get the OPP table and their parent routine also parses the DT to
find the OPP table's node pointer. This can be simplified by getting the
OPP table in advance and then passing it as argument to these routines.

Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/of.c | 68 +++++++++++++++++++++++---------------------------------
 1 file changed, 28 insertions(+), 40 deletions(-)

diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 1ddef52..a71ff3a 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -399,18 +399,12 @@ free_opp:
 }
 
 /* Initializes OPP tables based on new bindings */
-static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
-				int index)
+static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table)
 {
 	struct device_node *np;
-	struct opp_table *opp_table;
 	int ret, count = 0, pstate_count = 0;
 	struct dev_pm_opp *opp;
 
-	opp_table = dev_pm_opp_get_opp_table_indexed(dev, index);
-	if (!opp_table)
-		return -ENOMEM;
-
 	/* OPP table is already initialized for the device */
 	if (opp_table->parsed_static_opps) {
 		kref_get(&opp_table->list_kref);
@@ -420,7 +414,7 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
 	kref_init(&opp_table->list_kref);
 
 	/* We have opp-table node now, iterate over it and add OPPs */
-	for_each_available_child_of_node(opp_np, np) {
+	for_each_available_child_of_node(opp_table->np, np) {
 		count++;
 
 		ret = _opp_add_static_v2(opp_table, dev, np);
@@ -458,15 +452,13 @@ static int _of_add_opp_table_v2(struct device *dev, struct device_node *opp_np,
 
 put_list_kref:
 	_put_opp_list_kref(opp_table);
-	dev_pm_opp_put_opp_table(opp_table);
 
 	return ret;
 }
 
 /* Initializes OPP tables based on old-deprecated bindings */
-static int _of_add_opp_table_v1(struct device *dev)
+static int _of_add_opp_table_v1(struct device *dev, struct opp_table *opp_table)
 {
-	struct opp_table *opp_table;
 	const struct property *prop;
 	const __be32 *val;
 	int nr, ret = 0;
@@ -487,10 +479,6 @@ static int _of_add_opp_table_v1(struct device *dev)
 		return -EINVAL;
 	}
 
-	opp_table = dev_pm_opp_get_opp_table(dev);
-	if (!opp_table)
-		return -ENOMEM;
-
 	kref_init(&opp_table->list_kref);
 
 	val = prop->value;
@@ -503,7 +491,6 @@ static int _of_add_opp_table_v1(struct device *dev)
 			dev_err(dev, "%s: Failed to add OPP %ld (%d)\n",
 				__func__, freq, ret);
 			_put_opp_list_kref(opp_table);
-			dev_pm_opp_put_opp_table(opp_table);
 			return ret;
 		}
 		nr -= 2;
@@ -531,24 +518,24 @@ static int _of_add_opp_table_v1(struct device *dev)
  */
 int dev_pm_opp_of_add_table(struct device *dev)
 {
-	struct device_node *opp_np;
+	struct opp_table *opp_table;
 	int ret;
 
+	opp_table = dev_pm_opp_get_opp_table_indexed(dev, 0);
+	if (!opp_table)
+		return -ENOMEM;
+
 	/*
-	 * OPPs have two version of bindings now. The older one is deprecated,
-	 * try for the new binding first.
+	 * OPPs have two version of bindings now. Also try the old (v1)
+	 * bindings for backward compatibility with older dtbs.
 	 */
-	opp_np = dev_pm_opp_of_get_opp_desc_node(dev);
-	if (!opp_np) {
-		/*
-		 * Try old-deprecated bindings for backward compatibility with
-		 * older dtbs.
-		 */
-		return _of_add_opp_table_v1(dev);
-	}
+	if (opp_table->np)
+		ret = _of_add_opp_table_v2(dev, opp_table);
+	else
+		ret = _of_add_opp_table_v1(dev, opp_table);
 
-	ret = _of_add_opp_table_v2(dev, opp_np, 0);
-	of_node_put(opp_np);
+	if (ret)
+		dev_pm_opp_put_opp_table(opp_table);
 
 	return ret;
 }
@@ -575,28 +562,29 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_add_table);
  */
 int dev_pm_opp_of_add_table_indexed(struct device *dev, int index)
 {
-	struct device_node *opp_np;
+	struct opp_table *opp_table;
 	int ret, count;
 
-again:
-	opp_np = _opp_of_get_opp_desc_node(dev->of_node, index);
-	if (!opp_np) {
+	if (index) {
 		/*
 		 * If only one phandle is present, then the same OPP table
 		 * applies for all index requests.
 		 */
 		count = of_count_phandle_with_args(dev->of_node,
 						   "operating-points-v2", NULL);
-		if (count == 1 && index) {
-			index = 0;
-			goto again;
-		}
+		if (count != 1)
+			return -ENODEV;
 
-		return -ENODEV;
+		index = 0;
 	}
 
-	ret = _of_add_opp_table_v2(dev, opp_np, index);
-	of_node_put(opp_np);
+	opp_table = dev_pm_opp_get_opp_table_indexed(dev, index);
+	if (!opp_table)
+		return -ENOMEM;
+
+	ret = _of_add_opp_table_v2(dev, opp_table);
+	if (ret)
+		dev_pm_opp_put_opp_table(opp_table);
 
 	return ret;
 }
-- 
cgit v1.1


From 23c7b54ca1cd1797ef39169ab85e6d46f1c2d061 Mon Sep 17 00:00:00 2001
From: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Date: Wed, 4 Jul 2018 10:45:50 +0200
Subject: PM / devfreq: Fix devfreq_add_device() when drivers are built as
 modules.

When the devfreq driver and the governor driver are built as modules,
the call to devfreq_add_device() or governor_store() fails because the
governor driver is not loaded at the time the devfreq driver loads. The
devfreq driver has a build dependency on the governor but also should
have a runtime dependency. We need to make sure that the governor driver
is loaded before the devfreq driver.

This patch fixes this bug by adding a try_then_request_governor()
function. First tries to find the governor, and then, if it is not found,
it requests the module and tries again.

Fixes: 1b5c1be2c88e (PM / devfreq: map devfreq drivers to governor using name)
Signed-off-by: Enric Balletbo i Serra <enric.balletbo@collabora.com>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/devfreq.c | 53 +++++++++++++++++++++++++++++++++++++++++++----
 1 file changed, 49 insertions(+), 4 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 4c49bb1..62ead44 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -11,6 +11,7 @@
  */
 
 #include <linux/kernel.h>
+#include <linux/kmod.h>
 #include <linux/sched.h>
 #include <linux/errno.h>
 #include <linux/err.h>
@@ -221,6 +222,49 @@ static struct devfreq_governor *find_devfreq_governor(const char *name)
 	return ERR_PTR(-ENODEV);
 }
 
+/**
+ * try_then_request_governor() - Try to find the governor and request the
+ *                               module if is not found.
+ * @name:	name of the governor
+ *
+ * Search the list of devfreq governors and request the module and try again
+ * if is not found. This can happen when both drivers (the governor driver
+ * and the driver that call devfreq_add_device) are built as modules.
+ * devfreq_list_lock should be held by the caller. Returns the matched
+ * governor's pointer.
+ */
+static struct devfreq_governor *try_then_request_governor(const char *name)
+{
+	struct devfreq_governor *governor;
+	int err = 0;
+
+	if (IS_ERR_OR_NULL(name)) {
+		pr_err("DEVFREQ: %s: Invalid parameters\n", __func__);
+		return ERR_PTR(-EINVAL);
+	}
+	WARN(!mutex_is_locked(&devfreq_list_lock),
+	     "devfreq_list_lock must be locked.");
+
+	governor = find_devfreq_governor(name);
+	if (IS_ERR(governor)) {
+		mutex_unlock(&devfreq_list_lock);
+
+		if (!strncmp(name, DEVFREQ_GOV_SIMPLE_ONDEMAND,
+			     DEVFREQ_NAME_LEN))
+			err = request_module("governor_%s", "simpleondemand");
+		else
+			err = request_module("governor_%s", name);
+		/* Restore previous state before return */
+		mutex_lock(&devfreq_list_lock);
+		if (err)
+			return NULL;
+
+		governor = find_devfreq_governor(name);
+	}
+
+	return governor;
+}
+
 static int devfreq_notify_transition(struct devfreq *devfreq,
 		struct devfreq_freqs *freqs, unsigned int state)
 {
@@ -646,9 +690,8 @@ struct devfreq *devfreq_add_device(struct device *dev,
 	mutex_unlock(&devfreq->lock);
 
 	mutex_lock(&devfreq_list_lock);
-	list_add(&devfreq->node, &devfreq_list);
 
-	governor = find_devfreq_governor(devfreq->governor_name);
+	governor = try_then_request_governor(devfreq->governor_name);
 	if (IS_ERR(governor)) {
 		dev_err(dev, "%s: Unable to find governor for the device\n",
 			__func__);
@@ -664,12 +707,14 @@ struct devfreq *devfreq_add_device(struct device *dev,
 			__func__);
 		goto err_init;
 	}
+
+	list_add(&devfreq->node, &devfreq_list);
+
 	mutex_unlock(&devfreq_list_lock);
 
 	return devfreq;
 
 err_init:
-	list_del(&devfreq->node);
 	mutex_unlock(&devfreq_list_lock);
 
 	device_unregister(&devfreq->dev);
@@ -991,7 +1036,7 @@ static ssize_t governor_store(struct device *dev, struct device_attribute *attr,
 		return -EINVAL;
 
 	mutex_lock(&devfreq_list_lock);
-	governor = find_devfreq_governor(str_governor);
+	governor = try_then_request_governor(str_governor);
 	if (IS_ERR(governor)) {
 		ret = PTR_ERR(governor);
 		goto out;
-- 
cgit v1.1


From d0e464205b8a1fa21357aad0bbf136500d7e688d Mon Sep 17 00:00:00 2001
From: Bjorn Andersson <bjorn.andersson@linaro.org>
Date: Tue, 24 Apr 2018 12:46:39 -0700
Subject: PM / devfreq: Drop custom MIN/MAX macros

Drop the custom MIN/MAX macros in favour of the standard min/max from
kernel.h

Signed-off-by: Bjorn Andersson <bjorn.andersson@linaro.org>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/devfreq.c | 11 ++++-------
 1 file changed, 4 insertions(+), 7 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 62ead44..329c5e3 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -29,9 +29,6 @@
 #include <linux/of.h>
 #include "governor.h"
 
-#define MAX(a,b)	((a > b) ? a : b)
-#define MIN(a,b)	((a < b) ? a : b)
-
 static struct class *devfreq_class;
 
 /*
@@ -324,8 +321,8 @@ int update_devfreq(struct devfreq *devfreq)
 	 * max_freq
 	 * min_freq
 	 */
-	max_freq = MIN(devfreq->scaling_max_freq, devfreq->max_freq);
-	min_freq = MAX(devfreq->scaling_min_freq, devfreq->min_freq);
+	max_freq = min(devfreq->scaling_max_freq, devfreq->max_freq);
+	min_freq = max(devfreq->scaling_min_freq, devfreq->min_freq);
 
 	if (min_freq && freq < min_freq) {
 		freq = min_freq;
@@ -1197,7 +1194,7 @@ static ssize_t min_freq_show(struct device *dev, struct device_attribute *attr,
 {
 	struct devfreq *df = to_devfreq(dev);
 
-	return sprintf(buf, "%lu\n", MAX(df->scaling_min_freq, df->min_freq));
+	return sprintf(buf, "%lu\n", max(df->scaling_min_freq, df->min_freq));
 }
 
 static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
@@ -1233,7 +1230,7 @@ static ssize_t max_freq_show(struct device *dev, struct device_attribute *attr,
 {
 	struct devfreq *df = to_devfreq(dev);
 
-	return sprintf(buf, "%lu\n", MIN(df->scaling_max_freq, df->max_freq));
+	return sprintf(buf, "%lu\n", min(df->scaling_max_freq, df->max_freq));
 }
 static DEVICE_ATTR_RW(max_freq);
 
-- 
cgit v1.1


From df5cf4a36178c5d4f2b8b9469cb2f722e64cd102 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Fri, 3 Aug 2018 13:05:09 -0700
Subject: PM / devfreq: Fix handling of min/max_freq == 0

Commit ab8f58ad72c4 ("PM / devfreq: Set min/max_freq when adding the
devfreq device") initializes df->min/max_freq with the min/max OPP when
the device is added. Later commit f1d981eaecf8 ("PM / devfreq: Use the
available min/max frequency") adds df->scaling_min/max_freq and the
following to the frequency adjustment code:

  max_freq = MIN(devfreq->scaling_max_freq, devfreq->max_freq);

With the current handling of min/max_freq this is incorrect:

Even though df->max_freq is now initialized to a value != 0 user space
can still set it to 0, in this case max_freq would be 0 instead of
df->scaling_max_freq as intended. In consequence the frequency adjustment
is not performed:

  if (max_freq && freq > max_freq) {
	freq = max_freq;

To fix this set df->min/max freq to the min/max OPP in max/max_freq_store,
when the user passes a value of 0. This also prevents df->max_freq from
being set below the min OPP when df->min_freq is 0, and similar for
min_freq. Since it is now guaranteed that df->min/max_freq can't be 0 the
checks for this case can be removed.

Fixes: f1d981eaecf8 ("PM / devfreq: Use the available min/max frequency")
Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/devfreq.c | 42 ++++++++++++++++++++++++++++++------------
 1 file changed, 30 insertions(+), 12 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 329c5e3..b5e7af6 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -324,11 +324,11 @@ int update_devfreq(struct devfreq *devfreq)
 	max_freq = min(devfreq->scaling_max_freq, devfreq->max_freq);
 	min_freq = max(devfreq->scaling_min_freq, devfreq->min_freq);
 
-	if (min_freq && freq < min_freq) {
+	if (freq < min_freq) {
 		freq = min_freq;
 		flags &= ~DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use GLB */
 	}
-	if (max_freq && freq > max_freq) {
+	if (freq > max_freq) {
 		freq = max_freq;
 		flags |= DEVFREQ_FLAG_LEAST_UPPER_BOUND; /* Use LUB */
 	}
@@ -1168,17 +1168,26 @@ static ssize_t min_freq_store(struct device *dev, struct device_attribute *attr,
 	struct devfreq *df = to_devfreq(dev);
 	unsigned long value;
 	int ret;
-	unsigned long max;
 
 	ret = sscanf(buf, "%lu", &value);
 	if (ret != 1)
 		return -EINVAL;
 
 	mutex_lock(&df->lock);
-	max = df->max_freq;
-	if (value && max && value > max) {
-		ret = -EINVAL;
-		goto unlock;
+
+	if (value) {
+		if (value > df->max_freq) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+	} else {
+		unsigned long *freq_table = df->profile->freq_table;
+
+		/* Get minimum frequency according to sorting order */
+		if (freq_table[0] < freq_table[df->profile->max_state - 1])
+			value = freq_table[0];
+		else
+			value = freq_table[df->profile->max_state - 1];
 	}
 
 	df->min_freq = value;
@@ -1203,17 +1212,26 @@ static ssize_t max_freq_store(struct device *dev, struct device_attribute *attr,
 	struct devfreq *df = to_devfreq(dev);
 	unsigned long value;
 	int ret;
-	unsigned long min;
 
 	ret = sscanf(buf, "%lu", &value);
 	if (ret != 1)
 		return -EINVAL;
 
 	mutex_lock(&df->lock);
-	min = df->min_freq;
-	if (value && min && value < min) {
-		ret = -EINVAL;
-		goto unlock;
+
+	if (value) {
+		if (value < df->min_freq) {
+			ret = -EINVAL;
+			goto unlock;
+		}
+	} else {
+		unsigned long *freq_table = df->profile->freq_table;
+
+		/* Get maximum frequency according to sorting order */
+		if (freq_table[0] < freq_table[df->profile->max_state - 1])
+			value = freq_table[df->profile->max_state - 1];
+		else
+			value = freq_table[0];
 	}
 
 	df->max_freq = value;
-- 
cgit v1.1


From 6ff66e2a008337b8a005fd0ae2037bed716262cc Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Fri, 3 Aug 2018 13:05:10 -0700
Subject: PM / devfreq: Don't adjust to user limits in governors

Several governors use the user space limits df->min/max_freq to adjust
the target frequency. This is not necessary, since update_devfreq()
already takes care of this. Instead the governor can request the available
min/max frequency by setting the target frequency to DEVFREQ_MIN/MAX_FREQ
and let update_devfreq() take care of any adjustments.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/governor.h                |  3 +++
 drivers/devfreq/governor_performance.c    |  5 +----
 drivers/devfreq/governor_powersave.c      |  2 +-
 drivers/devfreq/governor_simpleondemand.c | 12 +++---------
 drivers/devfreq/governor_userspace.c      | 16 ++++------------
 5 files changed, 12 insertions(+), 26 deletions(-)

diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index cfc50a6..b817002 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -25,6 +25,9 @@
 #define DEVFREQ_GOV_SUSPEND			0x4
 #define DEVFREQ_GOV_RESUME			0x5
 
+#define DEVFREQ_MIN_FREQ			0
+#define DEVFREQ_MAX_FREQ			ULONG_MAX
+
 /**
  * struct devfreq_governor - Devfreq policy governor
  * @node:		list node - contains registered devfreq governors
diff --git a/drivers/devfreq/governor_performance.c b/drivers/devfreq/governor_performance.c
index 4d23ecf..ded429f 100644
--- a/drivers/devfreq/governor_performance.c
+++ b/drivers/devfreq/governor_performance.c
@@ -20,10 +20,7 @@ static int devfreq_performance_func(struct devfreq *df,
 	 * target callback should be able to get floor value as
 	 * said in devfreq.h
 	 */
-	if (!df->max_freq)
-		*freq = UINT_MAX;
-	else
-		*freq = df->max_freq;
+	*freq = DEVFREQ_MAX_FREQ;
 	return 0;
 }
 
diff --git a/drivers/devfreq/governor_powersave.c b/drivers/devfreq/governor_powersave.c
index 0c42f23..9e8897f 100644
--- a/drivers/devfreq/governor_powersave.c
+++ b/drivers/devfreq/governor_powersave.c
@@ -20,7 +20,7 @@ static int devfreq_powersave_func(struct devfreq *df,
 	 * target callback should be able to get ceiling value as
 	 * said in devfreq.h
 	 */
-	*freq = df->min_freq;
+	*freq = DEVFREQ_MIN_FREQ;
 	return 0;
 }
 
diff --git a/drivers/devfreq/governor_simpleondemand.c b/drivers/devfreq/governor_simpleondemand.c
index 28e0f2d..c0417f0 100644
--- a/drivers/devfreq/governor_simpleondemand.c
+++ b/drivers/devfreq/governor_simpleondemand.c
@@ -27,7 +27,6 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
 	unsigned int dfso_upthreshold = DFSO_UPTHRESHOLD;
 	unsigned int dfso_downdifferential = DFSO_DOWNDIFFERENCTIAL;
 	struct devfreq_simple_ondemand_data *data = df->data;
-	unsigned long max = (df->max_freq) ? df->max_freq : UINT_MAX;
 
 	err = devfreq_update_stats(df);
 	if (err)
@@ -47,7 +46,7 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
 
 	/* Assume MAX if it is going to be divided by zero */
 	if (stat->total_time == 0) {
-		*freq = max;
+		*freq = DEVFREQ_MAX_FREQ;
 		return 0;
 	}
 
@@ -60,13 +59,13 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
 	/* Set MAX if it's busy enough */
 	if (stat->busy_time * 100 >
 	    stat->total_time * dfso_upthreshold) {
-		*freq = max;
+		*freq = DEVFREQ_MAX_FREQ;
 		return 0;
 	}
 
 	/* Set MAX if we do not know the initial frequency */
 	if (stat->current_frequency == 0) {
-		*freq = max;
+		*freq = DEVFREQ_MAX_FREQ;
 		return 0;
 	}
 
@@ -85,11 +84,6 @@ static int devfreq_simple_ondemand_func(struct devfreq *df,
 	b = div_u64(b, (dfso_upthreshold - dfso_downdifferential / 2));
 	*freq = (unsigned long) b;
 
-	if (df->min_freq && *freq < df->min_freq)
-		*freq = df->min_freq;
-	if (df->max_freq && *freq > df->max_freq)
-		*freq = df->max_freq;
-
 	return 0;
 }
 
diff --git a/drivers/devfreq/governor_userspace.c b/drivers/devfreq/governor_userspace.c
index 080607c..378d84c 100644
--- a/drivers/devfreq/governor_userspace.c
+++ b/drivers/devfreq/governor_userspace.c
@@ -26,19 +26,11 @@ static int devfreq_userspace_func(struct devfreq *df, unsigned long *freq)
 {
 	struct userspace_data *data = df->data;
 
-	if (data->valid) {
-		unsigned long adjusted_freq = data->user_frequency;
-
-		if (df->max_freq && adjusted_freq > df->max_freq)
-			adjusted_freq = df->max_freq;
-
-		if (df->min_freq && adjusted_freq < df->min_freq)
-			adjusted_freq = df->min_freq;
-
-		*freq = adjusted_freq;
-	} else {
+	if (data->valid)
+		*freq = data->user_frequency;
+	else
 		*freq = df->previous_freq; /* No user freq specified yet */
-	}
+
 	return 0;
 }
 
-- 
cgit v1.1


From b596d895fa2957e136a6b398b97b06bd42b51291 Mon Sep 17 00:00:00 2001
From: Matthias Kaehlcke <mka@chromium.org>
Date: Fri, 3 Aug 2018 13:05:11 -0700
Subject: PM / devfreq: Make update_devfreq() public

Currently update_devfreq() is only visible to devfreq governors outside
of devfreq.c. Make it public to allow drivers that adjust devfreq policies
to cause a re-evaluation of the frequency after a policy change.

Signed-off-by: Matthias Kaehlcke <mka@chromium.org>
Reviewed-by: Brian Norris <briannorris@chromium.org>
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/governor.h | 3 ---
 include/linux/devfreq.h    | 8 ++++++++
 2 files changed, 8 insertions(+), 3 deletions(-)

diff --git a/drivers/devfreq/governor.h b/drivers/devfreq/governor.h
index b817002..f53339c 100644
--- a/drivers/devfreq/governor.h
+++ b/drivers/devfreq/governor.h
@@ -57,9 +57,6 @@ struct devfreq_governor {
 				unsigned int event, void *data);
 };
 
-/* Caution: devfreq->lock must be locked before calling update_devfreq */
-extern int update_devfreq(struct devfreq *devfreq);
-
 extern void devfreq_monitor_start(struct devfreq *devfreq);
 extern void devfreq_monitor_stop(struct devfreq *devfreq);
 extern void devfreq_monitor_suspend(struct devfreq *devfreq);
diff --git a/include/linux/devfreq.h b/include/linux/devfreq.h
index 3aae5b3..e4963b0 100644
--- a/include/linux/devfreq.h
+++ b/include/linux/devfreq.h
@@ -198,6 +198,14 @@ extern void devm_devfreq_remove_device(struct device *dev,
 extern int devfreq_suspend_device(struct devfreq *devfreq);
 extern int devfreq_resume_device(struct devfreq *devfreq);
 
+/**
+ * update_devfreq() - Reevaluate the device and configure frequency
+ * @devfreq:	the devfreq device
+ *
+ * Note: devfreq->lock must be held
+ */
+extern int update_devfreq(struct devfreq *devfreq);
+
 /* Helper functions for devfreq user device driver with OPP. */
 extern struct dev_pm_opp *devfreq_recommended_opp(struct device *dev,
 					   unsigned long *freq, u32 flags);
-- 
cgit v1.1


From f037eb8c1f476bc903d99695c1eb9f99ccb46b27 Mon Sep 17 00:00:00 2001
From: Rob Herring <robh@kernel.org>
Date: Mon, 27 Aug 2018 20:52:16 -0500
Subject: PM / devfreq: Convert to using %pOFn instead of device_node.name

In preparation to remove the node name pointer from struct device_node,
convert printf users to use the %pOFn format specifier.

Signed-off-by: Rob Herring <robh@kernel.org>
Acked-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/event/exynos-ppmu.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/devfreq/event/exynos-ppmu.c b/drivers/devfreq/event/exynos-ppmu.c
index a9c64f0..c61de0b 100644
--- a/drivers/devfreq/event/exynos-ppmu.c
+++ b/drivers/devfreq/event/exynos-ppmu.c
@@ -535,8 +535,8 @@ static int of_get_devfreq_events(struct device_node *np,
 
 		if (i == ARRAY_SIZE(ppmu_events)) {
 			dev_warn(dev,
-				"don't know how to configure events : %s\n",
-				node->name);
+				"don't know how to configure events : %pOFn\n",
+				node);
 			continue;
 		}
 
-- 
cgit v1.1


From 2f061fd0c2d852e32e03a903fccd810663c5c31e Mon Sep 17 00:00:00 2001
From: Vincent Donnefort <vincent.donnefort@arm.com>
Date: Mon, 3 Sep 2018 09:02:07 +0900
Subject: PM / devfreq: stopping the governor before device_unregister()

device_release() is freeing the resources before calling the device
specific release callback which is, in the case of devfreq, stopping
the governor.

It is a problem as some governors are using the device resources. e.g.
simpleondemand which is using the devfreq deferrable monitoring work. If it
is not stopped before the resources are freed, it might lead to a use after
free.

Signed-off-by: Vincent Donnefort <vincent.donnefort@arm.com>
Reviewed-by: John Einar Reitan <john.reitan@arm.com>
[cw00.choi: Fix merge conflict]
Reviewed-by: Chanwoo Choi <cw00.choi@samsung.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/devfreq.c | 9 ++++-----
 1 file changed, 4 insertions(+), 5 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index b5e7af6..1868423 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -575,10 +575,6 @@ static void devfreq_dev_release(struct device *dev)
 	list_del(&devfreq->node);
 	mutex_unlock(&devfreq_list_lock);
 
-	if (devfreq->governor)
-		devfreq->governor->event_handler(devfreq,
-						 DEVFREQ_GOV_STOP, NULL);
-
 	if (devfreq->profile->exit)
 		devfreq->profile->exit(devfreq->dev.parent);
 
@@ -714,7 +710,7 @@ struct devfreq *devfreq_add_device(struct device *dev,
 err_init:
 	mutex_unlock(&devfreq_list_lock);
 
-	device_unregister(&devfreq->dev);
+	devfreq_remove_device(devfreq);
 	devfreq = NULL;
 err_dev:
 	if (devfreq)
@@ -735,6 +731,9 @@ int devfreq_remove_device(struct devfreq *devfreq)
 	if (!devfreq)
 		return -EINVAL;
 
+	if (devfreq->governor)
+		devfreq->governor->event_handler(devfreq,
+						 DEVFREQ_GOV_STOP, NULL);
 	device_unregister(&devfreq->dev);
 
 	return 0;
-- 
cgit v1.1


From 8188b154f95014dae4d19892fefb202c8df3f885 Mon Sep 17 00:00:00 2001
From: zhong jiang <zhongjiang@huawei.com>
Date: Fri, 21 Sep 2018 21:18:43 +0800
Subject: PM / devfreq: remove redundant null pointer check before kfree

kfree has taken the null pointer into account. hence it is safe
to remove the redundant null pointer check before kfree.

Signed-off-by: zhong jiang <zhongjiang@huawei.com>
Signed-off-by: MyungJoo Ham <myungjoo.ham@samsung.com>
---
 drivers/devfreq/devfreq.c | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/drivers/devfreq/devfreq.c b/drivers/devfreq/devfreq.c
index 1868423..1414130 100644
--- a/drivers/devfreq/devfreq.c
+++ b/drivers/devfreq/devfreq.c
@@ -713,8 +713,7 @@ err_init:
 	devfreq_remove_device(devfreq);
 	devfreq = NULL;
 err_dev:
-	if (devfreq)
-		kfree(devfreq);
+	kfree(devfreq);
 err_out:
 	return ERR_PTR(err);
 }
-- 
cgit v1.1


From cc55f7537db6af371e9c1c6a71161ee40f918824 Mon Sep 17 00:00:00 2001
From: Zhimin Gu <kookoo.gu@intel.com>
Date: Fri, 21 Sep 2018 14:26:24 +0800
Subject: x86, hibernate: Fix nosave_regions setup for hibernation

On 32bit systems, nosave_regions(non RAM areas) located between
max_low_pfn and max_pfn are not excluded from hibernation snapshot
currently, which may result in a machine check exception when
trying to access these unsafe regions during hibernation:

[  612.800453] Disabling lock debugging due to kernel taint
[  612.805786] mce: [Hardware Error]: CPU 0: Machine Check Exception: 5 Bank 6: fe00000000801136
[  612.814344] mce: [Hardware Error]: RIP !INEXACT! 60:<00000000d90be566> {swsusp_save+0x436/0x560}
[  612.823167] mce: [Hardware Error]: TSC 1f5939fe276 ADDR dd000000 MISC 30e0000086
[  612.830677] mce: [Hardware Error]: PROCESSOR 0:306c3 TIME 1529487426 SOCKET 0 APIC 0 microcode 24
[  612.839581] mce: [Hardware Error]: Run the above through 'mcelog --ascii'
[  612.846394] mce: [Hardware Error]: Machine check: Processor context corrupt
[  612.853380] Kernel panic - not syncing: Fatal machine check
[  612.858978] Kernel Offset: 0x18000000 from 0xc1000000 (relocation range: 0xc0000000-0xf7ffdfff)

This is because on 32bit systems, pages above max_low_pfn are regarded
as high memeory, and accessing unsafe pages might cause expected MCE.
On the problematic 32bit system, there are reserved memory above low
memory, which triggered the MCE:

e820 memory mapping:
[    0.000000] BIOS-e820: [mem 0x0000000000000000-0x000000000009d7ff] usable
[    0.000000] BIOS-e820: [mem 0x000000000009d800-0x000000000009ffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000000e0000-0x00000000000fffff] reserved
[    0.000000] BIOS-e820: [mem 0x0000000000100000-0x00000000d160cfff] usable
[    0.000000] BIOS-e820: [mem 0x00000000d160d000-0x00000000d1613fff] ACPI NVS
[    0.000000] BIOS-e820: [mem 0x00000000d1614000-0x00000000d1a44fff] usable
[    0.000000] BIOS-e820: [mem 0x00000000d1a45000-0x00000000d1ecffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000d1ed0000-0x00000000d7eeafff] usable
[    0.000000] BIOS-e820: [mem 0x00000000d7eeb000-0x00000000d7ffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000d8000000-0x00000000d875ffff] usable
[    0.000000] BIOS-e820: [mem 0x00000000d8760000-0x00000000d87fffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000d8800000-0x00000000d8fadfff] usable
[    0.000000] BIOS-e820: [mem 0x00000000d8fae000-0x00000000d8ffffff] ACPI data
[    0.000000] BIOS-e820: [mem 0x00000000d9000000-0x00000000da71bfff] usable
[    0.000000] BIOS-e820: [mem 0x00000000da71c000-0x00000000da7fffff] ACPI NVS
[    0.000000] BIOS-e820: [mem 0x00000000da800000-0x00000000dbb8bfff] usable
[    0.000000] BIOS-e820: [mem 0x00000000dbb8c000-0x00000000dbffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000dd000000-0x00000000df1fffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000f8000000-0x00000000fbffffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000fec00000-0x00000000fec00fff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000fed00000-0x00000000fed03fff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000fed1c000-0x00000000fed1ffff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000fee00000-0x00000000fee00fff] reserved
[    0.000000] BIOS-e820: [mem 0x00000000ff000000-0x00000000ffffffff] reserved
[    0.000000] BIOS-e820: [mem 0x0000000100000000-0x000000041edfffff] usable

Fix this problem by changing pfn limit from max_low_pfn to max_pfn.
This fix does not impact 64bit system because on 64bit max_low_pfn
is the same as max_pfn.

Signed-off-by: Zhimin Gu <kookoo.gu@intel.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/kernel/setup.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/kernel/setup.c b/arch/x86/kernel/setup.c
index b4866ba..90ecc10 100644
--- a/arch/x86/kernel/setup.c
+++ b/arch/x86/kernel/setup.c
@@ -1251,7 +1251,7 @@ void __init setup_arch(char **cmdline_p)
 	x86_init.hyper.guest_late_init();
 
 	e820__reserve_resources();
-	e820__register_nosave_regions(max_low_pfn);
+	e820__register_nosave_regions(max_pfn);
 
 	x86_init.resources.reserve_resources();
 
-- 
cgit v1.1


From 749fa17093ff67b31dea864531a3698b6a95c26c Mon Sep 17 00:00:00 2001
From: Chen Yu <yu.c.chen@intel.com>
Date: Fri, 21 Sep 2018 14:26:38 +0800
Subject: PM / hibernate: Check the success of generating md5 digest before
 hibernation

Currently if get_e820_md5() fails, then it will hibernate nevertheless.
Actually the error code should be propagated to upper caller so that
the hibernation could be aware of the result and terminates the process
if md5 digest fails.

Suggested-by: Thomas Gleixner <tglx@linutronix.de>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/power/hibernate_64.c | 11 +++++------
 1 file changed, 5 insertions(+), 6 deletions(-)

diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index f8e3b66..e0de959 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -265,9 +265,9 @@ free_tfm:
 	return ret;
 }
 
-static void hibernation_e820_save(void *buf)
+static int hibernation_e820_save(void *buf)
 {
-	get_e820_md5(e820_table_firmware, buf);
+	return get_e820_md5(e820_table_firmware, buf);
 }
 
 static bool hibernation_e820_mismatch(void *buf)
@@ -287,8 +287,9 @@ static bool hibernation_e820_mismatch(void *buf)
 	return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false;
 }
 #else
-static void hibernation_e820_save(void *buf)
+static int hibernation_e820_save(void *buf)
 {
+	return 0;
 }
 
 static bool hibernation_e820_mismatch(void *buf)
@@ -333,9 +334,7 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
 
 	rdr->magic = RESTORE_MAGIC;
 
-	hibernation_e820_save(rdr->e820_digest);
-
-	return 0;
+	return hibernation_e820_save(rdr->e820_digest);
 }
 
 /**
-- 
cgit v1.1


From 8e5b2a3c5a773f161b57eee7156a63089edd2c5c Mon Sep 17 00:00:00 2001
From: Zhimin Gu <kookoo.gu@intel.com>
Date: Fri, 21 Sep 2018 14:26:47 +0800
Subject: x86-32/asm/power: Create stack frames in hibernate_asm_32.S

swsusp_arch_suspend() is callable non-leaf function which doesn't
honor CONFIG_FRAME_POINTER, which can result in bad stack traces.
Also it's not annotated as ELF callable function which can confuse tooling.

Create a stack frame for it when CONFIG_FRAME_POINTER is enabled and
give it proper ELF function annotation.

Also in this patch introduces the restore_registers() symbol and
gives it ELF function annotation, thus to prepare for later register
restore.

Analogous changes were made for 64bit before in commit ef0f3ed5a4ac
(x86/asm/power: Create stack frames in hibernate_asm_64.S) and
commit 4ce827b4cc58 (x86/power/64: Fix hibernation return address
corruption).

Signed-off-by: Zhimin Gu <kookoo.gu@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/suspend_32.h | 4 ++++
 arch/x86/power/hibernate_asm_32.S | 9 +++++++++
 2 files changed, 13 insertions(+)

diff --git a/arch/x86/include/asm/suspend_32.h b/arch/x86/include/asm/suspend_32.h
index 8be6afb..fdbd9d7 100644
--- a/arch/x86/include/asm/suspend_32.h
+++ b/arch/x86/include/asm/suspend_32.h
@@ -32,4 +32,8 @@ struct saved_context {
 	unsigned long return_address;
 } __attribute__((packed));
 
+/* routines for saving/restoring kernel state */
+extern char core_restore_code[];
+extern char restore_registers[];
+
 #endif /* _ASM_X86_SUSPEND_32_H */
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 6e56815..671d38d 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -12,6 +12,7 @@
 #include <asm/page_types.h>
 #include <asm/asm-offsets.h>
 #include <asm/processor-flags.h>
+#include <asm/frame.h>
 
 .text
 
@@ -24,8 +25,11 @@ ENTRY(swsusp_arch_suspend)
 	pushfl
 	popl saved_context_eflags
 
+	FRAME_BEGIN
 	call swsusp_save
+	FRAME_END
 	ret
+ENDPROC(swsusp_arch_suspend)
 
 ENTRY(restore_image)
 	movl	mmu_cr4_features, %ecx
@@ -58,6 +62,10 @@ copy_loop:
 	.p2align 4,,7
 
 done:
+
+	/* code below belongs to the image kernel */
+	.align PAGE_SIZE
+ENTRY(restore_registers)
 	/* go back to the original page tables */
 	movl	$swapper_pg_dir, %eax
 	subl	$__PAGE_OFFSET, %eax
@@ -83,3 +91,4 @@ done:
 	xorl	%eax, %eax
 
 	ret
+ENDPROC(restore_registers)
-- 
cgit v1.1


From 25862a049e6f04cc982f4bed25ed3e6f0a0a5a61 Mon Sep 17 00:00:00 2001
From: Zhimin Gu <kookoo.gu@intel.com>
Date: Fri, 21 Sep 2018 14:26:58 +0800
Subject: x86, hibernate: Extract the common code of 64/32 bit system

Reduce the hibernation code duplication between x86-32 and x86-64
by extracting the common code into hibernate.c.

Currently only pfn_is_nosave() is the activated common
function in hibernate.c

No functional change.

Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Zhimin Gu <kookoo.gu@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/suspend.h |   8 ++
 arch/x86/power/Makefile        |   2 +-
 arch/x86/power/hibernate.c     | 246 +++++++++++++++++++++++++++++++++++++++++
 arch/x86/power/hibernate_32.c  |  15 +--
 arch/x86/power/hibernate_64.c  | 221 ------------------------------------
 5 files changed, 256 insertions(+), 236 deletions(-)
 create mode 100644 arch/x86/power/hibernate.c

diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h
index ecffe81..40b0255 100644
--- a/arch/x86/include/asm/suspend.h
+++ b/arch/x86/include/asm/suspend.h
@@ -4,3 +4,11 @@
 #else
 # include <asm/suspend_64.h>
 #endif
+extern unsigned long restore_jump_address __visible;
+extern unsigned long jump_address_phys;
+extern unsigned long restore_cr3 __visible;
+extern unsigned long temp_level4_pgt __visible;
+extern unsigned long relocated_restore_code __visible;
+extern int relocate_restore_code(void);
+/* Defined in hibernate_asm_32/64.S */
+extern asmlinkage __visible int restore_image(void);
diff --git a/arch/x86/power/Makefile b/arch/x86/power/Makefile
index a470138..37923d7 100644
--- a/arch/x86/power/Makefile
+++ b/arch/x86/power/Makefile
@@ -7,4 +7,4 @@ nostackp := $(call cc-option, -fno-stack-protector)
 CFLAGS_cpu.o	:= $(nostackp)
 
 obj-$(CONFIG_PM_SLEEP)		+= cpu.o
-obj-$(CONFIG_HIBERNATION)	+= hibernate_$(BITS).o hibernate_asm_$(BITS).o
+obj-$(CONFIG_HIBERNATION)	+= hibernate_$(BITS).o hibernate_asm_$(BITS).o hibernate.o
diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
new file mode 100644
index 0000000..f63793b
--- /dev/null
+++ b/arch/x86/power/hibernate.c
@@ -0,0 +1,246 @@
+// SPDX-License-Identifier: GPL-2.0
+/*
+ * Hibernation support for x86
+ *
+ * Copyright (c) 2007 Rafael J. Wysocki <rjw@sisk.pl>
+ * Copyright (c) 2002 Pavel Machek <pavel@ucw.cz>
+ * Copyright (c) 2001 Patrick Mochel <mochel@osdl.org>
+ */
+#include <linux/gfp.h>
+#include <linux/smp.h>
+#include <linux/suspend.h>
+#include <linux/scatterlist.h>
+#include <linux/kdebug.h>
+
+#include <crypto/hash.h>
+
+#include <asm/e820/api.h>
+#include <asm/init.h>
+#include <asm/proto.h>
+#include <asm/page.h>
+#include <asm/pgtable.h>
+#include <asm/mtrr.h>
+#include <asm/sections.h>
+#include <asm/suspend.h>
+#include <asm/tlbflush.h>
+
+/*
+ * Address to jump to in the last phase of restore in order to get to the image
+ * kernel's text (this value is passed in the image header).
+ */
+unsigned long restore_jump_address __visible;
+unsigned long jump_address_phys;
+
+/*
+ * Value of the cr3 register from before the hibernation (this value is passed
+ * in the image header).
+ */
+unsigned long restore_cr3 __visible;
+unsigned long temp_level4_pgt __visible;
+unsigned long relocated_restore_code __visible;
+
+/**
+ *	pfn_is_nosave - check if given pfn is in the 'nosave' section
+ */
+int pfn_is_nosave(unsigned long pfn)
+{
+	unsigned long nosave_begin_pfn;
+	unsigned long nosave_end_pfn;
+
+	nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
+	nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
+
+	return pfn >= nosave_begin_pfn && pfn < nosave_end_pfn;
+}
+
+#ifdef CONFIG_X86_64
+
+#define MD5_DIGEST_SIZE 16
+
+struct restore_data_record {
+	unsigned long jump_address;
+	unsigned long jump_address_phys;
+	unsigned long cr3;
+	unsigned long magic;
+	u8 e820_digest[MD5_DIGEST_SIZE];
+};
+
+#if IS_BUILTIN(CONFIG_CRYPTO_MD5)
+/**
+ * get_e820_md5 - calculate md5 according to given e820 table
+ *
+ * @table: the e820 table to be calculated
+ * @buf: the md5 result to be stored to
+ */
+static int get_e820_md5(struct e820_table *table, void *buf)
+{
+	struct crypto_shash *tfm;
+	struct shash_desc *desc;
+	int size;
+	int ret = 0;
+
+	tfm = crypto_alloc_shash("md5", 0, 0);
+	if (IS_ERR(tfm))
+		return -ENOMEM;
+
+	desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm),
+		       GFP_KERNEL);
+	if (!desc) {
+		ret = -ENOMEM;
+		goto free_tfm;
+	}
+
+	desc->tfm = tfm;
+	desc->flags = 0;
+
+	size = offsetof(struct e820_table, entries) +
+		sizeof(struct e820_entry) * table->nr_entries;
+
+	if (crypto_shash_digest(desc, (u8 *)table, size, buf))
+		ret = -EINVAL;
+
+	kzfree(desc);
+
+free_tfm:
+	crypto_free_shash(tfm);
+	return ret;
+}
+
+static int hibernation_e820_save(void *buf)
+{
+	return get_e820_md5(e820_table_firmware, buf);
+}
+
+static bool hibernation_e820_mismatch(void *buf)
+{
+	int ret;
+	u8 result[MD5_DIGEST_SIZE];
+
+	memset(result, 0, MD5_DIGEST_SIZE);
+	/* If there is no digest in suspend kernel, let it go. */
+	if (!memcmp(result, buf, MD5_DIGEST_SIZE))
+		return false;
+
+	ret = get_e820_md5(e820_table_firmware, result);
+	if (ret)
+		return true;
+
+	return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false;
+}
+#else
+static int hibernation_e820_save(void *buf)
+{
+	return 0;
+}
+
+static bool hibernation_e820_mismatch(void *buf)
+{
+	/* If md5 is not builtin for restore kernel, let it go. */
+	return false;
+}
+#endif
+
+#define RESTORE_MAGIC	0x23456789ABCDEF01UL
+
+/**
+ *	arch_hibernation_header_save - populate the architecture specific part
+ *		of a hibernation image header
+ *	@addr: address to save the data at
+ */
+int arch_hibernation_header_save(void *addr, unsigned int max_size)
+{
+	struct restore_data_record *rdr = addr;
+
+	if (max_size < sizeof(struct restore_data_record))
+		return -EOVERFLOW;
+	rdr->magic = RESTORE_MAGIC;
+	rdr->jump_address = (unsigned long)restore_registers;
+	rdr->jump_address_phys = __pa_symbol(restore_registers);
+
+	/*
+	 * The restore code fixes up CR3 and CR4 in the following sequence:
+	 *
+	 * [in hibernation asm]
+	 * 1. CR3 <= temporary page tables
+	 * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
+	 * 3. CR3 <= rdr->cr3
+	 * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
+	 * [in restore_processor_state()]
+	 * 5. CR4 <= saved CR4
+	 * 6. CR3 <= saved CR3
+	 *
+	 * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
+	 * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
+	 * rdr->cr3 needs to point to valid page tables but must not
+	 * have any of the PCID bits set.
+	 */
+	rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
+
+	return hibernation_e820_save(rdr->e820_digest);
+}
+
+/**
+ *	arch_hibernation_header_restore - read the architecture specific data
+ *		from the hibernation image header
+ *	@addr: address to read the data from
+ */
+int arch_hibernation_header_restore(void *addr)
+{
+	struct restore_data_record *rdr = addr;
+
+	if (rdr->magic != RESTORE_MAGIC) {
+		pr_crit("Unrecognized hibernate image header format!\n");
+		return -EINVAL;
+	}
+
+	restore_jump_address = rdr->jump_address;
+	jump_address_phys = rdr->jump_address_phys;
+	restore_cr3 = rdr->cr3;
+
+	if (hibernation_e820_mismatch(rdr->e820_digest)) {
+		pr_crit("Hibernate inconsistent memory map detected!\n");
+		return -ENODEV;
+	}
+
+	return 0;
+}
+
+int relocate_restore_code(void)
+{
+	pgd_t *pgd;
+	p4d_t *p4d;
+	pud_t *pud;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	relocated_restore_code = get_safe_page(GFP_ATOMIC);
+	if (!relocated_restore_code)
+		return -ENOMEM;
+
+	memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
+
+	/* Make the page containing the relocated code executable */
+	pgd = (pgd_t *)__va(read_cr3_pa()) +
+		pgd_index(relocated_restore_code);
+	p4d = p4d_offset(pgd, relocated_restore_code);
+	if (p4d_large(*p4d)) {
+		set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
+		goto out;
+	}
+	pud = pud_offset(p4d, relocated_restore_code);
+	if (pud_large(*pud)) {
+		set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
+		goto out;
+	}
+	pmd = pmd_offset(pud, relocated_restore_code);
+	if (pmd_large(*pmd)) {
+		set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
+		goto out;
+	}
+	pte = pte_offset_kernel(pmd, relocated_restore_code);
+	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
+out:
+	__flush_tlb_all();
+	return 0;
+}
+#endif
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index afc4ed7..f82fbd2 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -14,9 +14,7 @@
 #include <asm/pgtable.h>
 #include <asm/mmzone.h>
 #include <asm/sections.h>
-
-/* Defined in hibernate_asm_32.S */
-extern int restore_image(void);
+#include <asm/suspend.h>
 
 /* Pointer to the temporary resume page tables */
 pgd_t *resume_pg_dir;
@@ -162,14 +160,3 @@ asmlinkage int swsusp_arch_resume(void)
 	restore_image();
 	return 0;
 }
-
-/*
- *	pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-int pfn_is_nosave(unsigned long pfn)
-{
-	unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
-	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
-	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
-}
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index e0de959..8bc2eb0 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -26,26 +26,6 @@
 #include <asm/suspend.h>
 #include <asm/tlbflush.h>
 
-/* Defined in hibernate_asm_64.S */
-extern asmlinkage __visible int restore_image(void);
-
-/*
- * Address to jump to in the last phase of restore in order to get to the image
- * kernel's text (this value is passed in the image header).
- */
-unsigned long restore_jump_address __visible;
-unsigned long jump_address_phys;
-
-/*
- * Value of the cr3 register from before the hibernation (this value is passed
- * in the image header).
- */
-unsigned long restore_cr3 __visible;
-
-unsigned long temp_level4_pgt __visible;
-
-unsigned long relocated_restore_code __visible;
-
 static int set_up_temporary_text_mapping(pgd_t *pgd)
 {
 	pmd_t *pmd;
@@ -145,45 +125,6 @@ static int set_up_temporary_mappings(void)
 	return 0;
 }
 
-static int relocate_restore_code(void)
-{
-	pgd_t *pgd;
-	p4d_t *p4d;
-	pud_t *pud;
-	pmd_t *pmd;
-	pte_t *pte;
-
-	relocated_restore_code = get_safe_page(GFP_ATOMIC);
-	if (!relocated_restore_code)
-		return -ENOMEM;
-
-	memcpy((void *)relocated_restore_code, core_restore_code, PAGE_SIZE);
-
-	/* Make the page containing the relocated code executable */
-	pgd = (pgd_t *)__va(read_cr3_pa()) +
-		pgd_index(relocated_restore_code);
-	p4d = p4d_offset(pgd, relocated_restore_code);
-	if (p4d_large(*p4d)) {
-		set_p4d(p4d, __p4d(p4d_val(*p4d) & ~_PAGE_NX));
-		goto out;
-	}
-	pud = pud_offset(p4d, relocated_restore_code);
-	if (pud_large(*pud)) {
-		set_pud(pud, __pud(pud_val(*pud) & ~_PAGE_NX));
-		goto out;
-	}
-	pmd = pmd_offset(pud, relocated_restore_code);
-	if (pmd_large(*pmd)) {
-		set_pmd(pmd, __pmd(pmd_val(*pmd) & ~_PAGE_NX));
-		goto out;
-	}
-	pte = pte_offset_kernel(pmd, relocated_restore_code);
-	set_pte(pte, __pte(pte_val(*pte) & ~_PAGE_NX));
-out:
-	__flush_tlb_all();
-	return 0;
-}
-
 asmlinkage int swsusp_arch_resume(void)
 {
 	int error;
@@ -200,165 +141,3 @@ asmlinkage int swsusp_arch_resume(void)
 	restore_image();
 	return 0;
 }
-
-/*
- *	pfn_is_nosave - check if given pfn is in the 'nosave' section
- */
-
-int pfn_is_nosave(unsigned long pfn)
-{
-	unsigned long nosave_begin_pfn = __pa_symbol(&__nosave_begin) >> PAGE_SHIFT;
-	unsigned long nosave_end_pfn = PAGE_ALIGN(__pa_symbol(&__nosave_end)) >> PAGE_SHIFT;
-	return (pfn >= nosave_begin_pfn) && (pfn < nosave_end_pfn);
-}
-
-#define MD5_DIGEST_SIZE 16
-
-struct restore_data_record {
-	unsigned long jump_address;
-	unsigned long jump_address_phys;
-	unsigned long cr3;
-	unsigned long magic;
-	u8 e820_digest[MD5_DIGEST_SIZE];
-};
-
-#define RESTORE_MAGIC	0x23456789ABCDEF01UL
-
-#if IS_BUILTIN(CONFIG_CRYPTO_MD5)
-/**
- * get_e820_md5 - calculate md5 according to given e820 table
- *
- * @table: the e820 table to be calculated
- * @buf: the md5 result to be stored to
- */
-static int get_e820_md5(struct e820_table *table, void *buf)
-{
-	struct crypto_shash *tfm;
-	struct shash_desc *desc;
-	int size;
-	int ret = 0;
-
-	tfm = crypto_alloc_shash("md5", 0, 0);
-	if (IS_ERR(tfm))
-		return -ENOMEM;
-
-	desc = kmalloc(sizeof(struct shash_desc) + crypto_shash_descsize(tfm),
-		       GFP_KERNEL);
-	if (!desc) {
-		ret = -ENOMEM;
-		goto free_tfm;
-	}
-
-	desc->tfm = tfm;
-	desc->flags = 0;
-
-	size = offsetof(struct e820_table, entries) +
-		sizeof(struct e820_entry) * table->nr_entries;
-
-	if (crypto_shash_digest(desc, (u8 *)table, size, buf))
-		ret = -EINVAL;
-
-	kzfree(desc);
-
-free_tfm:
-	crypto_free_shash(tfm);
-	return ret;
-}
-
-static int hibernation_e820_save(void *buf)
-{
-	return get_e820_md5(e820_table_firmware, buf);
-}
-
-static bool hibernation_e820_mismatch(void *buf)
-{
-	int ret;
-	u8 result[MD5_DIGEST_SIZE];
-
-	memset(result, 0, MD5_DIGEST_SIZE);
-	/* If there is no digest in suspend kernel, let it go. */
-	if (!memcmp(result, buf, MD5_DIGEST_SIZE))
-		return false;
-
-	ret = get_e820_md5(e820_table_firmware, result);
-	if (ret)
-		return true;
-
-	return memcmp(result, buf, MD5_DIGEST_SIZE) ? true : false;
-}
-#else
-static int hibernation_e820_save(void *buf)
-{
-	return 0;
-}
-
-static bool hibernation_e820_mismatch(void *buf)
-{
-	/* If md5 is not builtin for restore kernel, let it go. */
-	return false;
-}
-#endif
-
-/**
- *	arch_hibernation_header_save - populate the architecture specific part
- *		of a hibernation image header
- *	@addr: address to save the data at
- */
-int arch_hibernation_header_save(void *addr, unsigned int max_size)
-{
-	struct restore_data_record *rdr = addr;
-
-	if (max_size < sizeof(struct restore_data_record))
-		return -EOVERFLOW;
-	rdr->jump_address = (unsigned long)restore_registers;
-	rdr->jump_address_phys = __pa_symbol(restore_registers);
-
-	/*
-	 * The restore code fixes up CR3 and CR4 in the following sequence:
-	 *
-	 * [in hibernation asm]
-	 * 1. CR3 <= temporary page tables
-	 * 2. CR4 <= mmu_cr4_features (from the kernel that restores us)
-	 * 3. CR3 <= rdr->cr3
-	 * 4. CR4 <= mmu_cr4_features (from us, i.e. the image kernel)
-	 * [in restore_processor_state()]
-	 * 5. CR4 <= saved CR4
-	 * 6. CR3 <= saved CR3
-	 *
-	 * Our mmu_cr4_features has CR4.PCIDE=0, and toggling
-	 * CR4.PCIDE while CR3's PCID bits are nonzero is illegal, so
-	 * rdr->cr3 needs to point to valid page tables but must not
-	 * have any of the PCID bits set.
-	 */
-	rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
-
-	rdr->magic = RESTORE_MAGIC;
-
-	return hibernation_e820_save(rdr->e820_digest);
-}
-
-/**
- *	arch_hibernation_header_restore - read the architecture specific data
- *		from the hibernation image header
- *	@addr: address to read the data from
- */
-int arch_hibernation_header_restore(void *addr)
-{
-	struct restore_data_record *rdr = addr;
-
-	restore_jump_address = rdr->jump_address;
-	jump_address_phys = rdr->jump_address_phys;
-	restore_cr3 = rdr->cr3;
-
-	if (rdr->magic != RESTORE_MAGIC) {
-		pr_crit("Unrecognized hibernate image header format!\n");
-		return -EINVAL;
-	}
-
-	if (hibernation_e820_mismatch(rdr->e820_digest)) {
-		pr_crit("Hibernate inconsistent memory map detected!\n");
-		return -ENODEV;
-	}
-
-	return 0;
-}
-- 
cgit v1.1


From 445565303d19c75858004c129835fa418b740829 Mon Sep 17 00:00:00 2001
From: Zhimin Gu <kookoo.gu@intel.com>
Date: Fri, 21 Sep 2018 14:27:29 +0800
Subject: x86-32, hibernate: Enable CONFIG_ARCH_HIBERNATION_HEADER on 32bit
 system

Enable CONFIG_ARCH_HIBERNATION_HEADER for 32bit system so that

1. arch_hibernation_header_save/restore() are invoked across
   hibernation on 32bit system.

2. The checksum handling as well as 'magic' number checking
   for 32bit system are enabled.

Controlled by CONFIG_X86_64 in hibernate.c

Signed-off-by: Zhimin Gu <kookoo.gu@intel.com>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/Kconfig           |  2 +-
 arch/x86/power/hibernate.c | 10 +++++++++-
 2 files changed, 10 insertions(+), 2 deletions(-)

diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig
index 1a0be02..e8de5de 100644
--- a/arch/x86/Kconfig
+++ b/arch/x86/Kconfig
@@ -2422,7 +2422,7 @@ menu "Power management and ACPI options"
 
 config ARCH_HIBERNATION_HEADER
 	def_bool y
-	depends on X86_64 && HIBERNATION
+	depends on HIBERNATION
 
 source "kernel/power/Kconfig"
 
diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index f63793b..a04ca13 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -53,7 +53,6 @@ int pfn_is_nosave(unsigned long pfn)
 	return pfn >= nosave_begin_pfn && pfn < nosave_end_pfn;
 }
 
-#ifdef CONFIG_X86_64
 
 #define MD5_DIGEST_SIZE 16
 
@@ -140,7 +139,11 @@ static bool hibernation_e820_mismatch(void *buf)
 }
 #endif
 
+#ifdef CONFIG_X86_64
 #define RESTORE_MAGIC	0x23456789ABCDEF01UL
+#else
+#define RESTORE_MAGIC	0x12345678UL
+#endif
 
 /**
  *	arch_hibernation_header_save - populate the architecture specific part
@@ -154,6 +157,7 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
 	if (max_size < sizeof(struct restore_data_record))
 		return -EOVERFLOW;
 	rdr->magic = RESTORE_MAGIC;
+#ifdef CONFIG_X86_64
 	rdr->jump_address = (unsigned long)restore_registers;
 	rdr->jump_address_phys = __pa_symbol(restore_registers);
 
@@ -175,6 +179,7 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
 	 * have any of the PCID bits set.
 	 */
 	rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
+#endif
 
 	return hibernation_e820_save(rdr->e820_digest);
 }
@@ -193,9 +198,11 @@ int arch_hibernation_header_restore(void *addr)
 		return -EINVAL;
 	}
 
+#ifdef CONFIG_X86_64
 	restore_jump_address = rdr->jump_address;
 	jump_address_phys = rdr->jump_address_phys;
 	restore_cr3 = rdr->cr3;
+#endif
 
 	if (hibernation_e820_mismatch(rdr->e820_digest)) {
 		pr_crit("Hibernate inconsistent memory map detected!\n");
@@ -205,6 +212,7 @@ int arch_hibernation_header_restore(void *addr)
 	return 0;
 }
 
+#ifdef CONFIG_X86_64
 int relocate_restore_code(void)
 {
 	pgd_t *pgd;
-- 
cgit v1.1


From 72adf47764a0be66ea0c712c7d5a6b8a81e28449 Mon Sep 17 00:00:00 2001
From: Zhimin Gu <kookoo.gu@intel.com>
Date: Fri, 21 Sep 2018 14:27:40 +0800
Subject: x86, hibernate: Rename temp_level4_pgt to temp_pgt

As 32bit system is not using 4-level page, rename it
to temp_pgt so that it can be reused for both 32bit
and 64bit hibernation.

No functional change.

Signed-off-by: Zhimin Gu <kookoo.gu@intel.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/include/asm/suspend.h    | 2 +-
 arch/x86/power/hibernate.c        | 2 +-
 arch/x86/power/hibernate_64.c     | 2 +-
 arch/x86/power/hibernate_asm_64.S | 2 +-
 4 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/arch/x86/include/asm/suspend.h b/arch/x86/include/asm/suspend.h
index 40b0255..a892494 100644
--- a/arch/x86/include/asm/suspend.h
+++ b/arch/x86/include/asm/suspend.h
@@ -7,7 +7,7 @@
 extern unsigned long restore_jump_address __visible;
 extern unsigned long jump_address_phys;
 extern unsigned long restore_cr3 __visible;
-extern unsigned long temp_level4_pgt __visible;
+extern unsigned long temp_pgt __visible;
 extern unsigned long relocated_restore_code __visible;
 extern int relocate_restore_code(void);
 /* Defined in hibernate_asm_32/64.S */
diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index a04ca13..e3409e4 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -36,7 +36,7 @@ unsigned long jump_address_phys;
  * in the image header).
  */
 unsigned long restore_cr3 __visible;
-unsigned long temp_level4_pgt __visible;
+unsigned long temp_pgt __visible;
 unsigned long relocated_restore_code __visible;
 
 /**
diff --git a/arch/x86/power/hibernate_64.c b/arch/x86/power/hibernate_64.c
index 8bc2eb0..239f424 100644
--- a/arch/x86/power/hibernate_64.c
+++ b/arch/x86/power/hibernate_64.c
@@ -121,7 +121,7 @@ static int set_up_temporary_mappings(void)
 			return result;
 	}
 
-	temp_level4_pgt = __pa(pgd);
+	temp_pgt = __pa(pgd);
 	return 0;
 }
 
diff --git a/arch/x86/power/hibernate_asm_64.S b/arch/x86/power/hibernate_asm_64.S
index fd369a6..3008baa 100644
--- a/arch/x86/power/hibernate_asm_64.S
+++ b/arch/x86/power/hibernate_asm_64.S
@@ -59,7 +59,7 @@ ENTRY(restore_image)
 	movq	restore_cr3(%rip), %r9
 
 	/* prepare to switch to temporary page tables */
-	movq	temp_level4_pgt(%rip), %rax
+	movq	temp_pgt(%rip), %rax
 	movq	mmu_cr4_features(%rip), %rbx
 
 	/* prepare to copy image data to their original locations */
-- 
cgit v1.1


From 7c0a982750b3f8bc6636e8086ac4d8b7775e90f6 Mon Sep 17 00:00:00 2001
From: Zhimin Gu <kookoo.gu@intel.com>
Date: Fri, 21 Sep 2018 14:27:51 +0800
Subject: x86-32, hibernate: Use temp_pgt as the temporary page table

This is to reuse the temp_pgt for both 32bit and 64bit
system.

No intentional behavior change.

Signed-off-by: Zhimin Gu <kookoo.gu@intel.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/power/hibernate_32.c     | 2 ++
 arch/x86/power/hibernate_asm_32.S | 3 +--
 2 files changed, 3 insertions(+), 2 deletions(-)

diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index f82fbd2..a44bdad 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -156,6 +156,8 @@ asmlinkage int swsusp_arch_resume(void)
 	if (error)
 		return error;
 
+	temp_pgt = __pa(resume_pg_dir);
+
 	/* We have got enough memory and from now on we cannot recover */
 	restore_image();
 	return 0;
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 671d38d..f0627cf3 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -33,8 +33,7 @@ ENDPROC(swsusp_arch_suspend)
 
 ENTRY(restore_image)
 	movl	mmu_cr4_features, %ecx
-	movl	resume_pg_dir, %eax
-	subl	$__PAGE_OFFSET, %eax
+	movl	temp_pgt, %eax
 	movl	%eax, %cr3
 
 	jecxz	1f	# cr4 Pentium and higher, skip if zero
-- 
cgit v1.1


From 0b0a6b1f76835cbaf746e7c72edd374ec0fe818b Mon Sep 17 00:00:00 2001
From: Zhimin Gu <kookoo.gu@intel.com>
Date: Fri, 21 Sep 2018 14:28:01 +0800
Subject: x86-32, hibernate: Use the page size macro instead of constant value

Convert the hard code into PAGE_SIZE for better scalability.

No functional change.

Signed-off-by: Zhimin Gu <kookoo.gu@intel.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/power/hibernate_asm_32.S | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index f0627cf3..f5103ae 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -52,7 +52,7 @@ copy_loop:
 	movl	pbe_address(%edx), %esi
 	movl	pbe_orig_address(%edx), %edi
 
-	movl	$1024, %ecx
+	movl	$(PAGE_SIZE >> 2), %ecx
 	rep
 	movsl
 
-- 
cgit v1.1


From 32aa276437f6128df63111af13e57fe8f0272af3 Mon Sep 17 00:00:00 2001
From: Zhimin Gu <kookoo.gu@intel.com>
Date: Fri, 21 Sep 2018 14:28:11 +0800
Subject: x86-32, hibernate: Switch to original page table after resumed

After all the pages are restored to previous address, the page
table switches back to current swapper_pg_dir. However the
swapper_pg_dir currently in used might not be consistent with
previous page table, which might cause issue after resume.

Fix this issue by switching to original page table after resume,
and the address of the original page table is saved in the hibernation
image header.

Move the manipulation of restore_cr3 into common code blocks.

Signed-off-by: Zhimin Gu <kookoo.gu@intel.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/power/hibernate.c        |  4 ++--
 arch/x86/power/hibernate_asm_32.S | 10 +++++++---
 2 files changed, 9 insertions(+), 5 deletions(-)

diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index e3409e4..4935b81 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -160,6 +160,7 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
 #ifdef CONFIG_X86_64
 	rdr->jump_address = (unsigned long)restore_registers;
 	rdr->jump_address_phys = __pa_symbol(restore_registers);
+#endif
 
 	/*
 	 * The restore code fixes up CR3 and CR4 in the following sequence:
@@ -179,7 +180,6 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
 	 * have any of the PCID bits set.
 	 */
 	rdr->cr3 = restore_cr3 & ~CR3_PCID_MASK;
-#endif
 
 	return hibernation_e820_save(rdr->e820_digest);
 }
@@ -201,8 +201,8 @@ int arch_hibernation_header_restore(void *addr)
 #ifdef CONFIG_X86_64
 	restore_jump_address = rdr->jump_address;
 	jump_address_phys = rdr->jump_address_phys;
-	restore_cr3 = rdr->cr3;
 #endif
+	restore_cr3 = rdr->cr3;
 
 	if (hibernation_e820_mismatch(rdr->e820_digest)) {
 		pr_crit("Hibernate inconsistent memory map detected!\n");
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index f5103ae..6b2b949 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -25,6 +25,10 @@ ENTRY(swsusp_arch_suspend)
 	pushfl
 	popl saved_context_eflags
 
+	/* save cr3 */
+	movl	%cr3, %eax
+	movl	%eax, restore_cr3
+
 	FRAME_BEGIN
 	call swsusp_save
 	FRAME_END
@@ -32,6 +36,8 @@ ENTRY(swsusp_arch_suspend)
 ENDPROC(swsusp_arch_suspend)
 
 ENTRY(restore_image)
+	movl	restore_cr3, %ebp
+
 	movl	mmu_cr4_features, %ecx
 	movl	temp_pgt, %eax
 	movl	%eax, %cr3
@@ -66,9 +72,7 @@ done:
 	.align PAGE_SIZE
 ENTRY(restore_registers)
 	/* go back to the original page tables */
-	movl	$swapper_pg_dir, %eax
-	subl	$__PAGE_OFFSET, %eax
-	movl	%eax, %cr3
+	movl	%ebp, %cr3
 	movl	mmu_cr4_features, %ecx
 	jecxz	1f	# cr4 Pentium and higher, skip if zero
 	movl	%ecx, %cr4;  # turn PGE back on
-- 
cgit v1.1


From 6bae499a0ad437efb67b7c378e6fb4abef1885a1 Mon Sep 17 00:00:00 2001
From: Zhimin Gu <kookoo.gu@intel.com>
Date: Fri, 21 Sep 2018 14:28:22 +0800
Subject: x86-32, hibernate: Switch to relocated restore code during resume on
 32bit system

On 64bit system, code should be executed in a safe page
during page restoring, as the page where instruction is
running during resume might be scribbled and causes issues.

Although on 32 bit, we only suspend resuming by same kernel
that did the suspend, we'd like to remove that restriction
in the future.

Porting corresponding code from
64bit system: Allocate a safe page, and copy the restore
code to it, then jump to the safe page to run the code.

Signed-off-by: Zhimin Gu <kookoo.gu@intel.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/power/hibernate.c        | 2 --
 arch/x86/power/hibernate_32.c     | 4 ++++
 arch/x86/power/hibernate_asm_32.S | 7 +++++++
 3 files changed, 11 insertions(+), 2 deletions(-)

diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index 4935b81..7383cb6 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -212,7 +212,6 @@ int arch_hibernation_header_restore(void *addr)
 	return 0;
 }
 
-#ifdef CONFIG_X86_64
 int relocate_restore_code(void)
 {
 	pgd_t *pgd;
@@ -251,4 +250,3 @@ out:
 	__flush_tlb_all();
 	return 0;
 }
-#endif
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index a44bdad..a986109 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -158,6 +158,10 @@ asmlinkage int swsusp_arch_resume(void)
 
 	temp_pgt = __pa(resume_pg_dir);
 
+	error = relocate_restore_code();
+	if (error)
+		return error;
+
 	/* We have got enough memory and from now on we cannot recover */
 	restore_image();
 	return 0;
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 6b2b949..e9adda6 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -39,6 +39,13 @@ ENTRY(restore_image)
 	movl	restore_cr3, %ebp
 
 	movl	mmu_cr4_features, %ecx
+
+	/* jump to relocated restore code */
+	movl	relocated_restore_code, %eax
+	jmpl	*%eax
+
+/* code below has been relocated to a safe page */
+ENTRY(core_restore_code)
 	movl	temp_pgt, %eax
 	movl	%eax, %cr3
 
-- 
cgit v1.1


From 5331d2c7efbccab436aa11639d7fa00a1d58abe2 Mon Sep 17 00:00:00 2001
From: Zhimin Gu <kookoo.gu@intel.com>
Date: Fri, 21 Sep 2018 14:28:32 +0800
Subject: x86-32, hibernate: Set up temporary text mapping for 32bit system

Set up the temporary text mapping for the final jump address
so that the system could jump to the right address after all
the pages have been copied back to their original address -
otherwise the final mapping for the jump address is invalid.

Analogous changes were made for 64-bit in commit 65c0554b73c9
(x86/power/64: Fix kernel text mapping corruption during image
restoration).

Signed-off-by: Zhimin Gu <kookoo.gu@intel.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/power/hibernate.c        |  4 ----
 arch/x86/power/hibernate_32.c     | 31 +++++++++++++++++++++++++++++++
 arch/x86/power/hibernate_asm_32.S |  3 +++
 3 files changed, 34 insertions(+), 4 deletions(-)

diff --git a/arch/x86/power/hibernate.c b/arch/x86/power/hibernate.c
index 7383cb6..bcddf09 100644
--- a/arch/x86/power/hibernate.c
+++ b/arch/x86/power/hibernate.c
@@ -157,10 +157,8 @@ int arch_hibernation_header_save(void *addr, unsigned int max_size)
 	if (max_size < sizeof(struct restore_data_record))
 		return -EOVERFLOW;
 	rdr->magic = RESTORE_MAGIC;
-#ifdef CONFIG_X86_64
 	rdr->jump_address = (unsigned long)restore_registers;
 	rdr->jump_address_phys = __pa_symbol(restore_registers);
-#endif
 
 	/*
 	 * The restore code fixes up CR3 and CR4 in the following sequence:
@@ -198,10 +196,8 @@ int arch_hibernation_header_restore(void *addr)
 		return -EINVAL;
 	}
 
-#ifdef CONFIG_X86_64
 	restore_jump_address = rdr->jump_address;
 	jump_address_phys = rdr->jump_address_phys;
-#endif
 	restore_cr3 = rdr->cr3;
 
 	if (hibernation_e820_mismatch(rdr->e820_digest)) {
diff --git a/arch/x86/power/hibernate_32.c b/arch/x86/power/hibernate_32.c
index a986109..15695e3 100644
--- a/arch/x86/power/hibernate_32.c
+++ b/arch/x86/power/hibernate_32.c
@@ -143,6 +143,32 @@ static inline void resume_init_first_level_page_table(pgd_t *pg_dir)
 #endif
 }
 
+static int set_up_temporary_text_mapping(pgd_t *pgd_base)
+{
+	pgd_t *pgd;
+	pmd_t *pmd;
+	pte_t *pte;
+
+	pgd = pgd_base + pgd_index(restore_jump_address);
+
+	pmd = resume_one_md_table_init(pgd);
+	if (!pmd)
+		return -ENOMEM;
+
+	if (boot_cpu_has(X86_FEATURE_PSE)) {
+		set_pmd(pmd + pmd_index(restore_jump_address),
+		__pmd((jump_address_phys & PMD_MASK) | pgprot_val(PAGE_KERNEL_LARGE_EXEC)));
+	} else {
+		pte = resume_one_page_table_init(pmd);
+		if (!pte)
+			return -ENOMEM;
+		set_pte(pte + pte_index(restore_jump_address),
+		__pte((jump_address_phys & PAGE_MASK) | pgprot_val(PAGE_KERNEL_EXEC)));
+	}
+
+	return 0;
+}
+
 asmlinkage int swsusp_arch_resume(void)
 {
 	int error;
@@ -152,6 +178,11 @@ asmlinkage int swsusp_arch_resume(void)
 		return -ENOMEM;
 
 	resume_init_first_level_page_table(resume_pg_dir);
+
+	error = set_up_temporary_text_mapping(resume_pg_dir);
+	if (error)
+		return error;
+
 	error = resume_physical_mapping_init(resume_pg_dir);
 	if (error)
 		return error;
diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index e9adda6..01f653f 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -36,6 +36,8 @@ ENTRY(swsusp_arch_suspend)
 ENDPROC(swsusp_arch_suspend)
 
 ENTRY(restore_image)
+	/* prepare to jump to the image kernel */
+	movl	restore_jump_address, %ebx
 	movl	restore_cr3, %ebp
 
 	movl	mmu_cr4_features, %ecx
@@ -74,6 +76,7 @@ copy_loop:
 	.p2align 4,,7
 
 done:
+	jmpl	*%ebx
 
 	/* code below belongs to the image kernel */
 	.align PAGE_SIZE
-- 
cgit v1.1


From 1fca4ba0b14d79e8a43822f7b0c7288efba4c9fa Mon Sep 17 00:00:00 2001
From: Zhimin Gu <kookoo.gu@intel.com>
Date: Fri, 21 Sep 2018 14:28:41 +0800
Subject: x86-32, hibernate: Adjust in_suspend after resumed on 32bit system

Update the in_suspend variable to reflect the actual hibernation
status. Back-port from 64bit system.

Signed-off-by: Zhimin Gu <kookoo.gu@intel.com>
Acked-by: Pavel Machek <pavel@ucw.cz>
Signed-off-by: Chen Yu <yu.c.chen@intel.com>
Acked-by: Thomas Gleixner <tglx@linutronix.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 arch/x86/power/hibernate_asm_32.S | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/arch/x86/power/hibernate_asm_32.S b/arch/x86/power/hibernate_asm_32.S
index 01f653f..6fe3830 100644
--- a/arch/x86/power/hibernate_asm_32.S
+++ b/arch/x86/power/hibernate_asm_32.S
@@ -103,5 +103,8 @@ ENTRY(restore_registers)
 
 	xorl	%eax, %eax
 
+	/* tell the hibernation core that we've just restored the memory */
+	movl	%eax, in_suspend
+
 	ret
 ENDPROC(restore_registers)
-- 
cgit v1.1


From 03dba278043358bbbf4f029be169e1e73d2fbe2b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 1 Oct 2018 11:56:21 +0200
Subject: cpuidle: menu: Replace data->predicted_us with local variable

The predicted_us field in struct menu_device is only accessed in
menu_select(), so replace it with a local variable in that function.

With that, stop using expected_interval instead of predicted_us to
store the new predicted idle duration value if it is set to the
selected state's target residency which is quite confusing.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/cpuidle/governors/menu.c | 23 +++++++++++------------
 1 file changed, 11 insertions(+), 12 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 021b08d..b6684fd 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -124,7 +124,6 @@ struct menu_device {
 	int             tick_wakeup;
 
 	unsigned int	next_timer_us;
-	unsigned int	predicted_us;
 	unsigned int	bucket;
 	unsigned int	correction_factor[BUCKETS];
 	unsigned int	intervals[INTERVALS];
@@ -290,6 +289,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	int idx;
 	unsigned int interactivity_req;
 	unsigned int expected_interval;
+	unsigned int predicted_us;
 	unsigned long nr_iowaiters, cpu_load;
 	ktime_t delta_next;
 
@@ -315,7 +315,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * operands are 32 bits.
 	 * Make sure to round up for half microseconds.
 	 */
-	data->predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
+	predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
 					 data->correction_factor[data->bucket],
 					 RESOLUTION * DECAY);
 
@@ -341,7 +341,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	/*
 	 * Use the lowest expected idle interval to pick the idle state.
 	 */
-	data->predicted_us = min(data->predicted_us, expected_interval);
+	predicted_us = min(predicted_us, expected_interval);
 
 	if (tick_nohz_tick_stopped()) {
 		/*
@@ -352,19 +352,18 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		 * the known time till the closest timer event for the idle
 		 * state selection.
 		 */
-		if (data->predicted_us < TICK_USEC)
-			data->predicted_us = ktime_to_us(delta_next);
+		if (predicted_us < TICK_USEC)
+			predicted_us = ktime_to_us(delta_next);
 	} else {
 		/*
 		 * Use the performance multiplier and the user-configurable
 		 * latency_req to determine the maximum exit latency.
 		 */
-		interactivity_req = data->predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
+		interactivity_req = predicted_us / performance_multiplier(nr_iowaiters, cpu_load);
 		if (latency_req > interactivity_req)
 			latency_req = interactivity_req;
 	}
 
-	expected_interval = data->predicted_us;
 	/*
 	 * Find the idle state with the lowest power while satisfying
 	 * our constraints.
@@ -378,8 +377,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			continue;
 		if (idx == -1)
 			idx = i; /* first enabled state */
-		if (s->target_residency > data->predicted_us) {
-			if (data->predicted_us < TICK_USEC)
+		if (s->target_residency > predicted_us) {
+			if (predicted_us < TICK_USEC)
 				break;
 
 			if (!tick_nohz_tick_stopped()) {
@@ -389,7 +388,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 				 * tick in that case and let the governor run
 				 * again in the next iteration of the loop.
 				 */
-				expected_interval = drv->states[idx].target_residency;
+				predicted_us = drv->states[idx].target_residency;
 				break;
 			}
 
@@ -412,7 +411,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			 * expected idle duration so that the tick is retained
 			 * as long as that target residency is low enough.
 			 */
-			expected_interval = drv->states[idx].target_residency;
+			predicted_us = drv->states[idx].target_residency;
 			break;
 		}
 		idx = i;
@@ -426,7 +425,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	 * expected idle duration is shorter than the tick period length.
 	 */
 	if (((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) ||
-	     expected_interval < TICK_USEC) && !tick_nohz_tick_stopped()) {
+	     predicted_us < TICK_USEC) && !tick_nohz_tick_stopped()) {
 		unsigned int delta_next_us = ktime_to_us(delta_next);
 
 		*stop_tick = false;
-- 
cgit v1.1


From 8ff3c22688ff3616ee53c5c54b3ee66a73326175 Mon Sep 17 00:00:00 2001
From: Nathan Chancellor <natechancellor@gmail.com>
Date: Tue, 2 Oct 2018 15:34:57 -0700
Subject: cpufreq / CPPC: Mark acpi_ids as used

Clang warns:

drivers/cpufreq/cppc_cpufreq.c:431:36: warning: variable 'cppc_acpi_ids'
is not needed and will not be emitted [-Wunneeded-internal-declaration]
static const struct acpi_device_id cppc_acpi_ids[] = {
                                   ^
1 warning generated.

Mark the definition as used so that Clang understands we don't want this
warning while not inhibiting Clang's dead code elimination from removing
the unreferenced internal symbol when moving the data it contains to the
globally available symbol via MODULE_DEVICE_TABLE.

$ nm -S drivers/cpufreq/cppc_cpufreq.o | grep acpi | tail -1
0000000000000000 0000000000000040 R __mod_acpi__cppc_acpi_ids_device_table

Suggested-by: Nick Desaulniers <ndesaulniers@google.com>
Reviewed-by: Nick Desaulniers <ndesaulniers@google.com>
Signed-off-by: Nathan Chancellor <natechancellor@gmail.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cppc_cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cppc_cpufreq.c b/drivers/cpufreq/cppc_cpufreq.c
index 30f3021..fd25c21c 100644
--- a/drivers/cpufreq/cppc_cpufreq.c
+++ b/drivers/cpufreq/cppc_cpufreq.c
@@ -428,7 +428,7 @@ MODULE_LICENSE("GPL");
 
 late_initcall(cppc_cpufreq_init);
 
-static const struct acpi_device_id cppc_acpi_ids[] = {
+static const struct acpi_device_id cppc_acpi_ids[] __used = {
 	{ACPI_PROCESSOR_DEVICE_HID, },
 	{}
 };
-- 
cgit v1.1


From 50b6b87c8484da8bb5dcce00f84ec80aee8fc2bd Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 3 Oct 2018 15:12:06 +0530
Subject: OPP: Improve error handling in dev_pm_opp_of_cpumask_add_table()

The error handling wasn't appropriate in
dev_pm_opp_of_cpumask_add_table(). For example it returns 0 on success
and also for the case where cpumask is empty or cpu_device wasn't found
for any of the CPUs.

It should really return error on such cases, so that the callers can be
aware of the outcome.

Fix it.

Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/of.c | 18 ++++++++++++------
 1 file changed, 12 insertions(+), 6 deletions(-)

diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index a71ff3a..67a384c 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -614,16 +614,18 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_remove_table);
 int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
 {
 	struct device *cpu_dev;
-	int cpu, ret = 0;
+	int cpu, ret;
 
-	WARN_ON(cpumask_empty(cpumask));
+	if (WARN_ON(cpumask_empty(cpumask)))
+		return -ENODEV;
 
 	for_each_cpu(cpu, cpumask) {
 		cpu_dev = get_cpu_device(cpu);
 		if (!cpu_dev) {
 			pr_err("%s: failed to get cpu%d device\n", __func__,
 			       cpu);
-			continue;
+			ret = -ENODEV;
+			goto remove_table;
 		}
 
 		ret = dev_pm_opp_of_add_table(cpu_dev);
@@ -635,12 +637,16 @@ int dev_pm_opp_of_cpumask_add_table(const struct cpumask *cpumask)
 			pr_debug("%s: couldn't find opp table for cpu:%d, %d\n",
 				 __func__, cpu, ret);
 
-			/* Free all other OPPs */
-			_dev_pm_opp_cpumask_remove_table(cpumask, cpu);
-			break;
+			goto remove_table;
 		}
 	}
 
+	return 0;
+
+remove_table:
+	/* Free all other OPPs */
+	_dev_pm_opp_cpumask_remove_table(cpumask, cpu);
+
 	return ret;
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_of_cpumask_add_table);
-- 
cgit v1.1


From 09f662f95306f3e3d47ab6842bc4b0bb868a80ad Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 3 Oct 2018 15:22:03 +0530
Subject: OPP: Return error on error from dev_pm_opp_get_opp_count()

Return error number instead of 0 on failures.

Fixes: a1e8c13600bf ("PM / OPP: "opp-hz" is optional for power domains")
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/core.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/opp/core.c b/drivers/opp/core.c
index cdf918a..2c2df4e 100644
--- a/drivers/opp/core.c
+++ b/drivers/opp/core.c
@@ -318,7 +318,7 @@ int dev_pm_opp_get_opp_count(struct device *dev)
 		count = PTR_ERR(opp_table);
 		dev_dbg(dev, "%s: OPP table not found (%d)\n",
 			__func__, count);
-		return 0;
+		return count;
 	}
 
 	count = _get_opp_count(opp_table);
-- 
cgit v1.1


From 51c99dd2c06b234575661fa1e0a1dea6c3ef566f Mon Sep 17 00:00:00 2001
From: Viresh Kumar <viresh.kumar@linaro.org>
Date: Wed, 3 Oct 2018 15:35:21 +0530
Subject: cpufreq: dt: Try freeing static OPPs only if we have added them

We can not call dev_pm_opp_of_cpumask_remove_table() freely anymore
since the latest OPP core updates as that uses reference counting to
free resources. There are cases where no static OPPs are added (using
DT) for a platform and trying to remove the OPP table may end up
decrementing refcount which is already zero and hence generating
warnings.

Lets track if we were able to add static OPPs or not and then only
remove the table based on that. Some reshuffling of code is also done to
do that.

Reported-by: Niklas Cassel <niklas.cassel@linaro.org>
Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq-dt.c | 34 +++++++++++++++++++---------------
 1 file changed, 19 insertions(+), 15 deletions(-)

diff --git a/drivers/cpufreq/cpufreq-dt.c b/drivers/cpufreq/cpufreq-dt.c
index 0a9ebf0..e58bfcb 100644
--- a/drivers/cpufreq/cpufreq-dt.c
+++ b/drivers/cpufreq/cpufreq-dt.c
@@ -32,6 +32,7 @@ struct private_data {
 	struct device *cpu_dev;
 	struct thermal_cooling_device *cdev;
 	const char *reg_name;
+	bool have_static_opps;
 };
 
 static struct freq_attr *cpufreq_dt_attr[] = {
@@ -204,6 +205,15 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 		}
 	}
 
+	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
+	if (!priv) {
+		ret = -ENOMEM;
+		goto out_put_regulator;
+	}
+
+	priv->reg_name = name;
+	priv->opp_table = opp_table;
+
 	/*
 	 * Initialize OPP tables for all policy->cpus. They will be shared by
 	 * all CPUs which have marked their CPUs shared with OPP bindings.
@@ -214,7 +224,8 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 	 *
 	 * OPPs might be populated at runtime, don't check for error here
 	 */
-	dev_pm_opp_of_cpumask_add_table(policy->cpus);
+	if (!dev_pm_opp_of_cpumask_add_table(policy->cpus))
+		priv->have_static_opps = true;
 
 	/*
 	 * But we need OPP table to function so if it is not there let's
@@ -240,19 +251,10 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 				__func__, ret);
 	}
 
-	priv = kzalloc(sizeof(*priv), GFP_KERNEL);
-	if (!priv) {
-		ret = -ENOMEM;
-		goto out_free_opp;
-	}
-
-	priv->reg_name = name;
-	priv->opp_table = opp_table;
-
 	ret = dev_pm_opp_init_cpufreq_table(cpu_dev, &freq_table);
 	if (ret) {
 		dev_err(cpu_dev, "failed to init cpufreq table: %d\n", ret);
-		goto out_free_priv;
+		goto out_free_opp;
 	}
 
 	priv->cpu_dev = cpu_dev;
@@ -282,10 +284,11 @@ static int cpufreq_init(struct cpufreq_policy *policy)
 
 out_free_cpufreq_table:
 	dev_pm_opp_free_cpufreq_table(cpu_dev, &freq_table);
-out_free_priv:
-	kfree(priv);
 out_free_opp:
-	dev_pm_opp_of_cpumask_remove_table(policy->cpus);
+	if (priv->have_static_opps)
+		dev_pm_opp_of_cpumask_remove_table(policy->cpus);
+	kfree(priv);
+out_put_regulator:
 	if (name)
 		dev_pm_opp_put_regulators(opp_table);
 out_put_clk:
@@ -300,7 +303,8 @@ static int cpufreq_exit(struct cpufreq_policy *policy)
 
 	cpufreq_cooling_unregister(priv->cdev);
 	dev_pm_opp_free_cpufreq_table(priv->cpu_dev, &policy->freq_table);
-	dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
+	if (priv->have_static_opps)
+		dev_pm_opp_of_cpumask_remove_table(policy->related_cpus);
 	if (priv->reg_name)
 		dev_pm_opp_put_regulators(priv->opp_table);
 
-- 
cgit v1.1


From deac8703da5faafce96be3f91c668dcd5da931a5 Mon Sep 17 00:00:00 2001
From: Dave Gerlach <d-gerlach@ti.com>
Date: Wed, 3 Oct 2018 16:13:15 +0530
Subject: PM / OPP: _of_add_opp_table_v2(): increment count only if OPP is
 added

Currently the _of_add_opp_table_v2 call loops through the OPP nodes in
the operating-points-v2 table in the device tree and calls
_opp_add_static_v2 for each to add them to the table. It counts each
iteration through this loop as an added OPP, however there are cases
where _opp_add_static_v2() returns 0 but no new OPP is added to the
list.

This can happen while adding duplicate OPP or if the OPP isn't supported
by hardware.

Because of this the count variable will contain the number of OPP nodes
in the table in device tree but not necessarily the ones that are
actually added.

As this count value is what is checked to determine if there are any
valid OPPs, if a platform has an operating-points-v2 table with all OPP
nodes containing opp-supported-hw values that are not currently
supported, then _of_add_opp_table_v2 will fail to abort as it should due
to an empty table.

Additionally, since commit 3ba98324e81a ("PM / OPP: Get
performance state using genpd helper"), the same count variable is
compared against the number of OPPs containing performance states and
requires that either all or none have pstates set, however in the case
of any opp table that has any entries that do not get added by
_opp_add_static_v2 due to incompatible opp-supported-hw fields, these
numbers will not match and _of_add_opp_table_v2 will incorrectly fail.

We need to clearly identify all the three cases (success, failure,
unsupported/duplicate OPPs) and then increment count only on success
case. Change return type of _opp_add_static_v2() to return the pointer
to the newly added OPP instead of an integer. This routine now returns a
valid pointer if the OPP is really added, NULL for unsupported or
duplicate OPPs, and error value cased as a pointer on errors.

Ideally the fixes tag in this commit should point back to the commit
that introduced OPP v2 initially, as that's where we started incorrectly
accounting for duplicate OPPs:

commit 274659029c9d ("PM / OPP: Add support to parse "operating-points-v2" bindings")

But it wasn't a real problem until recently as the count was only used
to check if any OPPs are added or not. And so this commit points to a
rather recent commit where we added more code that depends on the value
of "count".

Fixes: 3ba98324e81a ("PM / OPP: Get performance state using genpd helper")
Reported-by: Dave Gerlach <d-gerlach@ti.com>
Reported-by: Niklas Cassel <niklas.cassel@linaro.org>
Tested-by: Niklas Cassel <niklas.cassel@linaro.org>
Signed-off-by: Dave Gerlach <d-gerlach@ti.com>
Signed-off-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/opp/of.c | 33 ++++++++++++++++++++-------------
 1 file changed, 20 insertions(+), 13 deletions(-)

diff --git a/drivers/opp/of.c b/drivers/opp/of.c
index 67a384c..5a4b479 100644
--- a/drivers/opp/of.c
+++ b/drivers/opp/of.c
@@ -297,15 +297,21 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_of_remove_table);
  * removed by dev_pm_opp_remove.
  *
  * Return:
- * 0		On success OR
+ * Valid OPP pointer:
+ *		On success
+ * NULL:
  *		Duplicate OPPs (both freq and volt are same) and opp->available
- * -EEXIST	Freq are same and volt are different OR
+ *		OR if the OPP is not supported by hardware.
+ * ERR_PTR(-EEXIST):
+ *		Freq are same and volt are different OR
  *		Duplicate OPPs (both freq and volt are same) and !opp->available
- * -ENOMEM	Memory allocation failure
- * -EINVAL	Failed parsing the OPP node
+ * ERR_PTR(-ENOMEM):
+ *		Memory allocation failure
+ * ERR_PTR(-EINVAL):
+ *		Failed parsing the OPP node
  */
-static int _opp_add_static_v2(struct opp_table *opp_table, struct device *dev,
-			      struct device_node *np)
+static struct dev_pm_opp *_opp_add_static_v2(struct opp_table *opp_table,
+		struct device *dev, struct device_node *np)
 {
 	struct dev_pm_opp *new_opp;
 	u64 rate = 0;
@@ -315,7 +321,7 @@ static int _opp_add_static_v2(struct opp_table *opp_table, struct device *dev,
 
 	new_opp = _opp_allocate(opp_table);
 	if (!new_opp)
-		return -ENOMEM;
+		return ERR_PTR(-ENOMEM);
 
 	ret = of_property_read_u64(np, "opp-hz", &rate);
 	if (ret < 0) {
@@ -390,12 +396,12 @@ static int _opp_add_static_v2(struct opp_table *opp_table, struct device *dev,
 	 * frequency/voltage list.
 	 */
 	blocking_notifier_call_chain(&opp_table->head, OPP_EVENT_ADD, new_opp);
-	return 0;
+	return new_opp;
 
 free_opp:
 	_opp_free(new_opp);
 
-	return ret;
+	return ERR_PTR(ret);
 }
 
 /* Initializes OPP tables based on new bindings */
@@ -415,14 +421,15 @@ static int _of_add_opp_table_v2(struct device *dev, struct opp_table *opp_table)
 
 	/* We have opp-table node now, iterate over it and add OPPs */
 	for_each_available_child_of_node(opp_table->np, np) {
-		count++;
-
-		ret = _opp_add_static_v2(opp_table, dev, np);
-		if (ret) {
+		opp = _opp_add_static_v2(opp_table, dev, np);
+		if (IS_ERR(opp)) {
+			ret = PTR_ERR(opp);
 			dev_err(dev, "%s: Failed to add OPP, %d\n", __func__,
 				ret);
 			of_node_put(np);
 			goto put_list_kref;
+		} else if (opp) {
+			count++;
 		}
 	}
 
-- 
cgit v1.1


From 1cdda9486f5103fb133f88e662e48c504adbb779 Mon Sep 17 00:00:00 2001
From: Rajneesh Bhardwaj <rajneesh.bhardwaj@linux.intel.com>
Date: Fri, 28 Sep 2018 14:24:02 +0530
Subject: ACPI / PM: LPIT: Register sysfs attributes based on FADT

ACPI Low Power S0 Idle capabilities are announced via FADT table and can
be used to inform the kernel about the presence of one or more Low Power
Idle (LPI) entries as descried in LPIT table. LPIT table can exist
independently even if the FADT S0 Idle flag is not set and thus it could
confuse user since the following cpuidle attributes are created.

/sys/devices/system/cpu/cpuidle/low_power_idle_cpu_residency_us
/sys/devices/system/cpu/cpuidle/low_power_idle_system_residency_us

Presence or absence of above attributes could mean that the given
platform supports S0ix state or not.

This change allows to create the above cpuidle attributes only if
FADT table supports Low Power S0 Idle.

Signed-off-by: Rajneesh Bhardwaj <rajneesh.bhardwaj@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/acpi_lpit.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/acpi/acpi_lpit.c b/drivers/acpi/acpi_lpit.c
index cf4fc01..e43cb71 100644
--- a/drivers/acpi/acpi_lpit.c
+++ b/drivers/acpi/acpi_lpit.c
@@ -117,11 +117,17 @@ static void lpit_update_residency(struct lpit_residency_info *info,
 		if (!info->iomem_addr)
 			return;
 
+		if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0))
+			return;
+
 		/* Silently fail, if cpuidle attribute group is not present */
 		sysfs_add_file_to_group(&cpu_subsys.dev_root->kobj,
 					&dev_attr_low_power_idle_system_residency_us.attr,
 					"cpuidle");
 	} else if (info->gaddr.space_id == ACPI_ADR_SPACE_FIXED_HARDWARE) {
+		if (!(acpi_gbl_FADT.flags & ACPI_FADT_LOW_POWER_S0))
+			return;
+
 		/* Silently fail, if cpuidle attribute group is not present */
 		sysfs_add_file_to_group(&cpu_subsys.dev_root->kobj,
 					&dev_attr_low_power_idle_cpu_residency_us.attr,
-- 
cgit v1.1


From 5f26bdceb9c0a5e6c696aa2899d077cd3ae93413 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Oct 2018 23:42:02 +0200
Subject: cpuidle: menu: Fix wakeup statistics updates for polling state

If the CPU exits the "polling" state due to the time limit in the
loop in poll_idle(), this is not a real wakeup and it just means
that the "polling" state selection was not adequate.  The governor
mispredicted short idle duration, but had a more suitable state been
selected, the CPU might have spent more time in it.  In fact, there
is no reason to expect that there would have been a wakeup event
earlier than the next timer in that case.

Handling such cases as regular wakeups in menu_update() may cause the
menu governor to make suboptimal decisions going forward, but ignoring
them altogether would not be correct either, because every time
menu_select() is invoked, it makes a separate new attempt to predict
the idle duration taking distinct time to the closest timer event as
input and the outcomes of all those attempts should be recorded.

For this reason, make menu_update() always assume that if the
"polling" state was exited due to the time limit, the next proper
wakeup event for the CPU would be the next timer event (not
including the tick).

Fixes: a37b969a61c1 "cpuidle: poll_state: Add time limit to poll_idle()"
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/cpuidle/governors/menu.c | 10 ++++++++++
 drivers/cpuidle/poll_state.c     |  6 +++++-
 include/linux/cpuidle.h          |  1 +
 3 files changed, 16 insertions(+), 1 deletion(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index b6684fd..8b3f9c7 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -511,6 +511,16 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
 		 * duration predictor do a better job next time.
 		 */
 		measured_us = 9 * MAX_INTERESTING / 10;
+	} else if ((drv->states[last_idx].flags & CPUIDLE_FLAG_POLLING) &&
+		   dev->poll_time_limit) {
+		/*
+		 * The CPU exited the "polling" state due to a time limit, so
+		 * the idle duration prediction leading to the selection of that
+		 * state was inaccurate.  If a better prediction had been made,
+		 * the CPU might have been woken up from idle by the next timer.
+		 * Assume that to be the case.
+		 */
+		measured_us = data->next_timer_us;
 	} else {
 		/* measured value */
 		measured_us = dev->last_residency;
diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index 3f86d23..36ff5a1 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -17,6 +17,8 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
 {
 	u64 time_start = local_clock();
 
+	dev->poll_time_limit = false;
+
 	local_irq_enable();
 	if (!current_set_polling_and_test()) {
 		unsigned int loop_count = 0;
@@ -27,8 +29,10 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
 				continue;
 
 			loop_count = 0;
-			if (local_clock() - time_start > POLL_IDLE_TIME_LIMIT)
+			if (local_clock() - time_start > POLL_IDLE_TIME_LIMIT) {
+				dev->poll_time_limit = true;
 				break;
+			}
 		}
 	}
 	current_clr_polling();
diff --git a/include/linux/cpuidle.h b/include/linux/cpuidle.h
index d262f62..faed7a8 100644
--- a/include/linux/cpuidle.h
+++ b/include/linux/cpuidle.h
@@ -81,6 +81,7 @@ struct cpuidle_device {
 	unsigned int		registered:1;
 	unsigned int		enabled:1;
 	unsigned int		use_deepest_state:1;
+	unsigned int		poll_time_limit:1;
 	unsigned int		cpu;
 
 	int			last_residency;
-- 
cgit v1.1


From 23e8ceb9ce766c81d62434053aef6e7efea6fcc3 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Oct 2018 23:42:56 +0200
Subject: cpuidle: menu: Compute first_idx when latency_req is known

Since menu_select() can only set first_idx to 1 if the exit latency
of the second state is not greater than the latency limit, it should
first determine that limit.  Thus first_idx should be computed after
the "interactivity" factor has been taken into account.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewedy-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/cpuidle/governors/menu.c | 32 ++++++++++++++++----------------
 1 file changed, 16 insertions(+), 16 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 8b3f9c7..98a39c5 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -322,22 +322,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	expected_interval = get_typical_interval(data);
 	expected_interval = min(expected_interval, data->next_timer_us);
 
-	first_idx = 0;
-	if (drv->states[0].flags & CPUIDLE_FLAG_POLLING) {
-		struct cpuidle_state *s = &drv->states[1];
-		unsigned int polling_threshold;
-
-		/*
-		 * Default to a physical idle state, not to busy polling, unless
-		 * a timer is going to trigger really really soon.
-		 */
-		polling_threshold = max_t(unsigned int, 20, s->target_residency);
-		if (data->next_timer_us > polling_threshold &&
-		    latency_req > s->exit_latency && !s->disabled &&
-		    !dev->states_usage[1].disable)
-			first_idx = 1;
-	}
-
 	/*
 	 * Use the lowest expected idle interval to pick the idle state.
 	 */
@@ -364,6 +348,22 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			latency_req = interactivity_req;
 	}
 
+	first_idx = 0;
+	if (drv->states[0].flags & CPUIDLE_FLAG_POLLING) {
+		struct cpuidle_state *s = &drv->states[1];
+		unsigned int polling_threshold;
+
+		/*
+		 * Default to a physical idle state, not to busy polling, unless
+		 * a timer is going to trigger really really soon.
+		 */
+		polling_threshold = max_t(unsigned int, 20, s->target_residency);
+		if (data->next_timer_us > polling_threshold &&
+		    latency_req > s->exit_latency && !s->disabled &&
+		    !dev->states_usage[1].disable)
+			first_idx = 1;
+	}
+
 	/*
 	 * Find the idle state with the lowest power while satisfying
 	 * our constraints.
-- 
cgit v1.1


From 96c3d11df15323e7f6e55242f7bda610c4bef402 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Oct 2018 23:44:06 +0200
Subject: cpuidle: menu: Get rid of first_idx from menu_select()

Rearrange the code in menu_select() so that the loop over idle states
always starts from 0 and get rid of the first_idx variable.

While at it, add two empty lines to separate conditional statements
from one another.

No intentional behavior changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/cpuidle/governors/menu.c | 32 ++++++++++++++------------------
 1 file changed, 14 insertions(+), 18 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 98a39c5..5c7fafc 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -285,7 +285,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	struct menu_device *data = this_cpu_ptr(&menu_devices);
 	int latency_req = cpuidle_governor_latency_req(dev->cpu);
 	int i;
-	int first_idx;
 	int idx;
 	unsigned int interactivity_req;
 	unsigned int expected_interval;
@@ -348,36 +347,33 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			latency_req = interactivity_req;
 	}
 
-	first_idx = 0;
-	if (drv->states[0].flags & CPUIDLE_FLAG_POLLING) {
-		struct cpuidle_state *s = &drv->states[1];
-		unsigned int polling_threshold;
-
-		/*
-		 * Default to a physical idle state, not to busy polling, unless
-		 * a timer is going to trigger really really soon.
-		 */
-		polling_threshold = max_t(unsigned int, 20, s->target_residency);
-		if (data->next_timer_us > polling_threshold &&
-		    latency_req > s->exit_latency && !s->disabled &&
-		    !dev->states_usage[1].disable)
-			first_idx = 1;
-	}
-
 	/*
 	 * Find the idle state with the lowest power while satisfying
 	 * our constraints.
 	 */
 	idx = -1;
-	for (i = first_idx; i < drv->state_count; i++) {
+	for (i = 0; i < drv->state_count; i++) {
 		struct cpuidle_state *s = &drv->states[i];
 		struct cpuidle_state_usage *su = &dev->states_usage[i];
 
 		if (s->disabled || su->disable)
 			continue;
+
 		if (idx == -1)
 			idx = i; /* first enabled state */
+
 		if (s->target_residency > predicted_us) {
+			/*
+			 * Use a physical idle state, not busy polling, unless
+			 * a timer is going to trigger really really soon.
+			 */
+			if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
+			    i == idx + 1 && latency_req > s->exit_latency &&
+			    data->next_timer_us > max_t(unsigned int, 20,
+							s->target_residency)) {
+				idx = i;
+				break;
+			}
 			if (predicted_us < TICK_USEC)
 				break;
 
-- 
cgit v1.1


From eb40a380bff28f84b6583bba6786b46ef26ef548 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Oct 2018 23:45:07 +0200
Subject: cpuidle: menu: Do not update last_state_idx in menu_select()

It is not necessary to update data->last_state_idx in menu_select()
as it only is used in menu_update() which only runs when
data->needs_update is set and that is set only when updating
data->last_state_idx in menu_reflect().

Accordingly, drop the update of data->last_state_idx from
menu_select() and get rid of the (now redundant) "out" label
from it.

No intentional behavior changes.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
Reviewed-by: Daniel Lezcano <daniel.lezcano@linaro.org>
---
 drivers/cpuidle/governors/menu.c | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 5c7fafc..968379d 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -398,7 +398,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 			    s->target_residency <= ktime_to_us(delta_next))
 				idx = i;
 
-			goto out;
+			return idx;
 		}
 		if (s->exit_latency > latency_req) {
 			/*
@@ -445,10 +445,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		}
 	}
 
-out:
-	data->last_state_idx = idx;
-
-	return data->last_state_idx;
+	return idx;
 }
 
 /**
-- 
cgit v1.1


From 8b007ebec9a5d120d25c00d9b771aadf45ac5177 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Oct 2018 23:46:28 +0200
Subject: cpuidle: menu: Avoid computations for very close timers

If the next timer event (not including the tick) is closer than the
target residency of the second state or the PM QoS latency constraint
is below its exit latency, state[0] will be used regardless of any
other factors, so skip the computations in menu_select() then and
return 0 straight away from it.

Still, do that after the bucket has been determined to avoid
updating the correction factor for a stale bucket.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 drivers/cpuidle/governors/menu.c | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 968379d..667606c 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -309,6 +309,18 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	get_iowait_load(&nr_iowaiters, &cpu_load);
 	data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
 
+	if (unlikely(drv->state_count <= 1) ||
+	    ((data->next_timer_us < drv->states[1].target_residency ||
+	      latency_req < drv->states[1].exit_latency) &&
+	     !drv->states[0].disabled && !dev->states_usage[0].disable)) {
+		/*
+		 * In this case state[0] will be used no matter what, so return
+		 * it right away and keep the tick running.
+		 */
+		*stop_tick = false;
+		return 0;
+	}
+
 	/*
 	 * Force the result of multiplication to be 64 bits even if both
 	 * operands are 32 bits.
-- 
cgit v1.1


From 53812cdc9100e19f2e782851964355f2db5583de Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Oct 2018 23:47:43 +0200
Subject: cpuidle: menu: Move the latency_req == 0 special case check

It is better to always update data->bucket before returning from
menu_select() to avoid updating the correction factor for a stale
bucket, so combine the latency_req == 0 special check with the more
general check below.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Peter Zijlstra (Intel) <peterz@infradead.org>
---
 drivers/cpuidle/governors/menu.c | 8 +-------
 1 file changed, 1 insertion(+), 7 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 667606c..b754296 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -297,19 +297,13 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		data->needs_update = 0;
 	}
 
-	/* Special case when user has set very strict latency requirement */
-	if (unlikely(latency_req == 0)) {
-		*stop_tick = false;
-		return 0;
-	}
-
 	/* determine the expected residency time, round up */
 	data->next_timer_us = ktime_to_us(tick_nohz_get_sleep_length(&delta_next));
 
 	get_iowait_load(&nr_iowaiters, &cpu_load);
 	data->bucket = which_bucket(data->next_timer_us, nr_iowaiters);
 
-	if (unlikely(drv->state_count <= 1) ||
+	if (unlikely(drv->state_count <= 1 || latency_req == 0) ||
 	    ((data->next_timer_us < drv->states[1].target_residency ||
 	      latency_req < drv->states[1].exit_latency) &&
 	     !drv->states[0].disabled && !dev->states_usage[0].disable)) {
-- 
cgit v1.1


From 01bad1c6896db021db82042e71c2bf1f97cc026b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Tue, 2 Oct 2018 23:50:30 +0200
Subject: cpuidle: poll_state: Revise loop termination condition

If need_resched() returns "false", breaking out of the loop in
poll_idle() will cause a new idle state to be selected, so in fact
it usually doesn't make sense to spin in it longer than the target
residency of the second state.  [Note that the "polling" state is
used only if there is at least one "real" state defined in addition
to it, so the second state is always there.]  On the other hand,
breaking out of it early (say in case the next state is disabled)
shouldn't hurt as it is polling anyway.

For this reason, make the loop in poll_idle() break if the CPU has
been spinning longer than the target residency of the second state
(the "polling" state can only be state[0]).

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/poll_state.c | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/drivers/cpuidle/poll_state.c b/drivers/cpuidle/poll_state.c
index 36ff5a1..85792d3 100644
--- a/drivers/cpuidle/poll_state.c
+++ b/drivers/cpuidle/poll_state.c
@@ -9,7 +9,6 @@
 #include <linux/sched/clock.h>
 #include <linux/sched/idle.h>
 
-#define POLL_IDLE_TIME_LIMIT	(TICK_NSEC / 16)
 #define POLL_IDLE_RELAX_COUNT	200
 
 static int __cpuidle poll_idle(struct cpuidle_device *dev,
@@ -21,6 +20,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
 
 	local_irq_enable();
 	if (!current_set_polling_and_test()) {
+		u64 limit = (u64)drv->states[1].target_residency * NSEC_PER_USEC;
 		unsigned int loop_count = 0;
 
 		while (!need_resched()) {
@@ -29,7 +29,7 @@ static int __cpuidle poll_idle(struct cpuidle_device *dev,
 				continue;
 
 			loop_count = 0;
-			if (local_clock() - time_start > POLL_IDLE_TIME_LIMIT) {
+			if (local_clock() - time_start > limit) {
 				dev->poll_time_limit = true;
 				break;
 			}
-- 
cgit v1.1


From 9d21d33cab2dd5aadd23ad51baf1dc49d544fcc0 Mon Sep 17 00:00:00 2001
From: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Date: Fri, 5 Oct 2018 12:00:58 -0700
Subject: cpufreq: dt-platdev: allow RK3399 to have separate tunables per
 cluster

RK3899 has one cluster with 4 small cores, and another one with 2 big
cores, with cores in different clusters having different OPPs and thus
needing separate set of tunables. Let's enable this via
"have_governor_per_policy" platform data.

Signed-off-by: Dmitry Torokhov <dmitry.torokhov@gmail.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/cpufreq-dt-platdev.c | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/drivers/cpufreq/cpufreq-dt-platdev.c b/drivers/cpufreq/cpufreq-dt-platdev.c
index 805f8a0..b1c5468 100644
--- a/drivers/cpufreq/cpufreq-dt-platdev.c
+++ b/drivers/cpufreq/cpufreq-dt-platdev.c
@@ -79,7 +79,10 @@ static const struct of_device_id whitelist[] __initconst = {
 	{ .compatible = "rockchip,rk3328", },
 	{ .compatible = "rockchip,rk3366", },
 	{ .compatible = "rockchip,rk3368", },
-	{ .compatible = "rockchip,rk3399", },
+	{ .compatible = "rockchip,rk3399",
+	  .data = &(struct cpufreq_dt_platform_data)
+		{ .have_governor_per_policy = true, },
+	},
 
 	{ .compatible = "st-ericsson,u8500", },
 	{ .compatible = "st-ericsson,u8540", },
-- 
cgit v1.1


From 2733fb0d0699246711cf622e0e2faf02a05b69dc Mon Sep 17 00:00:00 2001
From: Anson Huang <Anson.Huang@nxp.com>
Date: Mon, 8 Oct 2018 14:07:34 +0800
Subject: cpufreq: imx6q: read OCOTP through nvmem for imx6ul/imx6ull

On i.MX6UL/i.MX6ULL, accessing OCOTP directly is wrong because
the ocotp clock needs to be enabled first. Add support for reading
OCOTP through the nvmem API, and keep the old method there to
support old dtb.

Signed-off-by: Anson Huang <Anson.Huang@nxp.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/imx6q-cpufreq.c | 52 +++++++++++++++++++++++++++--------------
 1 file changed, 35 insertions(+), 17 deletions(-)

diff --git a/drivers/cpufreq/imx6q-cpufreq.c b/drivers/cpufreq/imx6q-cpufreq.c
index b2ff423..8cfee0a 100644
--- a/drivers/cpufreq/imx6q-cpufreq.c
+++ b/drivers/cpufreq/imx6q-cpufreq.c
@@ -12,6 +12,7 @@
 #include <linux/cpu_cooling.h>
 #include <linux/err.h>
 #include <linux/module.h>
+#include <linux/nvmem-consumer.h>
 #include <linux/of.h>
 #include <linux/of_address.h>
 #include <linux/pm_opp.h>
@@ -290,20 +291,32 @@ put_node:
 #define OCOTP_CFG3_6ULL_SPEED_792MHZ	0x2
 #define OCOTP_CFG3_6ULL_SPEED_900MHZ	0x3
 
-static void imx6ul_opp_check_speed_grading(struct device *dev)
+static int imx6ul_opp_check_speed_grading(struct device *dev)
 {
-	struct device_node *np;
-	void __iomem *base;
 	u32 val;
+	int ret = 0;
 
-	np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-ocotp");
-	if (!np)
-		return;
+	if (of_find_property(dev->of_node, "nvmem-cells", NULL)) {
+		ret = nvmem_cell_read_u32(dev, "speed_grade", &val);
+		if (ret)
+			return ret;
+	} else {
+		struct device_node *np;
+		void __iomem *base;
+
+		np = of_find_compatible_node(NULL, NULL, "fsl,imx6ul-ocotp");
+		if (!np)
+			return -ENOENT;
+
+		base = of_iomap(np, 0);
+		of_node_put(np);
+		if (!base) {
+			dev_err(dev, "failed to map ocotp\n");
+			return -EFAULT;
+		}
 
-	base = of_iomap(np, 0);
-	if (!base) {
-		dev_err(dev, "failed to map ocotp\n");
-		goto put_node;
+		val = readl_relaxed(base + OCOTP_CFG3);
+		iounmap(base);
 	}
 
 	/*
@@ -314,7 +327,6 @@ static void imx6ul_opp_check_speed_grading(struct device *dev)
 	 * 2b'11: 900000000Hz on i.MX6ULL only;
 	 * We need to set the max speed of ARM according to fuse map.
 	 */
-	val = readl_relaxed(base + OCOTP_CFG3);
 	val >>= OCOTP_CFG3_SPEED_SHIFT;
 	val &= 0x3;
 
@@ -334,9 +346,7 @@ static void imx6ul_opp_check_speed_grading(struct device *dev)
 				dev_warn(dev, "failed to disable 900MHz OPP\n");
 	}
 
-	iounmap(base);
-put_node:
-	of_node_put(np);
+	return ret;
 }
 
 static int imx6q_cpufreq_probe(struct platform_device *pdev)
@@ -394,10 +404,18 @@ static int imx6q_cpufreq_probe(struct platform_device *pdev)
 	}
 
 	if (of_machine_is_compatible("fsl,imx6ul") ||
-	    of_machine_is_compatible("fsl,imx6ull"))
-		imx6ul_opp_check_speed_grading(cpu_dev);
-	else
+	    of_machine_is_compatible("fsl,imx6ull")) {
+		ret = imx6ul_opp_check_speed_grading(cpu_dev);
+		if (ret == -EPROBE_DEFER)
+			return ret;
+		if (ret) {
+			dev_err(cpu_dev, "failed to read ocotp: %d\n",
+				ret);
+			return ret;
+		}
+	} else {
 		imx6q_opp_check_speed_grading(cpu_dev);
+	}
 
 	/* Because we have added the OPPs here, we must free them */
 	free_opp = true;
-- 
cgit v1.1


From 8c22e2f695920ebd94f9a53bcf2a65eb36d4dba1 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Mon, 8 Oct 2018 11:06:18 -0400
Subject: cpupower: Fix AMD Family 0x17 msr_pstate size

The msr_pstate data is only 63 bits long and should be 64 bits.

Add in the missing bit from res1 for AMD Family 0x17.

Reference: https://www.amd.com/system/files/TechDocs/54945_PPR_Family_17h_Models_00h-0Fh.pdf, page 138.

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Stafford Horne <shorne@gmail.com>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/power/cpupower/utils/helpers/amd.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index bb41cdd..eed31b2 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -33,7 +33,7 @@ union msr_pstate {
 		unsigned vid:8;
 		unsigned iddval:8;
 		unsigned idddiv:2;
-		unsigned res1:30;
+		unsigned res1:31;
 		unsigned en:1;
 	} fam17h_bits;
 	unsigned long long val;
-- 
cgit v1.1


From f69ffc5d3db8f1f03fd6d1df5930f9a1fbd787b6 Mon Sep 17 00:00:00 2001
From: Prarit Bhargava <prarit@redhat.com>
Date: Mon, 8 Oct 2018 11:06:19 -0400
Subject: cpupower: Fix coredump on VMWare

cpupower crashes on VMWare guests.  The guests have the AMD PStateDef MSR
(0xC0010064 + state number) set to zero.  As a result fid and did are zero
and the crash occurs because of a divide by zero (cof = fid/did).  This
can be prevented by checking the enable bit in the PStateDef MSR before
calculating cof.  By doing this the value of pstate[i] remains zero and
the value can be tested before displaying the active Pstates.

Check the enable bit in the PstateDef register for all supported families
and only print out enabled Pstates.

Signed-off-by: Prarit Bhargava <prarit@redhat.com>
Cc: Shuah Khan <shuah@kernel.org>
Cc: Stafford Horne <shorne@gmail.com>
Signed-off-by: Shuah Khan (Samsung OSG) <shuah@kernel.org>
---
 tools/power/cpupower/utils/cpufreq-info.c | 2 ++
 tools/power/cpupower/utils/helpers/amd.c  | 5 +++++
 2 files changed, 7 insertions(+)

diff --git a/tools/power/cpupower/utils/cpufreq-info.c b/tools/power/cpupower/utils/cpufreq-info.c
index df43cd4..ccd08dd 100644
--- a/tools/power/cpupower/utils/cpufreq-info.c
+++ b/tools/power/cpupower/utils/cpufreq-info.c
@@ -200,6 +200,8 @@ static int get_boost_mode(unsigned int cpu)
 		printf(_("    Boost States: %d\n"), b_states);
 		printf(_("    Total States: %d\n"), pstate_no);
 		for (i = 0; i < pstate_no; i++) {
+			if (!pstates[i])
+				continue;
 			if (i < b_states)
 				printf(_("    Pstate-Pb%d: %luMHz (boost state)"
 					 "\n"), i, pstates[i]);
diff --git a/tools/power/cpupower/utils/helpers/amd.c b/tools/power/cpupower/utils/helpers/amd.c
index eed31b2..9607ada 100644
--- a/tools/power/cpupower/utils/helpers/amd.c
+++ b/tools/power/cpupower/utils/helpers/amd.c
@@ -119,6 +119,11 @@ int decode_pstates(unsigned int cpu, unsigned int cpu_family,
 		}
 		if (read_msr(cpu, MSR_AMD_PSTATE + i, &pstate.val))
 			return -1;
+		if ((cpu_family == 0x17) && (!pstate.fam17h_bits.en))
+			continue;
+		else if (!pstate.bits.en)
+			continue;
+
 		pstates[i] = get_cof(cpu_family, pstate);
 	}
 	*no = i;
-- 
cgit v1.1


From 5484f0334439701900121a107709c461215cadb6 Mon Sep 17 00:00:00 2001
From: Todd Brandt <todd.e.brandt@linux.intel.com>
Date: Mon, 8 Oct 2018 15:56:31 -0700
Subject: PM / tools: sleepgraph: first batch of v5.2 changes

general:
- add battery charge data before and after test
- remove special s0i3 handling
- remove melding of dmesg & ftrace data in old kernels, use one only
- updates to various kprobes in trace (ksys_sync, etc)
- enable pm_debug_messages during the test
- instrument more subsystems with dev functions (phy0)

error handling:
- return codes for tool show the status of the test run
- 0: success, 1: general error (no timeline), 2: fail (suspend aborted)
- monitor output of /sys/power/state, mark as failure if exception occurs
- add signal handler when using -result to catch tool exceptions

display control
- add -x commands for testing xset with mode settings and status
- allow display setting to on, off, suspend, standby
- add display mode change info to the log, along with a warning on fail

s2idle (freeze)
- remove fixed 10-phase dependency, allow any phase order & any count
- multiple phase occurences show as phase_nameN e.g. suspend_noirq3
- if multiple freezes occur, print multiple time values in header

summary:
- add new columns to summary output: issues, worst suspend/resume devices
- worst device: includes summation of all phases of suspend or resume
- issues: includes WARNING/ERROR/BUG from dmesg log, and other issues
- s2idle: multiple freezes show as FREEZExN in the issues column

Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/pm-graph/Makefile      |    4 +-
 tools/power/pm-graph/sleepgraph.8  |   13 +-
 tools/power/pm-graph/sleepgraph.py | 1378 +++++++++++++++++++-----------------
 3 files changed, 760 insertions(+), 635 deletions(-)

diff --git a/tools/power/pm-graph/Makefile b/tools/power/pm-graph/Makefile
index c1899cd..8455415 100644
--- a/tools/power/pm-graph/Makefile
+++ b/tools/power/pm-graph/Makefile
@@ -23,8 +23,8 @@ install : uninstall
 	install -m 644 config/suspend-x2-proc.cfg $(DESTDIR)$(PREFIX)/lib/pm-graph/config
 
 	install -d  $(DESTDIR)$(PREFIX)/bin
-	ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph
-	ln -s $(DESTDIR)$(PREFIX)/lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
+	ln -s ../lib/pm-graph/bootgraph.py $(DESTDIR)$(PREFIX)/bin/bootgraph
+	ln -s ../lib/pm-graph/sleepgraph.py $(DESTDIR)$(PREFIX)/bin/sleepgraph
 
 	install -d  $(DESTDIR)$(PREFIX)/share/man/man8
 	install bootgraph.8 $(DESTDIR)$(PREFIX)/share/man/man8
diff --git a/tools/power/pm-graph/sleepgraph.8 b/tools/power/pm-graph/sleepgraph.8
index 070be2c..24a2e7d 100644
--- a/tools/power/pm-graph/sleepgraph.8
+++ b/tools/power/pm-graph/sleepgraph.8
@@ -65,9 +65,9 @@ During test, enable/disable runtime suspend for all devices. The test is delayed
 by 5 seconds to allow runtime suspend changes to occur. The settings are restored
 after the test is complete.
 .TP
-\fB-display \fIon/off\fR
-Turn the display on or off for the test using the xset command. This helps
-maintain the consistecy of test data for better comparison.
+\fB-display \fIon/off/standby/suspend\fR
+Switch the display to the requested mode for the test using the xset command.
+This helps maintain the consistency of test data for better comparison.
 .TP
 \fB-skiphtml\fR
 Run the test and capture the trace logs, but skip the timeline generation.
@@ -183,6 +183,13 @@ Print out the contents of the ACPI Firmware Performance Data Table.
 \fB-battery\fR
 Print out battery status and current charge.
 .TP
+\fB-xon/-xoff/-xstandby/-xsuspend\fR
+Test xset by attempting to switch the display to the given mode. This
+is the same command which will be issued by \fB-display \fImode\fR.
+.TP
+\fB-xstat\fR
+Get the current DPMS display mode.
+.TP
 \fB-sysinfo\fR
 Print out system info extracted from BIOS. Reads /dev/mem directly instead of going through dmidecode.
 .TP
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 0c76047..343e800 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -54,6 +54,7 @@ import os
 import string
 import re
 import platform
+import signal
 from datetime import datetime
 import struct
 import ConfigParser
@@ -72,7 +73,7 @@ class SystemValues:
 	version = '5.1'
 	ansi = False
 	rs = 0
-	display = 0
+	display = ''
 	gzip = False
 	sync = False
 	verbose = False
@@ -99,6 +100,7 @@ class SystemValues:
 	tpath = '/sys/kernel/debug/tracing/'
 	fpdtpath = '/sys/firmware/acpi/tables/FPDT'
 	epath = '/sys/kernel/debug/tracing/events/power/'
+	pmdpath = '/sys/power/pm_debug_messages'
 	traceevents = [
 		'suspend_resume',
 		'device_pm_callback_end',
@@ -141,12 +143,10 @@ class SystemValues:
 	devprops = dict()
 	predelay = 0
 	postdelay = 0
-	procexecfmt = 'ps - (?P<ps>.*)$'
-	devpropfmt = '# Device Properties: .*'
-	tracertypefmt = '# tracer: (?P<t>.*)'
-	firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$'
+	pmdebug = ''
 	tracefuncs = {
 		'sys_sync': {},
+		'ksys_sync': {},
 		'__pm_notifier_call_chain': {},
 		'pm_prepare_console': {},
 		'pm_notifier_call_chain': {},
@@ -187,7 +187,6 @@ class SystemValues:
 	dev_tracefuncs = {
 		# general wait/delay/sleep
 		'msleep': { 'args_x86_64': {'time':'%di:s32'}, 'ub': 1 },
-		'schedule_timeout_uninterruptible': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 },
 		'schedule_timeout': { 'args_x86_64': {'timeout':'%di:s32'}, 'ub': 1 },
 		'udelay': { 'func':'__const_udelay', 'args_x86_64': {'loops':'%di:s32'}, 'ub': 1 },
 		'usleep_range': { 'args_x86_64': {'min':'%di:s32', 'max':'%si:s32'}, 'ub': 1 },
@@ -199,6 +198,9 @@ class SystemValues:
 		# filesystem
 		'ext4_sync_fs': {},
 		# 80211
+		'ath10k_bmi_read_memory': { 'args_x86_64': {'length':'%cx:s32'} },
+		'ath10k_bmi_write_memory': { 'args_x86_64': {'length':'%cx:s32'} },
+		'ath10k_bmi_fast_download': { 'args_x86_64': {'length':'%cx:s32'} },
 		'iwlagn_mac_start': {},
 		'iwlagn_alloc_bcast_station': {},
 		'iwl_trans_pcie_start_hw': {},
@@ -241,6 +243,7 @@ class SystemValues:
 	timeformat = '%.3f'
 	cmdline = '%s %s' % \
 			(os.path.basename(sys.argv[0]), ' '.join(sys.argv[1:]))
+	sudouser = ''
 	def __init__(self):
 		self.archargs = 'args_'+platform.machine()
 		self.hostname = platform.node()
@@ -256,10 +259,32 @@ class SystemValues:
 		if (hasattr(sys.stdout, 'isatty') and sys.stdout.isatty()):
 			self.ansi = True
 		self.testdir = datetime.now().strftime('suspend-%y%m%d-%H%M%S')
+		if os.getuid() == 0 and 'SUDO_USER' in os.environ and \
+			os.environ['SUDO_USER']:
+			self.sudouser = os.environ['SUDO_USER']
 	def vprint(self, msg):
 		self.logmsg += msg+'\n'
-		if(self.verbose):
+		if self.verbose or msg.startswith('WARNING:'):
 			print(msg)
+	def signalHandler(self, signum, frame):
+		if not self.result:
+			return
+		signame = self.signames[signum] if signum in self.signames else 'UNKNOWN'
+		msg = 'Signal %s caused a tool exit, line %d' % (signame, frame.f_lineno)
+		sysvals.outputResult({'error':msg})
+		sys.exit(3)
+	def signalHandlerInit(self):
+		capture = ['BUS', 'SYS', 'XCPU', 'XFSZ', 'PWR', 'HUP', 'INT', 'QUIT',
+			'ILL', 'ABRT', 'FPE', 'SEGV', 'TERM', 'TSTP']
+		self.signames = dict()
+		for i in capture:
+			s = 'SIG'+i
+			try:
+				signum = getattr(signal, s)
+				signal.signal(signum, self.signalHandler)
+			except:
+				continue
+			self.signames[signum] = s
 	def rootCheck(self, fatal=True):
 		if(os.access(self.powerfile, os.W_OK)):
 			return True
@@ -267,7 +292,7 @@ class SystemValues:
 			msg = 'This command requires sysfs mount and root access'
 			print('ERROR: %s\n') % msg
 			self.outputResult({'error':msg})
-			sys.exit()
+			sys.exit(1)
 		return False
 	def rootUser(self, fatal=False):
 		if 'USER' in os.environ and os.environ['USER'] == 'root':
@@ -276,7 +301,7 @@ class SystemValues:
 			msg = 'This command must be run as root'
 			print('ERROR: %s\n') % msg
 			self.outputResult({'error':msg})
-			sys.exit()
+			sys.exit(1)
 		return False
 	def getExec(self, cmd):
 		dirlist = ['/sbin', '/bin', '/usr/sbin', '/usr/bin',
@@ -406,8 +431,8 @@ class SystemValues:
 				ktime = m.group('ktime')
 		fp.close()
 		self.dmesgstart = float(ktime)
-	def getdmesg(self, fwdata=[]):
-		op = self.writeDatafileHeader(sysvals.dmesgfile, fwdata)
+	def getdmesg(self, testdata):
+		op = self.writeDatafileHeader(sysvals.dmesgfile, testdata)
 		# store all new dmesg lines since initdmesg was called
 		fp = Popen('dmesg', stdout=PIPE).stdout
 		for line in fp:
@@ -619,6 +644,8 @@ class SystemValues:
 			self.fsetVal('0', 'events/kprobes/enable')
 			self.fsetVal('', 'kprobe_events')
 			self.fsetVal('1024', 'buffer_size_kb')
+		if self.pmdebug:
+			self.setVal(self.pmdebug, self.pmdpath)
 	def setupAllKprobes(self):
 		for name in self.tracefuncs:
 			self.defaultKprobe(name, self.tracefuncs[name])
@@ -641,6 +668,11 @@ class SystemValues:
 		# turn trace off
 		self.fsetVal('0', 'tracing_on')
 		self.cleanupFtrace()
+		# pm debug messages
+		pv = self.getVal(self.pmdpath)
+		if pv != '1':
+			self.setVal('1', self.pmdpath)
+			self.pmdebug = pv
 		# set the trace clock to global
 		self.fsetVal('global', 'trace_clock')
 		self.fsetVal('nop', 'current_tracer')
@@ -728,19 +760,24 @@ class SystemValues:
 		if not self.ansi:
 			return str
 		return '\x1B[%d;40m%s\x1B[m' % (color, str)
-	def writeDatafileHeader(self, filename, fwdata=[]):
+	def writeDatafileHeader(self, filename, testdata):
 		fp = self.openlog(filename, 'w')
 		fp.write('%s\n%s\n# command | %s\n' % (self.teststamp, self.sysstamp, self.cmdline))
-		if(self.suspendmode == 'mem' or self.suspendmode == 'command'):
-			for fw in fwdata:
+		for test in testdata:
+			if 'fw' in test:
+				fw = test['fw']
 				if(fw):
 					fp.write('# fwsuspend %u fwresume %u\n' % (fw[0], fw[1]))
+			if 'bat' in test:
+				(a1, c1), (a2, c2) = test['bat']
+				fp.write('# battery %s %d %s %d\n' % (a1, c1, a2, c2))
+			if test['error'] or len(testdata) > 1:
+				fp.write('# enter_sleep_error %s\n' % test['error'])
 		return fp
-	def sudouser(self, dir):
-		if os.path.exists(dir) and os.getuid() == 0 and \
-			'SUDO_USER' in os.environ:
+	def sudoUserchown(self, dir):
+		if os.path.exists(dir) and self.sudouser:
 			cmd = 'chown -R {0}:{0} {1} > /dev/null 2>&1'
-			call(cmd.format(os.environ['SUDO_USER'], dir), shell=True)
+			call(cmd.format(self.sudouser, dir), shell=True)
 	def outputResult(self, testdata, num=0):
 		if not self.result:
 			return
@@ -762,7 +799,7 @@ class SystemValues:
 		if 'bugurl' in testdata:
 			fp.write('url%s: %s\n' % (n, testdata['bugurl']))
 		fp.close()
-		self.sudouser(self.result)
+		self.sudoUserchown(self.result)
 	def configFile(self, file):
 		dir = os.path.dirname(os.path.realpath(__file__))
 		if os.path.exists(file):
@@ -800,11 +837,12 @@ suspendmodename = {
 #	 Simple class which holds property values collected
 #	 for all the devices used in the timeline.
 class DevProps:
-	syspath = ''
-	altname = ''
-	async = True
-	xtraclass = ''
-	xtrainfo = ''
+	def __init__(self):
+		self.syspath = ''
+		self.altname = ''
+		self.async = True
+		self.xtraclass = ''
+		self.xtrainfo = ''
 	def out(self, dev):
 		return '%s,%s,%d;' % (dev, self.altname, self.async)
 	def debug(self, dev):
@@ -831,9 +869,6 @@ class DevProps:
 #	 A container used to create a device hierachy, with a single root node
 #	 and a tree of child nodes. Used by Data.deviceTopology()
 class DeviceNode:
-	name = ''
-	children = 0
-	depth = 0
 	def __init__(self, nodename, nodedepth):
 		self.name = nodename
 		self.children = []
@@ -861,71 +896,78 @@ class DeviceNode:
 #	}
 #
 class Data:
-	dmesg = {}  # root data structure
-	phases = [] # ordered list of phases
-	start = 0.0 # test start
-	end = 0.0   # test end
-	tSuspended = 0.0 # low-level suspend start
-	tResumed = 0.0   # low-level resume start
-	tKernSus = 0.0   # kernel level suspend start
-	tKernRes = 0.0   # kernel level resume end
-	tLow = 0.0       # time spent in low-level suspend (standby/freeze)
-	fwValid = False  # is firmware data available
-	fwSuspend = 0    # time spent in firmware suspend
-	fwResume = 0     # time spent in firmware resume
-	dmesgtext = []   # dmesg text file in memory
-	pstl = 0         # process timeline
-	testnumber = 0
-	idstr = ''
-	html_device_id = 0
-	stamp = 0
-	outfile = ''
-	devpids = []
-	kerror = False
+	phasedef = {
+		'suspend_prepare': {'order': 0, 'color': '#CCFFCC'},
+		        'suspend': {'order': 1, 'color': '#88FF88'},
+		   'suspend_late': {'order': 2, 'color': '#00AA00'},
+		  'suspend_noirq': {'order': 3, 'color': '#008888'},
+		'suspend_machine': {'order': 4, 'color': '#0000FF'},
+		 'resume_machine': {'order': 5, 'color': '#FF0000'},
+		   'resume_noirq': {'order': 6, 'color': '#FF9900'},
+		   'resume_early': {'order': 7, 'color': '#FFCC00'},
+		         'resume': {'order': 8, 'color': '#FFFF88'},
+		'resume_complete': {'order': 9, 'color': '#FFFFCC'},
+	}
+	errlist = {
+		'HWERROR' : '.*\[ *Hardware Error *\].*',
+		'FWBUG'   : '.*\[ *Firmware Bug *\].*',
+		'BUG'     : '.*BUG.*',
+		'ERROR'   : '.*ERROR.*',
+		'WARNING' : '.*WARNING.*',
+		'IRQ'     : '.*genirq: .*',
+		'TASKFAIL': '.*Freezing of tasks failed.*',
+	}
 	def __init__(self, num):
 		idchar = 'abcdefghij'
-		self.pstl = dict()
+		self.start = 0.0 # test start
+		self.end = 0.0   # test end
+		self.tSuspended = 0.0 # low-level suspend start
+		self.tResumed = 0.0   # low-level resume start
+		self.tKernSus = 0.0   # kernel level suspend start
+		self.tKernRes = 0.0   # kernel level resume end
+		self.fwValid = False  # is firmware data available
+		self.fwSuspend = 0    # time spent in firmware suspend
+		self.fwResume = 0     # time spent in firmware resume
+		self.html_device_id = 0
+		self.stamp = 0
+		self.outfile = ''
+		self.kerror = False
+		self.battery = 0
+		self.enterfail = ''
+		self.currphase = ''
+		self.pstl = dict()    # process timeline
 		self.testnumber = num
 		self.idstr = idchar[num]
-		self.dmesgtext = []
-		self.phases = []
-		self.dmesg = { # fixed list of 10 phases
-			'suspend_prepare': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#CCFFCC', 'order': 0},
-			        'suspend': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#88FF88', 'order': 1},
-			   'suspend_late': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#00AA00', 'order': 2},
-			  'suspend_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#008888', 'order': 3},
-		    'suspend_machine': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#0000FF', 'order': 4},
-			 'resume_machine': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FF0000', 'order': 5},
-			   'resume_noirq': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FF9900', 'order': 6},
-			   'resume_early': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FFCC00', 'order': 7},
-			         'resume': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FFFF88', 'order': 8},
-			'resume_complete': {'list': dict(), 'start': -1.0, 'end': -1.0,
-								'row': 0, 'color': '#FFFFCC', 'order': 9}
-		}
-		self.phases = self.sortedPhases()
+		self.dmesgtext = []   # dmesg text file in memory
+		self.dmesg = dict()   # root data structure
+		self.errorinfo = {'suspend':[],'resume':[]}
+		self.tLow = []        # time spent in low-level suspends (standby/freeze)
+		self.devpids = []
+		self.devicegroups = 0
+	def sortedPhases(self):
+		return sorted(self.dmesg, key=lambda k:self.dmesg[k]['order'])
+	def initDevicegroups(self):
+		# called when phases are all finished being added
+		for phase in self.dmesg.keys():
+			if '*' in phase:
+				p = phase.split('*')
+				pnew = '%s%d' % (p[0], len(p))
+				self.dmesg[pnew] = self.dmesg.pop(phase)
 		self.devicegroups = []
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			self.devicegroups.append([phase])
-		self.errorinfo = {'suspend':[],'resume':[]}
+	def nextPhase(self, phase, offset):
+		order = self.dmesg[phase]['order'] + offset
+		for p in self.dmesg:
+			if self.dmesg[p]['order'] == order:
+				return p
+		return ''
+	def lastPhase(self):
+		plist = self.sortedPhases()
+		if len(plist) < 1:
+			return ''
+		return plist[-1]
 	def extractErrorInfo(self):
-		elist = {
-			'HWERROR' : '.*\[ *Hardware Error *\].*',
-			'FWBUG'   : '.*\[ *Firmware Bug *\].*',
-			'BUG'     : '.*BUG.*',
-			'ERROR'   : '.*ERROR.*',
-			'WARNING' : '.*WARNING.*',
-			'IRQ'     : '.*genirq: .*',
-			'TASKFAIL': '.*Freezing of tasks failed.*',
-		}
 		lf = sysvals.openlog(sysvals.dmesgfile, 'r')
 		i = 0
 		list = []
@@ -939,8 +981,8 @@ class Data:
 				continue
 			dir = 'suspend' if t < self.tSuspended else 'resume'
 			msg = m.group('msg')
-			for err in elist:
-				if re.match(elist[err], msg):
+			for err in self.errlist:
+				if re.match(self.errlist[err], msg):
 					list.append((err, dir, t, i, i))
 					self.kerror = True
 					break
@@ -956,7 +998,7 @@ class Data:
 	def setEnd(self, time):
 		self.end = time
 	def isTraceEventOutsideDeviceCalls(self, pid, time):
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for dev in list:
 				d = list[dev]
@@ -964,16 +1006,10 @@ class Data:
 					time < d['end']):
 					return False
 		return True
-	def phaseCollision(self, phase, isbegin, line):
-		key = 'end'
-		if isbegin:
-			key = 'start'
-		if self.dmesg[phase][key] >= 0:
-			sysvals.vprint('IGNORE: %s' % line.strip())
-			return True
-		return False
 	def sourcePhase(self, start):
-		for phase in self.phases:
+		for phase in self.sortedPhases():
+			if 'machine' in phase:
+				continue
 			pend = self.dmesg[phase]['end']
 			if start <= pend:
 				return phase
@@ -1004,14 +1040,15 @@ class Data:
 		return tgtdev
 	def addDeviceFunctionCall(self, displayname, kprobename, proc, pid, start, end, cdata, rdata):
 		# try to place the call in a device
-		tgtdev = self.sourceDevice(self.phases, start, end, pid, 'device')
+		phases = self.sortedPhases()
+		tgtdev = self.sourceDevice(phases, start, end, pid, 'device')
 		# calls with device pids that occur outside device bounds are dropped
 		# TODO: include these somehow
 		if not tgtdev and pid in self.devpids:
 			return False
 		# try to place the call in a thread
 		if not tgtdev:
-			tgtdev = self.sourceDevice(self.phases, start, end, pid, 'thread')
+			tgtdev = self.sourceDevice(phases, start, end, pid, 'thread')
 		# create new thread blocks, expand as new calls are found
 		if not tgtdev:
 			if proc == '<...>':
@@ -1053,7 +1090,7 @@ class Data:
 	def overflowDevices(self):
 		# get a list of devices that extend beyond the end of this test run
 		devlist = []
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for devname in list:
 				dev = list[devname]
@@ -1064,7 +1101,7 @@ class Data:
 		# merge any devices that overlap devlist
 		for dev in devlist:
 			devname = dev['name']
-			for phase in self.phases:
+			for phase in self.sortedPhases():
 				list = self.dmesg[phase]['list']
 				if devname not in list:
 					continue
@@ -1079,7 +1116,7 @@ class Data:
 				del list[devname]
 	def usurpTouchingThread(self, name, dev):
 		# the caller test has priority of this thread, give it to him
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			if name in list:
 				tdev = list[name]
@@ -1093,7 +1130,7 @@ class Data:
 				break
 	def stitchTouchingThreads(self, testlist):
 		# merge any threads between tests that touch
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for devname in list:
 				dev = list[devname]
@@ -1103,7 +1140,7 @@ class Data:
 					data.usurpTouchingThread(devname, dev)
 	def optimizeDevSrc(self):
 		# merge any src call loops to reduce timeline size
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			for dev in list:
 				if 'src' not in list[dev]:
@@ -1141,7 +1178,7 @@ class Data:
 		self.tKernSus = self.trimTimeVal(self.tKernSus, t0, dT, left)
 		self.tKernRes = self.trimTimeVal(self.tKernRes, t0, dT, left)
 		self.end = self.trimTimeVal(self.end, t0, dT, left)
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			p = self.dmesg[phase]
 			p['start'] = self.trimTimeVal(p['start'], t0, dT, left)
 			p['end'] = self.trimTimeVal(p['end'], t0, dT, left)
@@ -1150,6 +1187,7 @@ class Data:
 				d = list[name]
 				d['start'] = self.trimTimeVal(d['start'], t0, dT, left)
 				d['end'] = self.trimTimeVal(d['end'], t0, dT, left)
+				d['length'] = d['end'] - d['start']
 				if('ftrace' in d):
 					cg = d['ftrace']
 					cg.start = self.trimTimeVal(cg.start, t0, dT, left)
@@ -1166,30 +1204,59 @@ class Data:
 				tm = self.trimTimeVal(tm, t0, dT, left)
 				list.append((type, tm, idx1, idx2))
 			self.errorinfo[dir] = list
-	def normalizeTime(self, tZero):
+	def trimFreezeTime(self, tZero):
 		# trim out any standby or freeze clock time
-		if(self.tSuspended != self.tResumed):
-			if(self.tResumed > tZero):
-				self.trimTime(self.tSuspended, \
-					self.tResumed-self.tSuspended, True)
-			else:
-				self.trimTime(self.tSuspended, \
-					self.tResumed-self.tSuspended, False)
+		lp = ''
+		for phase in self.sortedPhases():
+			if 'resume_machine' in phase and 'suspend_machine' in lp:
+				tS, tR = self.dmesg[lp]['end'], self.dmesg[phase]['start']
+				tL = tR - tS
+				if tL > 0:
+					left = True if tR > tZero else False
+					self.trimTime(tS, tL, left)
+					self.tLow.append('%.0f'%(tL*1000))
+			lp = phase
 	def getTimeValues(self):
-		sktime = (self.dmesg['suspend_machine']['end'] - \
-			self.tKernSus) * 1000
-		rktime = (self.dmesg['resume_complete']['end'] - \
-			self.dmesg['resume_machine']['start']) * 1000
+		if 'suspend_machine' in self.dmesg:
+			sktime = (self.dmesg['suspend_machine']['end'] - \
+				self.tKernSus) * 1000
+		else:
+			sktime = (self.tSuspended - self.tKernSus) * 1000
+		if 'resume_machine' in self.dmesg:
+			rktime = (self.tKernRes - \
+				self.dmesg['resume_machine']['start']) * 1000
+		else:
+			rktime = (self.tKernRes - self.tResumed) * 1000
 		return (sktime, rktime)
-	def setPhase(self, phase, ktime, isbegin):
+	def setPhase(self, phase, ktime, isbegin, order=-1):
 		if(isbegin):
+			# phase start over current phase
+			if self.currphase:
+				if 'resume_machine' not in self.currphase:
+					sysvals.vprint('WARNING: phase %s failed to end' % self.currphase)
+				self.dmesg[self.currphase]['end'] = ktime
+			phases = self.dmesg.keys()
+			color = self.phasedef[phase]['color']
+			count = len(phases) if order < 0 else order
+			# create unique name for every new phase
+			while phase in phases:
+				phase += '*'
+			self.dmesg[phase] = {'list': dict(), 'start': -1.0, 'end': -1.0,
+				'row': 0, 'color': color, 'order': count}
 			self.dmesg[phase]['start'] = ktime
+			self.currphase = phase
 		else:
+			# phase end without a start
+			if phase not in self.currphase:
+				if self.currphase:
+					sysvals.vprint('WARNING: %s ended instead of %s, ftrace corruption?' % (phase, self.currphase))
+				else:
+					sysvals.vprint('WARNING: %s ended without a start, ftrace corruption?' % phase)
+					return phase
+			phase = self.currphase
 			self.dmesg[phase]['end'] = ktime
-	def dmesgSortVal(self, phase):
-		return self.dmesg[phase]['order']
-	def sortedPhases(self):
-		return sorted(self.dmesg, key=self.dmesgSortVal)
+			self.currphase = ''
+		return phase
 	def sortedDevices(self, phase):
 		list = self.dmesg[phase]['list']
 		slist = []
@@ -1208,13 +1275,13 @@ class Data:
 		for devname in phaselist:
 			dev = phaselist[devname]
 			if(dev['end'] < 0):
-				for p in self.phases:
+				for p in self.sortedPhases():
 					if self.dmesg[p]['end'] > dev['start']:
 						dev['end'] = self.dmesg[p]['end']
 						break
 				sysvals.vprint('%s (%s): callback didnt return' % (devname, phase))
 	def deviceFilter(self, devicefilter):
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.dmesg[phase]['list']
 			rmlist = []
 			for name in list:
@@ -1229,7 +1296,7 @@ class Data:
 				del list[name]
 	def fixupInitcallsThatDidntReturn(self):
 		# if any calls never returned, clip them at system resume end
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			self.fixupInitcalls(phase)
 	def phaseOverlap(self, phases):
 		rmgroups = []
@@ -1248,17 +1315,18 @@ class Data:
 		self.devicegroups.append(newgroup)
 	def newActionGlobal(self, name, start, end, pid=-1, color=''):
 		# which phase is this device callback or action in
+		phases = self.sortedPhases()
 		targetphase = 'none'
 		htmlclass = ''
 		overlap = 0.0
-		phases = []
-		for phase in self.phases:
+		myphases = []
+		for phase in phases:
 			pstart = self.dmesg[phase]['start']
 			pend = self.dmesg[phase]['end']
 			# see if the action overlaps this phase
 			o = max(0, min(end, pend) - max(start, pstart))
 			if o > 0:
-				phases.append(phase)
+				myphases.append(phase)
 			# set the target phase to the one that overlaps most
 			if o > overlap:
 				if overlap > 0 and phase == 'post_resume':
@@ -1267,19 +1335,19 @@ class Data:
 				overlap = o
 		# if no target phase was found, pin it to the edge
 		if targetphase == 'none':
-			p0start = self.dmesg[self.phases[0]]['start']
+			p0start = self.dmesg[phases[0]]['start']
 			if start <= p0start:
-				targetphase = self.phases[0]
+				targetphase = phases[0]
 			else:
-				targetphase = self.phases[-1]
+				targetphase = phases[-1]
 		if pid == -2:
 			htmlclass = ' bg'
 		elif pid == -3:
 			htmlclass = ' ps'
-		if len(phases) > 1:
+		if len(myphases) > 1:
 			htmlclass = ' bg'
-			self.phaseOverlap(phases)
-		if targetphase in self.phases:
+			self.phaseOverlap(myphases)
+		if targetphase in phases:
 			newname = self.newAction(targetphase, name, pid, '', start, end, '', htmlclass, color)
 			return (targetphase, newname)
 		return False
@@ -1315,7 +1383,7 @@ class Data:
 		sysvals.vprint('Timeline Details:')
 		sysvals.vprint('          test start: %f' % self.start)
 		sysvals.vprint('kernel suspend start: %f' % self.tKernSus)
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			dc = len(self.dmesg[phase]['list'])
 			sysvals.vprint('    %16s: %f - %f (%d devices)' % (phase, \
 				self.dmesg[phase]['start'], self.dmesg[phase]['end'], dc))
@@ -1323,7 +1391,7 @@ class Data:
 		sysvals.vprint('            test end: %f' % self.end)
 	def deviceChildrenAllPhases(self, devname):
 		devlist = []
-		for phase in self.phases:
+		for phase in self.sortedPhases():
 			list = self.deviceChildren(devname, phase)
 			for dev in list:
 				if dev not in devlist:
@@ -1344,7 +1412,7 @@ class Data:
 		if node.name:
 			info = ''
 			drv = ''
-			for phase in self.phases:
+			for phase in self.sortedPhases():
 				list = self.dmesg[phase]['list']
 				if node.name in list:
 					s = list[node.name]['start']
@@ -1478,8 +1546,29 @@ class Data:
 			c = self.addProcessUsageEvent(ps, tres)
 			if c > 0:
 				sysvals.vprint('%25s (res): %d' % (ps, c))
+	def handleEndMarker(self, time):
+		dm = self.dmesg
+		self.setEnd(time)
+		self.initDevicegroups()
+		# give suspend_prepare an end if needed
+		if 'suspend_prepare' in dm and dm['suspend_prepare']['end'] < 0:
+			dm['suspend_prepare']['end'] = time
+		# assume resume machine ends at next phase start
+		if 'resume_machine' in dm and dm['resume_machine']['end'] < 0:
+			np = self.nextPhase('resume_machine', 1)
+			if np:
+				dm['resume_machine']['end'] = dm[np]['start']
+		# if kernel resume end not found, assume its the end marker
+		if self.tKernRes == 0.0:
+			self.tKernRes = time
+		# if kernel suspend start not found, assume its the end marker
+		if self.tKernSus == 0.0:
+			self.tKernSus = time
+		# set resume complete to end at end marker
+		if 'resume_complete' in dm:
+			dm['resume_complete']['end'] = time
 	def debugPrint(self):
-		for p in self.phases:
+		for p in self.sortedPhases():
 			list = self.dmesg[p]['list']
 			for devname in list:
 				dev = list[devname]
@@ -1490,9 +1579,9 @@ class Data:
 # Description:
 #	 A container for kprobe function data we want in the dev timeline
 class DevFunction:
-	row = 0
-	count = 1
 	def __init__(self, name, args, caller, ret, start, end, u, proc, pid, color):
+		self.row = 0
+		self.count = 1
 		self.name = name
 		self.args = args
 		self.caller = caller
@@ -1546,16 +1635,15 @@ class DevFunction:
 #			 suspend_resume: phase or custom exec block data
 #			 device_pm_callback: device callback info
 class FTraceLine:
-	time = 0.0
-	length = 0.0
-	fcall = False
-	freturn = False
-	fevent = False
-	fkprobe = False
-	depth = 0
-	name = ''
-	type = ''
 	def __init__(self, t, m='', d=''):
+		self.length = 0.0
+		self.fcall = False
+		self.freturn = False
+		self.fevent = False
+		self.fkprobe = False
+		self.depth = 0
+		self.name = ''
+		self.type = ''
 		self.time = float(t)
 		if not m and not d:
 			return
@@ -1675,19 +1763,13 @@ class FTraceLine:
 #	 Each instance is tied to a single device in a single phase, and is
 #	 comprised of an ordered list of FTraceLine objects
 class FTraceCallGraph:
-	id = ''
-	start = -1.0
-	end = -1.0
-	list = []
-	invalid = False
-	depth = 0
-	pid = 0
-	name = ''
-	partial = False
 	vfname = 'missing_function_name'
-	ignore = False
-	sv = 0
 	def __init__(self, pid, sv):
+		self.id = ''
+		self.invalid = False
+		self.name = ''
+		self.partial = False
+		self.ignore = False
 		self.start = -1.0
 		self.end = -1.0
 		self.list = []
@@ -1943,7 +2025,7 @@ class FTraceCallGraph:
 						dev['ftrace'] = cg
 					found = devname
 			return found
-		for p in data.phases:
+		for p in data.sortedPhases():
 			if(data.dmesg[p]['start'] <= self.start and
 				self.start <= data.dmesg[p]['end']):
 				list = data.dmesg[p]['list']
@@ -1966,7 +2048,7 @@ class FTraceCallGraph:
 		if fs < data.start or fe > data.end:
 			return
 		phase = ''
-		for p in data.phases:
+		for p in data.sortedPhases():
 			if(data.dmesg[p]['start'] <= self.start and
 				self.start < data.dmesg[p]['end']):
 				phase = p
@@ -2008,23 +2090,20 @@ class DevItem:
 #	 A container for a device timeline which calculates
 #	 all the html properties to display it correctly
 class Timeline:
-	html = ''
-	height = 0	# total timeline height
-	scaleH = 20	# timescale (top) row height
-	rowH = 30	# device row height
-	bodyH = 0	# body height
-	rows = 0	# total timeline rows
-	rowlines = dict()
-	rowheight = dict()
 	html_tblock = '<div id="block{0}" class="tblock" style="left:{1}%;width:{2}%;"><div class="tback" style="height:{3}px"></div>\n'
 	html_device = '<div id="{0}" title="{1}" class="thread{7}" style="left:{2}%;top:{3}px;height:{4}px;width:{5}%;{8}">{6}</div>\n'
 	html_phase = '<div class="phase" style="left:{0}%;width:{1}%;top:{2}px;height:{3}px;background:{4}">{5}</div>\n'
 	html_phaselet = '<div id="{0}" class="phaselet" style="left:{1}%;width:{2}%;background:{3}"></div>\n'
 	html_legend = '<div id="p{3}" class="square" style="left:{0}%;background:{1}">&nbsp;{2}</div>\n'
 	def __init__(self, rowheight, scaleheight):
-		self.rowH = rowheight
-		self.scaleH = scaleheight
 		self.html = ''
+		self.height = 0  # total timeline height
+		self.scaleH = scaleheight # timescale (top) row height
+		self.rowH = rowheight     # device row height
+		self.bodyH = 0   # body height
+		self.rows = 0    # total timeline rows
+		self.rowlines = dict()
+		self.rowheight = dict()
 	def createHeader(self, sv, stamp):
 		if(not stamp['time']):
 			return
@@ -2251,18 +2330,18 @@ class Timeline:
 # Description:
 #	 A list of values describing the properties of these test runs
 class TestProps:
-	stamp = ''
-	sysinfo = ''
-	cmdline = ''
-	kparams = ''
-	S0i3 = False
-	fwdata = []
 	stampfmt = '# [a-z]*-(?P<m>[0-9]{2})(?P<d>[0-9]{2})(?P<y>[0-9]{2})-'+\
 				'(?P<H>[0-9]{2})(?P<M>[0-9]{2})(?P<S>[0-9]{2})'+\
 				' (?P<host>.*) (?P<mode>.*) (?P<kernel>.*)$'
+	batteryfmt = '^# battery (?P<a1>\w*) (?P<c1>\d*) (?P<a2>\w*) (?P<c2>\d*)'
+	testerrfmt = '^# enter_sleep_error (?P<e>.*)'
 	sysinfofmt = '^# sysinfo .*'
 	cmdlinefmt = '^# command \| (?P<cmd>.*)'
 	kparamsfmt = '^# kparams \| (?P<kp>.*)'
+	devpropfmt = '# Device Properties: .*'
+	tracertypefmt = '# tracer: (?P<t>.*)'
+	firmwarefmt = '# fwsuspend (?P<s>[0-9]*) fwresume (?P<r>[0-9]*)$'
+	procexecfmt = 'ps - (?P<ps>.*)$'
 	ftrace_line_fmt_fg = \
 		'^ *(?P<time>[0-9\.]*) *\| *(?P<cpu>[0-9]*)\)'+\
 		' *(?P<proc>.*)-(?P<pid>[0-9]*) *\|'+\
@@ -2271,11 +2350,17 @@ class TestProps:
 		' *(?P<proc>.*)-(?P<pid>[0-9]*) *\[(?P<cpu>[0-9]*)\] *'+\
 		'(?P<flags>.{4}) *(?P<time>[0-9\.]*): *'+\
 		'(?P<msg>.*)'
-	ftrace_line_fmt = ftrace_line_fmt_nop
-	cgformat = False
-	data = 0
-	ktemp = dict()
 	def __init__(self):
+		self.stamp = ''
+		self.sysinfo = ''
+		self.cmdline = ''
+		self.kparams = ''
+		self.testerror = []
+		self.battery = []
+		self.fwdata = []
+		self.ftrace_line_fmt = self.ftrace_line_fmt_nop
+		self.cgformat = False
+		self.data = 0
 		self.ktemp = dict()
 	def setTracerType(self, tracer):
 		if(tracer == 'function_graph'):
@@ -2286,6 +2371,7 @@ class TestProps:
 		else:
 			doError('Invalid tracer format: [%s]' % tracer)
 	def parseStamp(self, data, sv):
+		# global test data
 		m = re.match(self.stampfmt, self.stamp)
 		data.stamp = {'time': '', 'host': '', 'mode': ''}
 		dt = datetime(int(m.group('y'))+2000, int(m.group('m')),
@@ -2324,23 +2410,36 @@ class TestProps:
 				sv.kparams = m.group('kp')
 		if not sv.stamp:
 			sv.stamp = data.stamp
+		# firmware data
+		if sv.suspendmode == 'mem' and len(self.fwdata) > data.testnumber:
+			data.fwSuspend, data.fwResume = self.fwdata[data.testnumber]
+			if(data.fwSuspend > 0 or data.fwResume > 0):
+				data.fwValid = True
+		# battery data
+		if len(self.battery) > data.testnumber:
+			m = re.match(self.batteryfmt, self.battery[data.testnumber])
+			if m:
+				data.battery = m.groups()
+		# sleep mode enter errors
+		if len(self.testerror) > data.testnumber:
+			m = re.match(self.testerrfmt, self.testerror[data.testnumber])
+			if m:
+				data.enterfail = m.group('e')
 
 # Class: TestRun
 # Description:
 #	 A container for a suspend/resume test run. This is necessary as
 #	 there could be more than one, and they need to be separate.
 class TestRun:
-	ftemp = dict()
-	ttemp = dict()
-	data = 0
 	def __init__(self, dataobj):
 		self.data = dataobj
 		self.ftemp = dict()
 		self.ttemp = dict()
 
 class ProcessMonitor:
-	proclist = dict()
-	running = False
+	def __init__(self):
+		self.proclist = dict()
+		self.running = False
 	def procstat(self):
 		c = ['cat /proc/[1-9]*/stat 2>/dev/null']
 		process = Popen(c, shell=True, stdout=PIPE)
@@ -2391,8 +2490,8 @@ class ProcessMonitor:
 #	 markers, and/or kprobes required for primary parsing.
 def doesTraceLogHaveTraceEvents():
 	kpcheck = ['_cal: (', '_cpu_down()']
-	techeck = ['suspend_resume']
-	tmcheck = ['SUSPEND START', 'RESUME COMPLETE']
+	techeck = ['suspend_resume', 'device_pm_callback']
+	tmcheck = ['tracing_mark_write']
 	sysvals.usekprobes = False
 	fp = sysvals.openlog(sysvals.ftracefile, 'r')
 	for line in fp:
@@ -2414,23 +2513,14 @@ def doesTraceLogHaveTraceEvents():
 				check.remove(i)
 		tmcheck = check
 	fp.close()
-	if len(techeck) == 0:
-		sysvals.usetraceevents = True
-	else:
-		sysvals.usetraceevents = False
-	if len(tmcheck) == 0:
-		sysvals.usetracemarkers = True
-	else:
-		sysvals.usetracemarkers = False
+	sysvals.usetraceevents = True if len(techeck) < 2 else False
+	sysvals.usetracemarkers = True if len(tmcheck) == 0 else False
 
 # Function: appendIncompleteTraceLog
 # Description:
 #	 [deprecated for kernel 3.15 or newer]
-#	 Legacy support of ftrace outputs that lack the device_pm_callback
-#	 and/or suspend_resume trace events. The primary data should be
-#	 taken from dmesg, and this ftrace is used only for callgraph data
-#	 or custom actions in the timeline. The data is appended to the Data
-#	 objects provided.
+#	 Adds callgraph data which lacks trace event data. This is only
+#	 for timelines generated from 3.15 or older
 # Arguments:
 #	 testruns: the array of Data objects obtained from parseKernelLog
 def appendIncompleteTraceLog(testruns):
@@ -2460,13 +2550,19 @@ def appendIncompleteTraceLog(testruns):
 		elif re.match(tp.cmdlinefmt, line):
 			tp.cmdline = line
 			continue
+		elif re.match(tp.batteryfmt, line):
+			tp.battery.append(line)
+			continue
+		elif re.match(tp.testerrfmt, line):
+			tp.testerror.append(line)
+			continue
 		# determine the trace data type (required for further parsing)
-		m = re.match(sysvals.tracertypefmt, line)
+		m = re.match(tp.tracertypefmt, line)
 		if(m):
 			tp.setTracerType(m.group('t'))
 			continue
 		# device properties line
-		if(re.match(sysvals.devpropfmt, line)):
+		if(re.match(tp.devpropfmt, line)):
 			devProps(line)
 			continue
 		# parse only valid lines, if this is not one move on
@@ -2506,87 +2602,7 @@ def appendIncompleteTraceLog(testruns):
 			continue
 		# trace event processing
 		if(t.fevent):
-			# general trace events have two types, begin and end
-			if(re.match('(?P<name>.*) begin$', t.name)):
-				isbegin = True
-			elif(re.match('(?P<name>.*) end$', t.name)):
-				isbegin = False
-			else:
-				continue
-			m = re.match('(?P<name>.*)\[(?P<val>[0-9]*)\] .*', t.name)
-			if(m):
-				val = m.group('val')
-				if val == '0':
-					name = m.group('name')
-				else:
-					name = m.group('name')+'['+val+']'
-			else:
-				m = re.match('(?P<name>.*) .*', t.name)
-				name = m.group('name')
-			# special processing for trace events
-			if re.match('dpm_prepare\[.*', name):
-				continue
-			elif re.match('machine_suspend.*', name):
-				continue
-			elif re.match('suspend_enter\[.*', name):
-				if(not isbegin):
-					data.dmesg['suspend_prepare']['end'] = t.time
-				continue
-			elif re.match('dpm_suspend\[.*', name):
-				if(not isbegin):
-					data.dmesg['suspend']['end'] = t.time
-				continue
-			elif re.match('dpm_suspend_late\[.*', name):
-				if(isbegin):
-					data.dmesg['suspend_late']['start'] = t.time
-				else:
-					data.dmesg['suspend_late']['end'] = t.time
-				continue
-			elif re.match('dpm_suspend_noirq\[.*', name):
-				if(isbegin):
-					data.dmesg['suspend_noirq']['start'] = t.time
-				else:
-					data.dmesg['suspend_noirq']['end'] = t.time
-				continue
-			elif re.match('dpm_resume_noirq\[.*', name):
-				if(isbegin):
-					data.dmesg['resume_machine']['end'] = t.time
-					data.dmesg['resume_noirq']['start'] = t.time
-				else:
-					data.dmesg['resume_noirq']['end'] = t.time
-				continue
-			elif re.match('dpm_resume_early\[.*', name):
-				if(isbegin):
-					data.dmesg['resume_early']['start'] = t.time
-				else:
-					data.dmesg['resume_early']['end'] = t.time
-				continue
-			elif re.match('dpm_resume\[.*', name):
-				if(isbegin):
-					data.dmesg['resume']['start'] = t.time
-				else:
-					data.dmesg['resume']['end'] = t.time
-				continue
-			elif re.match('dpm_complete\[.*', name):
-				if(isbegin):
-					data.dmesg['resume_complete']['start'] = t.time
-				else:
-					data.dmesg['resume_complete']['end'] = t.time
-				continue
-			# skip trace events inside devices calls
-			if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)):
-				continue
-			# global events (outside device calls) are simply graphed
-			if(isbegin):
-				# store each trace event in ttemp
-				if(name not in testrun[testidx].ttemp):
-					testrun[testidx].ttemp[name] = []
-				testrun[testidx].ttemp[name].append(\
-					{'begin': t.time, 'end': t.time})
-			else:
-				# finish off matching trace event in ttemp
-				if(name in testrun[testidx].ttemp):
-					testrun[testidx].ttemp[name][-1]['end'] = t.time
+			continue
 		# call/return processing
 		elif sysvals.usecallgraph:
 			# create a callgraph object for the data
@@ -2603,12 +2619,6 @@ def appendIncompleteTraceLog(testruns):
 	tf.close()
 
 	for test in testrun:
-		# add the traceevent data to the device hierarchy
-		if(sysvals.usetraceevents):
-			for name in test.ttemp:
-				for event in test.ttemp[name]:
-					test.data.newActionGlobal(name, event['begin'], event['end'])
-
 		# add the callgraph data to the device hierarchy
 		for pid in test.ftemp:
 			for cg in test.ftemp[pid]:
@@ -2621,7 +2631,7 @@ def appendIncompleteTraceLog(testruns):
 					continue
 				callstart = cg.start
 				callend = cg.end
-				for p in test.data.phases:
+				for p in test.data.sortedPhases():
 					if(test.data.dmesg[p]['start'] <= callstart and
 						callstart <= test.data.dmesg[p]['end']):
 						list = test.data.dmesg[p]['list']
@@ -2648,10 +2658,12 @@ def parseTraceLog(live=False):
 		doError('%s does not exist' % sysvals.ftracefile)
 	if not live:
 		sysvals.setupAllKprobes()
+	krescalls = ['pm_notifier_call_chain', 'pm_restore_console']
 	tracewatch = []
 	if sysvals.usekprobes:
 		tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend',
-			'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON', 'CPU_OFF']
+			'syscore_resume', 'resume_console', 'thaw_processes', 'CPU_ON',
+			'CPU_OFF', 'timekeeping_freeze', 'acpi_suspend']
 
 	# extract the callgraph and traceevent data
 	tp = TestProps()
@@ -2674,18 +2686,24 @@ def parseTraceLog(live=False):
 		elif re.match(tp.cmdlinefmt, line):
 			tp.cmdline = line
 			continue
+		elif re.match(tp.batteryfmt, line):
+			tp.battery.append(line)
+			continue
+		elif re.match(tp.testerrfmt, line):
+			tp.testerror.append(line)
+			continue
 		# firmware line: pull out any firmware data
-		m = re.match(sysvals.firmwarefmt, line)
+		m = re.match(tp.firmwarefmt, line)
 		if(m):
 			tp.fwdata.append((int(m.group('s')), int(m.group('r'))))
 			continue
 		# tracer type line: determine the trace data type
-		m = re.match(sysvals.tracertypefmt, line)
+		m = re.match(tp.tracertypefmt, line)
 		if(m):
 			tp.setTracerType(m.group('t'))
 			continue
 		# device properties line
-		if(re.match(sysvals.devpropfmt, line)):
+		if(re.match(tp.devpropfmt, line)):
 			devProps(line)
 			continue
 		# ignore all other commented lines
@@ -2714,20 +2732,19 @@ def parseTraceLog(live=False):
 			continue
 		# find the start of suspend
 		if(t.startMarker()):
-			phase = 'suspend_prepare'
 			data = Data(len(testdata))
 			testdata.append(data)
 			testrun = TestRun(data)
 			testruns.append(testrun)
 			tp.parseStamp(data, sysvals)
 			data.setStart(t.time)
-			data.tKernSus = t.time
+			phase = data.setPhase('suspend_prepare', t.time, True)
 			continue
 		if(not data):
 			continue
 		# process cpu exec line
 		if t.type == 'tracing_mark_write':
-			m = re.match(sysvals.procexecfmt, t.name)
+			m = re.match(tp.procexecfmt, t.name)
 			if(m):
 				proclist = dict()
 				for ps in m.group('ps').split(','):
@@ -2740,28 +2757,17 @@ def parseTraceLog(live=False):
 				continue
 		# find the end of resume
 		if(t.endMarker()):
-			data.setEnd(t.time)
-			if data.tKernRes == 0.0:
-				data.tKernRes = t.time
-			if data.dmesg['resume_complete']['end'] < 0:
-				data.dmesg['resume_complete']['end'] = t.time
-			if sysvals.suspendmode == 'mem' and len(tp.fwdata) > data.testnumber:
-				data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber]
-				if(data.tSuspended != 0 and data.tResumed != 0 and \
-					(data.fwSuspend > 0 or data.fwResume > 0)):
-					data.fwValid = True
+			data.handleEndMarker(t.time)
 			if(not sysvals.usetracemarkers):
 				# no trace markers? then quit and be sure to finish recording
 				# the event we used to trigger resume end
-				if(len(testrun.ttemp['thaw_processes']) > 0):
+				if('thaw_processes' in testrun.ttemp and len(testrun.ttemp['thaw_processes']) > 0):
 					# if an entry exists, assume this is its end
 					testrun.ttemp['thaw_processes'][-1]['end'] = t.time
 				break
 			continue
 		# trace event processing
 		if(t.fevent):
-			if(phase == 'post_resume'):
-				data.setEnd(t.time)
 			if(t.type == 'suspend_resume'):
 				# suspend_resume trace events have two types, begin and end
 				if(re.match('(?P<name>.*) begin$', t.name)):
@@ -2786,86 +2792,61 @@ def parseTraceLog(live=False):
 				# -- phase changes --
 				# start of kernel suspend
 				if(re.match('suspend_enter\[.*', t.name)):
-					if(isbegin and data.start == data.tKernSus):
-						data.dmesg[phase]['start'] = t.time
+					if(isbegin):
 						data.tKernSus = t.time
 					continue
 				# suspend_prepare start
 				elif(re.match('dpm_prepare\[.*', t.name)):
 					phase = 'suspend_prepare'
-					if(not isbegin):
-						data.dmesg[phase]['end'] = t.time
-						if data.dmesg[phase]['start'] < 0:
-							data.dmesg[phase]['start'] = data.start
+					if not isbegin:
+						data.setPhase(phase, t.time, isbegin)
+					if isbegin and data.tKernSus == 0:
+						data.tKernSus = t.time
 					continue
 				# suspend start
 				elif(re.match('dpm_suspend\[.*', t.name)):
-					phase = 'suspend'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('suspend', t.time, isbegin)
 					continue
 				# suspend_late start
 				elif(re.match('dpm_suspend_late\[.*', t.name)):
-					phase = 'suspend_late'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('suspend_late', t.time, isbegin)
 					continue
 				# suspend_noirq start
 				elif(re.match('dpm_suspend_noirq\[.*', t.name)):
-					if data.phaseCollision('suspend_noirq', isbegin, line):
-						continue
-					phase = 'suspend_noirq'
-					data.setPhase(phase, t.time, isbegin)
-					if(not isbegin):
-						phase = 'suspend_machine'
-						data.dmesg[phase]['start'] = t.time
+					phase = data.setPhase('suspend_noirq', t.time, isbegin)
 					continue
 				# suspend_machine/resume_machine
 				elif(re.match('machine_suspend\[.*', t.name)):
 					if(isbegin):
-						phase = 'suspend_machine'
-						data.dmesg[phase]['end'] = t.time
-						data.tSuspended = t.time
-					else:
-						if(sysvals.suspendmode in ['mem', 'disk'] and not tp.S0i3):
-							data.dmesg['suspend_machine']['end'] = t.time
+						lp = data.lastPhase()
+						phase = data.setPhase('suspend_machine', data.dmesg[lp]['end'], True)
+						data.setPhase(phase, t.time, False)
+						if data.tSuspended == 0:
 							data.tSuspended = t.time
-						phase = 'resume_machine'
-						data.dmesg[phase]['start'] = t.time
-						data.tResumed = t.time
-						data.tLow = data.tResumed - data.tSuspended
-					continue
-				# acpi_suspend
-				elif(re.match('acpi_suspend\[.*', t.name)):
-					# acpi_suspend[0] S0i3
-					if(re.match('acpi_suspend\[0\] begin', t.name)):
-						if(sysvals.suspendmode == 'mem'):
-							tp.S0i3 = True
-							data.dmesg['suspend_machine']['end'] = t.time
+					else:
+						phase = data.setPhase('resume_machine', t.time, True)
+						if(sysvals.suspendmode in ['mem', 'disk']):
+							if 'suspend_machine' in data.dmesg:
+								data.dmesg['suspend_machine']['end'] = t.time
 							data.tSuspended = t.time
+						if data.tResumed == 0:
+							data.tResumed = t.time
 					continue
 				# resume_noirq start
 				elif(re.match('dpm_resume_noirq\[.*', t.name)):
-					if data.phaseCollision('resume_noirq', isbegin, line):
-						continue
-					phase = 'resume_noirq'
-					data.setPhase(phase, t.time, isbegin)
-					if(isbegin):
-						data.dmesg['resume_machine']['end'] = t.time
+					phase = data.setPhase('resume_noirq', t.time, isbegin)
 					continue
 				# resume_early start
 				elif(re.match('dpm_resume_early\[.*', t.name)):
-					phase = 'resume_early'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('resume_early', t.time, isbegin)
 					continue
 				# resume start
 				elif(re.match('dpm_resume\[.*', t.name)):
-					phase = 'resume'
-					data.setPhase(phase, t.time, isbegin)
+					phase = data.setPhase('resume', t.time, isbegin)
 					continue
 				# resume complete start
 				elif(re.match('dpm_complete\[.*', t.name)):
-					phase = 'resume_complete'
-					if(isbegin):
-						data.dmesg[phase]['start'] = t.time
+					phase = data.setPhase('resume_complete', t.time, isbegin)
 					continue
 				# skip trace events inside devices calls
 				if(not data.isTraceEventOutsideDeviceCalls(pid, t.time)):
@@ -2881,13 +2862,10 @@ def parseTraceLog(live=False):
 					if(len(testrun.ttemp[name]) > 0):
 						# if an entry exists, assume this is its end
 						testrun.ttemp[name][-1]['end'] = t.time
-					elif(phase == 'post_resume'):
-						# post resume events can just have ends
-						testrun.ttemp[name].append({
-							'begin': data.dmesg[phase]['start'],
-							'end': t.time})
 			# device callback start
 			elif(t.type == 'device_pm_callback_start'):
+				if phase not in data.dmesg:
+					continue
 				m = re.match('(?P<drv>.*) (?P<d>.*), parent: *(?P<p>.*), .*',\
 					t.name);
 				if(not m):
@@ -2901,6 +2879,8 @@ def parseTraceLog(live=False):
 						data.devpids.append(pid)
 			# device callback finish
 			elif(t.type == 'device_pm_callback_end'):
+				if phase not in data.dmesg:
+					continue
 				m = re.match('(?P<drv>.*) (?P<d>.*), err.*', t.name);
 				if(not m):
 					continue
@@ -2941,9 +2921,9 @@ def parseTraceLog(live=False):
 					e['end'] = t.time
 					e['rdata'] = kprobedata
 				# end of kernel resume
-				if(kprobename == 'pm_notifier_call_chain' or \
-					kprobename == 'pm_restore_console'):
-					data.dmesg[phase]['end'] = t.time
+				if(phase != 'suspend_prepare' and kprobename in krescalls):
+					if phase in data.dmesg:
+						data.dmesg[phase]['end'] = t.time
 					data.tKernRes = t.time
 
 		# callgraph processing
@@ -2961,10 +2941,13 @@ def parseTraceLog(live=False):
 			if(res == -1):
 				testrun.ftemp[key][-1].addLine(t)
 	tf.close()
+	if data and not data.devicegroups:
+		sysvals.vprint('WARNING: end marker is missing')
+		data.handleEndMarker(t.time)
 
 	if sysvals.suspendmode == 'command':
 		for test in testruns:
-			for p in test.data.phases:
+			for p in test.data.sortedPhases():
 				if p == 'suspend_prepare':
 					test.data.dmesg[p]['start'] = test.data.start
 					test.data.dmesg[p]['end'] = test.data.end
@@ -2973,7 +2956,6 @@ def parseTraceLog(live=False):
 					test.data.dmesg[p]['end'] = test.data.end
 			test.data.tSuspended = test.data.end
 			test.data.tResumed = test.data.end
-			test.data.tLow = 0
 			test.data.fwValid = False
 
 	# dev source and procmon events can be unreadable with mixed phase height
@@ -3040,8 +3022,8 @@ def parseTraceLog(live=False):
 						sortkey = '%f%f%d' % (cg.start, cg.end, pid)
 						sortlist[sortkey] = cg
 					elif len(cg.list) > 1000000:
-						print 'WARNING: the callgraph for %s is massive (%d lines)' %\
-							(devname, len(cg.list))
+						sysvals.vprint('WARNING: the callgraph for %s is massive (%d lines)' %\
+							(devname, len(cg.list)))
 			# create blocks for orphan cg data
 			for sortkey in sorted(sortlist):
 				cg = sortlist[sortkey]
@@ -3057,25 +3039,34 @@ def parseTraceLog(live=False):
 	for data in testdata:
 		tn = '' if len(testdata) == 1 else ('%d' % (data.testnumber + 1))
 		terr = ''
-		lp = data.phases[0]
-		for p in data.phases:
-			if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0):
+		phasedef = data.phasedef
+		lp = 'suspend_prepare'
+		for p in sorted(phasedef, key=lambda k:phasedef[k]['order']):
+			if p not in data.dmesg:
 				if not terr:
 					print 'TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp)
 					terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, lp)
 					error.append(terr)
+					if data.tSuspended == 0:
+						data.tSuspended = data.dmesg[lp]['end']
+					if data.tResumed == 0:
+						data.tResumed = data.dmesg[lp]['end']
+					data.fwValid = False
 				sysvals.vprint('WARNING: phase "%s" is missing!' % p)
-			if(data.dmesg[p]['start'] < 0):
-				data.dmesg[p]['start'] = data.dmesg[lp]['end']
-				if(p == 'resume_machine'):
-					data.tSuspended = data.dmesg[lp]['end']
-					data.tResumed = data.dmesg[lp]['end']
-					data.tLow = 0
-			if(data.dmesg[p]['end'] < 0):
-				data.dmesg[p]['end'] = data.dmesg[p]['start']
+			lp = p
+		if not terr and data.enterfail:
+			print 'test%s FAILED: enter %s failed with %s' % (tn, sysvals.suspendmode, data.enterfail)
+			terr = 'test%s failed to enter %s mode' % (tn, sysvals.suspendmode)
+			error.append(terr)
+		lp = data.sortedPhases()[0]
+		for p in data.sortedPhases():
 			if(p != lp and not ('machine' in p and 'machine' in lp)):
 				data.dmesg[lp]['end'] = data.dmesg[p]['start']
 			lp = p
+		if data.tSuspended == 0:
+			data.tSuspended = data.tKernRes
+		if data.tResumed == 0:
+			data.tResumed = data.tSuspended
 
 		if(len(sysvals.devicefilter) > 0):
 			data.deviceFilter(sysvals.devicefilter)
@@ -3127,7 +3118,13 @@ def loadKernelLog():
 		elif re.match(tp.cmdlinefmt, line):
 			tp.cmdline = line
 			continue
-		m = re.match(sysvals.firmwarefmt, line)
+		elif re.match(tp.batteryfmt, line):
+			tp.battery.append(line)
+			continue
+		elif re.match(tp.testerrfmt, line):
+			tp.testerror.append(line)
+			continue
+		m = re.match(tp.firmwarefmt, line)
 		if(m):
 			tp.fwdata.append((int(m.group('s')), int(m.group('r'))))
 			continue
@@ -3140,10 +3137,6 @@ def loadKernelLog():
 				testruns.append(data)
 			data = Data(len(testruns))
 			tp.parseStamp(data, sysvals)
-			if len(tp.fwdata) > data.testnumber:
-				data.fwSuspend, data.fwResume = tp.fwdata[data.testnumber]
-				if(data.fwSuspend > 0 or data.fwResume > 0):
-					data.fwValid = True
 		if(not data):
 			continue
 		m = re.match('.* *(?P<k>[0-9]\.[0-9]{2}\.[0-9]-.*) .*', msg)
@@ -3199,30 +3192,30 @@ def parseKernelLog(data):
 
 	# dmesg phase match table
 	dm = {
-		'suspend_prepare': 'PM: Syncing filesystems.*',
-		        'suspend': 'PM: Entering [a-z]* sleep.*',
-		   'suspend_late': 'PM: suspend of devices complete after.*',
-		  'suspend_noirq': 'PM: late suspend of devices complete after.*',
-		'suspend_machine': 'PM: noirq suspend of devices complete after.*',
-		 'resume_machine': 'ACPI: Low-level resume complete.*',
-		   'resume_noirq': 'ACPI: Waking up from system sleep state.*',
-		   'resume_early': 'PM: noirq resume of devices complete after.*',
-		         'resume': 'PM: early resume of devices complete after.*',
-		'resume_complete': 'PM: resume of devices complete after.*',
-		    'post_resume': '.*Restarting tasks \.\.\..*',
+		'suspend_prepare': ['PM: Syncing filesystems.*'],
+		        'suspend': ['PM: Entering [a-z]* sleep.*', 'Suspending console.*'],
+		   'suspend_late': ['PM: suspend of devices complete after.*'],
+		  'suspend_noirq': ['PM: late suspend of devices complete after.*'],
+		'suspend_machine': ['PM: noirq suspend of devices complete after.*'],
+		 'resume_machine': ['ACPI: Low-level resume complete.*'],
+		   'resume_noirq': ['ACPI: Waking up from system sleep state.*'],
+		   'resume_early': ['PM: noirq resume of devices complete after.*'],
+		         'resume': ['PM: early resume of devices complete after.*'],
+		'resume_complete': ['PM: resume of devices complete after.*'],
+		    'post_resume': ['.*Restarting tasks \.\.\..*'],
 	}
 	if(sysvals.suspendmode == 'standby'):
-		dm['resume_machine'] = 'PM: Restoring platform NVS memory'
+		dm['resume_machine'] = ['PM: Restoring platform NVS memory']
 	elif(sysvals.suspendmode == 'disk'):
-		dm['suspend_late'] = 'PM: freeze of devices complete after.*'
-		dm['suspend_noirq'] = 'PM: late freeze of devices complete after.*'
-		dm['suspend_machine'] = 'PM: noirq freeze of devices complete after.*'
-		dm['resume_machine'] = 'PM: Restoring platform NVS memory'
-		dm['resume_early'] = 'PM: noirq restore of devices complete after.*'
-		dm['resume'] = 'PM: early restore of devices complete after.*'
-		dm['resume_complete'] = 'PM: restore of devices complete after.*'
+		dm['suspend_late'] = ['PM: freeze of devices complete after.*']
+		dm['suspend_noirq'] = ['PM: late freeze of devices complete after.*']
+		dm['suspend_machine'] = ['PM: noirq freeze of devices complete after.*']
+		dm['resume_machine'] = ['PM: Restoring platform NVS memory']
+		dm['resume_early'] = ['PM: noirq restore of devices complete after.*']
+		dm['resume'] = ['PM: early restore of devices complete after.*']
+		dm['resume_complete'] = ['PM: restore of devices complete after.*']
 	elif(sysvals.suspendmode == 'freeze'):
-		dm['resume_machine'] = 'ACPI: resume from mwait'
+		dm['resume_machine'] = ['ACPI: resume from mwait']
 
 	# action table (expected events that occur and show up in dmesg)
 	at = {
@@ -3264,81 +3257,89 @@ def parseKernelLog(data):
 		else:
 			continue
 
+		# check for a phase change line
+		phasechange = False
+		for p in dm:
+			for s in dm[p]:
+				if(re.match(s, msg)):
+					phasechange, phase = True, p
+					break
+
 		# hack for determining resume_machine end for freeze
 		if(not sysvals.usetraceevents and sysvals.suspendmode == 'freeze' \
 			and phase == 'resume_machine' and \
 			re.match('calling  (?P<f>.*)\+ @ .*, parent: .*', msg)):
-			data.dmesg['resume_machine']['end'] = ktime
+			data.setPhase(phase, ktime, False)
 			phase = 'resume_noirq'
-			data.dmesg[phase]['start'] = ktime
-
-		# suspend start
-		if(re.match(dm['suspend_prepare'], msg)):
-			phase = 'suspend_prepare'
-			data.dmesg[phase]['start'] = ktime
-			data.setStart(ktime)
-			data.tKernSus = ktime
-		# suspend start
-		elif(re.match(dm['suspend'], msg)):
-			data.dmesg['suspend_prepare']['end'] = ktime
-			phase = 'suspend'
-			data.dmesg[phase]['start'] = ktime
-		# suspend_late start
-		elif(re.match(dm['suspend_late'], msg)):
-			data.dmesg['suspend']['end'] = ktime
-			phase = 'suspend_late'
-			data.dmesg[phase]['start'] = ktime
-		# suspend_noirq start
-		elif(re.match(dm['suspend_noirq'], msg)):
-			data.dmesg['suspend_late']['end'] = ktime
-			phase = 'suspend_noirq'
-			data.dmesg[phase]['start'] = ktime
-		# suspend_machine start
-		elif(re.match(dm['suspend_machine'], msg)):
-			data.dmesg['suspend_noirq']['end'] = ktime
-			phase = 'suspend_machine'
-			data.dmesg[phase]['start'] = ktime
-		# resume_machine start
-		elif(re.match(dm['resume_machine'], msg)):
-			if(sysvals.suspendmode in ['freeze', 'standby']):
-				data.tSuspended = prevktime
-				data.dmesg['suspend_machine']['end'] = prevktime
-			else:
-				data.tSuspended = ktime
-				data.dmesg['suspend_machine']['end'] = ktime
-			phase = 'resume_machine'
-			data.tResumed = ktime
-			data.tLow = data.tResumed - data.tSuspended
-			data.dmesg[phase]['start'] = ktime
-		# resume_noirq start
-		elif(re.match(dm['resume_noirq'], msg)):
-			data.dmesg['resume_machine']['end'] = ktime
-			phase = 'resume_noirq'
-			data.dmesg[phase]['start'] = ktime
-		# resume_early start
-		elif(re.match(dm['resume_early'], msg)):
-			data.dmesg['resume_noirq']['end'] = ktime
-			phase = 'resume_early'
-			data.dmesg[phase]['start'] = ktime
-		# resume start
-		elif(re.match(dm['resume'], msg)):
-			data.dmesg['resume_early']['end'] = ktime
-			phase = 'resume'
-			data.dmesg[phase]['start'] = ktime
-		# resume complete start
-		elif(re.match(dm['resume_complete'], msg)):
-			data.dmesg['resume']['end'] = ktime
-			phase = 'resume_complete'
-			data.dmesg[phase]['start'] = ktime
-		# post resume start
-		elif(re.match(dm['post_resume'], msg)):
-			data.dmesg['resume_complete']['end'] = ktime
-			data.setEnd(ktime)
-			data.tKernRes = ktime
-			break
+			data.setPhase(phase, ktime, True)
+
+		if phasechange:
+			if phase == 'suspend_prepare':
+				data.setPhase(phase, ktime, True)
+				data.setStart(ktime)
+				data.tKernSus = ktime
+			elif phase == 'suspend':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'suspend_late':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'suspend_noirq':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'suspend_machine':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_machine':
+				lp = data.lastPhase()
+				if(sysvals.suspendmode in ['freeze', 'standby']):
+					data.tSuspended = prevktime
+					if lp:
+						data.setPhase(lp, prevktime, False)
+				else:
+					data.tSuspended = ktime
+					if lp:
+						data.setPhase(lp, prevktime, False)
+				data.tResumed = ktime
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_noirq':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_early':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'resume_complete':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setPhase(phase, ktime, True)
+			elif phase == 'post_resume':
+				lp = data.lastPhase()
+				if lp:
+					data.setPhase(lp, ktime, False)
+				data.setEnd(ktime)
+				data.tKernRes = ktime
+				break
 
 		# -- device callbacks --
-		if(phase in data.phases):
+		if(phase in data.sortedPhases()):
 			# device init call
 			if(re.match('calling  (?P<f>.*)\+ @ .*, parent: .*', msg)):
 				sm = re.match('calling  (?P<f>.*)\+ @ '+\
@@ -3396,24 +3397,31 @@ def parseKernelLog(data):
 				actions[cpu].append({'begin': cpu_start, 'end': ktime})
 				cpu_start = ktime
 		prevktime = ktime
+	data.initDevicegroups()
 
 	# fill in any missing phases
-	lp = data.phases[0]
-	for p in data.phases:
-		if(data.dmesg[p]['start'] < 0 and data.dmesg[p]['end'] < 0):
-			print('WARNING: phase "%s" is missing, something went wrong!' % p)
-			print('    In %s, this dmesg line denotes the start of %s:' % \
-				(sysvals.suspendmode, p))
-			print('        "%s"' % dm[p])
-		if(data.dmesg[p]['start'] < 0):
-			data.dmesg[p]['start'] = data.dmesg[lp]['end']
-			if(p == 'resume_machine'):
-				data.tSuspended = data.dmesg[lp]['end']
-				data.tResumed = data.dmesg[lp]['end']
-				data.tLow = 0
-		if(data.dmesg[p]['end'] < 0):
-			data.dmesg[p]['end'] = data.dmesg[p]['start']
+	phasedef = data.phasedef
+	terr, lp = '', 'suspend_prepare'
+	for p in sorted(phasedef, key=lambda k:phasedef[k]['order']):
+		if p not in data.dmesg:
+			if not terr:
+				print 'TEST FAILED: %s failed in %s phase' % (sysvals.suspendmode, lp)
+				terr = '%s failed in %s phase' % (sysvals.suspendmode, lp)
+				if data.tSuspended == 0:
+					data.tSuspended = data.dmesg[lp]['end']
+				if data.tResumed == 0:
+					data.tResumed = data.dmesg[lp]['end']
+			sysvals.vprint('WARNING: phase "%s" is missing!' % p)
+		lp = p
+	lp = data.sortedPhases()[0]
+	for p in data.sortedPhases():
+		if(p != lp and not ('machine' in p and 'machine' in lp)):
+			data.dmesg[lp]['end'] = data.dmesg[p]['start']
 		lp = p
+	if data.tSuspended == 0:
+		data.tSuspended = data.tKernRes
+	if data.tResumed == 0:
+		data.tResumed = data.tSuspended
 
 	# fill in any actions we've found
 	for name in actions:
@@ -3462,7 +3470,7 @@ def addCallgraphs(sv, hf, data):
 	hf.write('<section id="callgraphs" class="callgraph">\n')
 	# write out the ftrace data converted to html
 	num = 0
-	for p in data.phases:
+	for p in data.sortedPhases():
 		if sv.cgphase and p != sv.cgphase:
 			continue
 		list = data.dmesg[p]['list']
@@ -3505,7 +3513,7 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 		table {width:100%;border-collapse: collapse;}\n\
 		.summary {border:1px solid;}\n\
 		th {border: 1px solid black;background:#222;color:white;}\n\
-		td {font: 16px "Times New Roman";text-align: center;}\n\
+		td {font: 14px "Times New Roman";text-align: center;}\n\
 		tr.head td {border: 1px solid black;background:#aaa;}\n\
 		tr.alt {background-color:#ddd;}\n\
 		tr.notice {color:red;}\n\
@@ -3521,7 +3529,7 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 	iMin, iMed, iMax = [0, 0], [0, 0], [0, 0]
 	num = 0
 	lastmode = ''
-	cnt = {'pass':0, 'fail':0, 'hang':0}
+	cnt = dict()
 	for data in sorted(testruns, key=lambda v:(v['mode'], v['host'], v['kernel'], v['time'])):
 		mode = data['mode']
 		if mode not in list:
@@ -3541,10 +3549,14 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 		tVal = [float(data['suspend']), float(data['resume'])]
 		list[mode]['data'].append([data['host'], data['kernel'],
 			data['time'], tVal[0], tVal[1], data['url'], data['result'],
-			data['issues']])
+			data['issues'], data['sus_worst'], data['sus_worsttime'],
+			data['res_worst'], data['res_worsttime']])
 		idx = len(list[mode]['data']) - 1
+		if data['result'] not in cnt:
+			cnt[data['result']] = 1
+		else:
+			cnt[data['result']] += 1
 		if data['result'] == 'pass':
-			cnt['pass'] += 1
 			for i in range(2):
 				tMed[i].append(tVal[i])
 				tAvg[i] += tVal[i]
@@ -3555,10 +3567,6 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 					iMax[i] = idx
 					tMax[i] = tVal[i]
 			num += 1
-		elif data['result'] == 'hang':
-			cnt['hang'] += 1
-		elif data['result'] == 'fail':
-			cnt['fail'] += 1
 		lastmode = mode
 	if lastmode and num > 0:
 		for i in range(2):
@@ -3585,11 +3593,14 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 	html += '<table class="summary">\n<tr>\n' + th.format('#') +\
 		th.format('Mode') + th.format('Host') + th.format('Kernel') +\
 		th.format('Test Time') + th.format('Result') + th.format('Issues') +\
-		th.format('Suspend') + th.format('Resume') + th.format('Detail') + '</tr>\n'
+		th.format('Suspend') + th.format('Resume') +\
+		th.format('Worst Suspend Device') + th.format('SD Time') +\
+		th.format('Worst Resume Device') + th.format('RD Time') +\
+		th.format('Detail') + '</tr>\n'
 
 	# export list into html
 	head = '<tr class="head"><td>{0}</td><td>{1}</td>'+\
-		'<td colspan=8 class="sus">Suspend Avg={2} '+\
+		'<td colspan=12 class="sus">Suspend Avg={2} '+\
 		'<span class=minval><a href="#s{10}min">Min={3}</a></span> '+\
 		'<span class=medval><a href="#s{10}med">Med={4}</a></span> '+\
 		'<span class=maxval><a href="#s{10}max">Max={5}</a></span> '+\
@@ -3598,7 +3609,7 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 		'<span class=medval><a href="#r{10}med">Med={8}</a></span> '+\
 		'<span class=maxval><a href="#r{10}max">Max={9}</a></span></td>'+\
 		'</tr>\n'
-	headnone = '<tr class="head"><td>{0}</td><td>{1}</td><td colspan=8></td></tr>\n'
+	headnone = '<tr class="head"><td>{0}</td><td>{1}</td><td colspan=12></td></tr>\n'
 	for mode in list:
 		# header line for each suspend mode
 		num = 0
@@ -3641,6 +3652,10 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 			html += td.format(d[7])										# issues
 			html += tdh.format('%.3f ms' % d[3], tHigh[0]) if d[3] else td.format('')	# suspend
 			html += tdh.format('%.3f ms' % d[4], tHigh[1]) if d[4] else td.format('')	# resume
+			html += td.format(d[8])										# sus_worst
+			html += td.format('%.3f ms' % d[9])	if d[9] else td.format('')		# sus_worst time
+			html += td.format(d[10])									# res_worst
+			html += td.format('%.3f ms' % d[11]) if d[11] else td.format('')	# res_worst time
 			html += tdlink.format(d[5]) if d[5] else td.format('')		# url
 			html += '</tr>\n'
 			num += 1
@@ -3677,7 +3692,8 @@ def createHTML(testruns, testfail):
 	for data in testruns:
 		if data.kerror:
 			kerror = True
-		data.normalizeTime(testruns[-1].tSuspended)
+		if(sysvals.suspendmode in ['freeze', 'standby']):
+			data.trimFreezeTime(testruns[-1].tSuspended)
 
 	# html function templates
 	html_error = '<div id="{1}" title="kernel error/warning" class="err" style="right:{0}%">{2}&rarr;</div>\n'
@@ -3721,8 +3737,8 @@ def createHTML(testruns, testfail):
 		sktime, rktime = data.getTimeValues()
 		if(tTotal == 0):
 			doError('No timeline data')
-		if(data.tLow > 0):
-			low_time = '%.0f'%(data.tLow*1000)
+		if(len(data.tLow) > 0):
+			low_time = '|'.join(data.tLow)
 		if sysvals.suspendmode == 'command':
 			run_time = '%.0f'%((data.end-data.start)*1000)
 			if sysvals.testcommand:
@@ -3743,7 +3759,7 @@ def createHTML(testruns, testfail):
 			if(len(testruns) > 1):
 				testdesc1 = testdesc2 = ordinal(data.testnumber+1)
 				testdesc2 += ' '
-			if(data.tLow == 0):
+			if(len(data.tLow) == 0):
 				thtml = html_timetotal.format(suspend_time, \
 					resume_time, testdesc1, stitle, rtitle)
 			else:
@@ -3762,7 +3778,7 @@ def createHTML(testruns, testfail):
 			rtitle = 'time from firmware mode to return from kernel enter_state(%s) [kernel time only]' % sysvals.suspendmode
 			if(len(testruns) > 1):
 				testdesc = ordinal(data.testnumber+1)+' '+testdesc
-			if(data.tLow == 0):
+			if(len(data.tLow) == 0):
 				thtml = html_timetotal.format(suspend_time, \
 					resume_time, testdesc, stitle, rtitle)
 			else:
@@ -3820,15 +3836,14 @@ def createHTML(testruns, testfail):
 
 	# draw the full timeline
 	devtl.createZoomBox(sysvals.suspendmode, len(testruns))
-	phases = {'suspend':[],'resume':[]}
-	for phase in data.dmesg:
-		if 'resume' in phase:
-			phases['resume'].append(phase)
-		else:
-			phases['suspend'].append(phase)
-
-	# draw each test run chronologically
 	for data in testruns:
+		# draw each test run and block chronologically
+		phases = {'suspend':[],'resume':[]}
+		for phase in data.sortedPhases():
+			if data.dmesg[phase]['start'] >= data.tSuspended:
+				phases['resume'].append(phase)
+			else:
+				phases['suspend'].append(phase)
 		# now draw the actual timeline blocks
 		for dir in phases:
 			# draw suspend and resume blocks separately
@@ -3850,7 +3865,7 @@ def createHTML(testruns, testfail):
 				continue
 			width = '%f' % (((mTotal*100.0)-sysvals.srgap/2)/tTotal)
 			devtl.html += devtl.html_tblock.format(bname, left, width, devtl.scaleH)
-			for b in sorted(phases[dir]):
+			for b in phases[dir]:
 				# draw the phase color background
 				phase = data.dmesg[b]
 				length = phase['end']-phase['start']
@@ -3865,7 +3880,7 @@ def createHTML(testruns, testfail):
 				id = '%d_%d' % (idx1, idx2)
 				right = '%f' % (((mMax-t)*100.0)/mTotal)
 				devtl.html += html_error.format(right, id, type)
-			for b in sorted(phases[dir]):
+			for b in phases[dir]:
 				# draw the devices for this phase
 				phaselist = data.dmesg[b]['list']
 				for d in data.tdevlist[b]:
@@ -3942,19 +3957,17 @@ def createHTML(testruns, testfail):
 
 	# draw a legend which describes the phases by color
 	if sysvals.suspendmode != 'command':
-		data = testruns[-1]
+		phasedef = testruns[-1].phasedef
 		devtl.html += '<div class="legend">\n'
-		pdelta = 100.0/len(data.phases)
+		pdelta = 100.0/len(phasedef.keys())
 		pmargin = pdelta / 4.0
-		for phase in data.phases:
-			tmp = phase.split('_')
-			id = tmp[0][0]
-			if(len(tmp) > 1):
-				id += tmp[1][0]
-			order = '%.2f' % ((data.dmesg[phase]['order'] * pdelta) + pmargin)
+		for phase in sorted(phasedef, key=lambda k:phasedef[k]['order']):
+			id, p = '', phasedef[phase]
+			for word in phase.split('_'):
+				id += word[0]
+			order = '%.2f' % ((p['order'] * pdelta) + pmargin)
 			name = string.replace(phase, '_', ' &nbsp;')
-			devtl.html += devtl.html_legend.format(order, \
-				data.dmesg[phase]['color'], name, id)
+			devtl.html += devtl.html_legend.format(order, p['color'], name, id)
 		devtl.html += '</div>\n'
 
 	hf = open(sysvals.htmlfile, 'w')
@@ -3970,7 +3983,7 @@ def createHTML(testruns, testfail):
 		pscolor = 'linear-gradient(to top left, #ccc, #eee)'
 		hf.write(devtl.html_phaselet.format('pre_suspend_process', \
 			'0', '0', pscolor))
-		for b in data.phases:
+		for b in data.sortedPhases():
 			phase = data.dmesg[b]
 			length = phase['end']-phase['start']
 			left = '%.3f' % (((phase['start']-t0)*100.0)/tTotal)
@@ -4542,16 +4555,12 @@ def setRuntimeSuspend(before=True):
 def executeSuspend():
 	pm = ProcessMonitor()
 	tp = sysvals.tpath
-	fwdata = []
+	testdata = []
+	battery = True if getBattery() else False
 	# run these commands to prepare the system for suspend
 	if sysvals.display:
-		if sysvals.display > 0:
-			print('TURN DISPLAY ON')
-			call('xset -d :0.0 dpms force suspend', shell=True)
-			call('xset -d :0.0 dpms force on', shell=True)
-		else:
-			print('TURN DISPLAY OFF')
-			call('xset -d :0.0 dpms force suspend', shell=True)
+		print('SET DISPLAY TO %s' % sysvals.display.upper())
+		displayControl(sysvals.display)
 		time.sleep(1)
 	if sysvals.sync:
 		print('SYNCING FILESYSTEMS')
@@ -4579,6 +4588,7 @@ def executeSuspend():
 				print('SUSPEND START')
 			else:
 				print('SUSPEND START (press a key to resume)')
+		bat1 = getBattery() if battery else False
 		# set rtcwake
 		if(sysvals.rtcwake):
 			print('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
@@ -4592,8 +4602,11 @@ def executeSuspend():
 			time.sleep(sysvals.predelay/1000.0)
 			sysvals.fsetVal('WAIT END', 'trace_marker')
 		# initiate suspend or command
+		tdata = {'error': ''}
 		if sysvals.testcommand != '':
-			call(sysvals.testcommand+' 2>&1', shell=True);
+			res = call(sysvals.testcommand+' 2>&1', shell=True);
+			if res != 0:
+				tdata['error'] = 'cmd returned %d' % res
 		else:
 			mode = sysvals.suspendmode
 			if sysvals.memmode and os.path.exists(sysvals.mempowerfile):
@@ -4606,8 +4619,8 @@ def executeSuspend():
 			# execution will pause here
 			try:
 				pf.close()
-			except:
-				pass
+			except Exception as e:
+				tdata['error'] = str(e)
 		if(sysvals.rtcwake):
 			sysvals.rtcWakeAlarmOff()
 		# postdelay delay
@@ -4620,23 +4633,29 @@ def executeSuspend():
 		if(sysvals.usecallgraph or sysvals.usetraceevents):
 			sysvals.fsetVal('RESUME COMPLETE', 'trace_marker')
 		if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'):
-			fwdata.append(getFPDT(False))
+			tdata['fw'] = getFPDT(False)
+		bat2 = getBattery() if battery else False
+		if battery and bat1 and bat2:
+			tdata['bat'] = (bat1, bat2)
+		testdata.append(tdata)
 	# stop ftrace
 	if(sysvals.usecallgraph or sysvals.usetraceevents):
 		if sysvals.useprocmon:
 			pm.stop()
 		sysvals.fsetVal('0', 'tracing_on')
+	# grab a copy of the dmesg output
+	print('CAPTURING DMESG')
+	sysvals.getdmesg(testdata)
+	# grab a copy of the ftrace output
+	if(sysvals.usecallgraph or sysvals.usetraceevents):
 		print('CAPTURING TRACE')
-		op = sysvals.writeDatafileHeader(sysvals.ftracefile, fwdata)
+		op = sysvals.writeDatafileHeader(sysvals.ftracefile, testdata)
 		fp = open(tp+'trace', 'r')
 		for line in fp:
 			op.write(line)
 		op.close()
 		sysvals.fsetVal('', 'trace')
 		devProps()
-	# grab a copy of the dmesg output
-	print('CAPTURING DMESG')
-	sysvals.getdmesg(fwdata)
 
 def readFile(file):
 	if os.path.islink(file):
@@ -4766,7 +4785,7 @@ def devProps(data=0):
 			alreadystamped = True
 			continue
 		# determine the trace data type (required for further parsing)
-		m = re.match(sysvals.tracertypefmt, line)
+		m = re.match(tp.tracertypefmt, line)
 		if(m):
 			tp.setTracerType(m.group('t'))
 			continue
@@ -4994,8 +5013,9 @@ def dmidecode(mempath, fatal=False):
 	return out
 
 def getBattery():
-	p = '/sys/class/power_supply'
-	bat = dict()
+	p, charge, bat = '/sys/class/power_supply', 0, {}
+	if not os.path.exists(p):
+		return False
 	for d in os.listdir(p):
 		type = sysvals.getVal(os.path.join(p, d, 'type')).strip().lower()
 		if type != 'battery':
@@ -5003,15 +5023,42 @@ def getBattery():
 		for v in ['status', 'energy_now', 'capacity_now']:
 			bat[v] = sysvals.getVal(os.path.join(p, d, v)).strip().lower()
 		break
-	ac = True
-	if 'status' in bat and 'discharging' in bat['status']:
-		ac = False
-	charge = 0
+	if 'status' not in bat:
+		return False
+	ac = False if 'discharging' in bat['status'] else True
 	for v in ['energy_now', 'capacity_now']:
 		if v in bat and bat[v]:
 			charge = int(bat[v])
 	return (ac, charge)
 
+def displayControl(cmd):
+	xset, ret = 'xset -d :0.0 {0}', 0
+	if sysvals.sudouser:
+		xset = 'sudo -u %s %s' % (sysvals.sudouser, xset)
+	if cmd == 'init':
+		ret = call(xset.format('dpms 0 0 0'), shell=True)
+		ret = call(xset.format('s off'), shell=True)
+	elif cmd == 'reset':
+		ret = call(xset.format('s reset'), shell=True)
+	elif cmd in ['on', 'off', 'standby', 'suspend']:
+		b4 = displayControl('stat')
+		ret = call(xset.format('dpms force %s' % cmd), shell=True)
+		curr = displayControl('stat')
+		sysvals.vprint('Display Switched: %s -> %s' % (b4, curr))
+		if curr != cmd:
+			sysvals.vprint('WARNING: Display failed to change to %s' % cmd)
+	elif cmd == 'stat':
+		fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout
+		ret = 'unknown'
+		for line in fp:
+			m = re.match('[\s]*Monitor is (?P<m>.*)', line)
+			if(m and len(m.group('m')) >= 2):
+				out = m.group('m').lower()
+				ret = out[3:] if out[0:2] == 'in' else out
+				break
+		fp.close()
+	return ret
+
 # Function: getFPDT
 # Description:
 #	 Read the acpi bios tables and pull out FPDT, the firmware data
@@ -5149,7 +5196,7 @@ def getFPDT(output):
 # Output:
 #	 True if the test will work, False if not
 def statusCheck(probecheck=False):
-	status = True
+	status = ''
 
 	print('Checking this system (%s)...' % platform.node())
 
@@ -5160,7 +5207,7 @@ def statusCheck(probecheck=False):
 	print('    have root access: %s' % res)
 	if(res != 'YES'):
 		print('    Try running this script with sudo')
-		return False
+		return 'missing root access'
 
 	# check sysfs is mounted
 	res = sysvals.colorText('NO (No features of this tool will work!)')
@@ -5168,7 +5215,7 @@ def statusCheck(probecheck=False):
 		res = 'YES'
 	print('    is sysfs mounted: %s' % res)
 	if(res != 'YES'):
-		return False
+		return 'sysfs is missing'
 
 	# check target mode is a valid mode
 	if sysvals.suspendmode != 'command':
@@ -5177,7 +5224,7 @@ def statusCheck(probecheck=False):
 		if(sysvals.suspendmode in modes):
 			res = 'YES'
 		else:
-			status = False
+			status = '%s mode is not supported' % sysvals.suspendmode
 		print('    is "%s" a valid power mode: %s' % (sysvals.suspendmode, res))
 		if(res == 'NO'):
 			print('      valid power modes are: %s' % modes)
@@ -5189,7 +5236,7 @@ def statusCheck(probecheck=False):
 	if(ftgood):
 		res = 'YES'
 	elif(sysvals.usecallgraph):
-		status = False
+		status = 'ftrace is not properly supported'
 	print('    is ftrace supported: %s' % res)
 
 	# check if kprobes are available
@@ -5217,7 +5264,7 @@ def statusCheck(probecheck=False):
 	if(sysvals.rtcpath != ''):
 		res = 'YES'
 	elif(sysvals.rtcwake):
-		status = False
+		status = 'rtcwake is not properly supported'
 	print('    is rtcwake supported: %s' % res)
 
 	if not probecheck:
@@ -5245,7 +5292,7 @@ def doError(msg, help=False):
 		printHelp()
 	print('ERROR: %s\n') % msg
 	sysvals.outputResult({'error':msg})
-	sys.exit()
+	sys.exit(1)
 
 # Function: getArgInt
 # Description:
@@ -5301,11 +5348,16 @@ def processData(live=False):
 			appendIncompleteTraceLog(testruns)
 	sysvals.vprint('Command:\n    %s' % sysvals.cmdline)
 	for data in testruns:
+		if data.battery:
+			a1, c1, a2, c2 = data.battery
+			s = 'Battery:\n    Before - AC: %s, Charge: %d\n     After - AC: %s, Charge: %d' % \
+				(a1, int(c1), a2, int(c2))
+			sysvals.vprint(s)
 		data.printDetails()
 	if sysvals.cgdump:
 		for data in testruns:
 			data.debugPrint()
-		sys.exit()
+		sys.exit(0)
 	if len(testruns) < 1:
 		return (testruns, {'error': 'timeline generation failed'})
 	sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile)
@@ -5335,6 +5387,7 @@ def rerunTest():
 		elif not os.access(sysvals.htmlfile, os.W_OK):
 			doError('missing permission to write to %s' % sysvals.htmlfile)
 	testruns, stamp = processData(False)
+	sysvals.logmsg = ''
 	return stamp
 
 # Function: runTest
@@ -5349,13 +5402,16 @@ def runTest(n=0):
 	executeSuspend()
 	sysvals.cleanupFtrace()
 	if sysvals.skiphtml:
-		sysvals.sudouser(sysvals.testdir)
+		sysvals.sudoUserchown(sysvals.testdir)
 		return
 	testruns, stamp = processData(True)
 	for data in testruns:
 		del data
-	sysvals.sudouser(sysvals.testdir)
+	sysvals.sudoUserchown(sysvals.testdir)
 	sysvals.outputResult(stamp, n)
+	if 'error' in stamp:
+		return 2
+	return 0
 
 def find_in_html(html, start, end, firstonly=True):
 	n, out = 0, []
@@ -5380,14 +5436,86 @@ def find_in_html(html, start, end, firstonly=True):
 		return ''
 	return out
 
+def data_from_html(file, outpath, devlist=False):
+	html = open(file, 'r').read()
+	suspend = find_in_html(html, 'Kernel Suspend', 'ms')
+	resume = find_in_html(html, 'Kernel Resume', 'ms')
+	line = find_in_html(html, '<div class="stamp">', '</div>')
+	stmp = line.split()
+	if not suspend or not resume or len(stmp) != 8:
+		return False
+	try:
+		dt = datetime.strptime(' '.join(stmp[3:]), '%B %d %Y, %I:%M:%S %p')
+	except:
+		return False
+	tstr = dt.strftime('%Y/%m/%d %H:%M:%S')
+	error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>')
+	if error:
+		m = re.match('[a-z]* failed in (?P<p>[a-z0-9_]*) phase', error)
+		if m:
+			result = 'fail in %s' % m.group('p')
+		else:
+			result = 'fail'
+	else:
+		result = 'pass'
+	ilist = []
+	e = find_in_html(html, 'class="err"[\w=":;\.%\- ]*>', '&rarr;</div>', False)
+	for i in list(set(e)):
+		ilist.append('%sx%d' % (i, e.count(i)) if e.count(i) > 1 else i)
+	low = find_in_html(html, 'freeze time: <b>', ' ms</b>')
+	if low and '|' in low:
+		ilist.append('FREEZEx%d' % len(low.split('|')))
+	devices = dict()
+	for line in html.split('\n'):
+		m = re.match(' *<div id=\"[a,0-9]*\" *title=\"(?P<title>.*)\" class=\"thread.*', line)
+		if not m or 'thread kth' in line or 'thread sec' in line:
+			continue
+		m = re.match('(?P<n>.*) \((?P<t>[0-9,\.]*) ms\) (?P<p>.*)', m.group('title'))
+		if not m:
+			continue
+		name, time, phase = m.group('n'), m.group('t'), m.group('p')
+		if ' async' in name or ' sync' in name:
+			name = ' '.join(name.split(' ')[:-1])
+		d = phase.split('_')[0]
+		if d not in devices:
+			devices[d] = dict()
+		if name not in devices[d]:
+			devices[d][name] = 0.0
+		devices[d][name] += float(time)
+	worst  = {'suspend': {'name':'', 'time': 0.0},
+		'resume': {'name':'', 'time': 0.0}}
+	for d in devices:
+		if d not in worst:
+			worst[d] = dict()
+		dev = devices[d]
+		if len(dev.keys()) > 0:
+			n = sorted(dev, key=dev.get, reverse=True)[0]
+			worst[d]['name'], worst[d]['time'] = n, dev[n]
+	data = {
+		'mode': stmp[2],
+		'host': stmp[0],
+		'kernel': stmp[1],
+		'time': tstr,
+		'result': result,
+		'issues': ' '.join(ilist),
+		'suspend': suspend,
+		'resume': resume,
+		'sus_worst': worst['suspend']['name'],
+		'sus_worsttime': worst['suspend']['time'],
+		'res_worst': worst['resume']['name'],
+		'res_worsttime': worst['resume']['time'],
+		'url': os.path.relpath(file, outpath),
+	}
+	if devlist:
+		data['devlist'] = devices
+	return data
+
 # Function: runSummary
 # Description:
 #	 create a summary of tests in a sub-directory
 def runSummary(subdir, local=True, genhtml=False):
 	inpath = os.path.abspath(subdir)
-	outpath = inpath
-	if local:
-		outpath = os.path.abspath('.')
+	outpath = os.path.abspath('.') if local else inpath
 	print('Generating a summary of folder "%s"' % inpath)
 	if genhtml:
 		for dirname, dirnames, filenames in os.walk(subdir):
@@ -5409,36 +5537,9 @@ def runSummary(subdir, local=True, genhtml=False):
 		for filename in filenames:
 			if(not re.match('.*.html', filename)):
 				continue
-			file = os.path.join(dirname, filename)
-			html = open(file, 'r').read()
-			suspend = find_in_html(html, 'Kernel Suspend', 'ms')
-			resume = find_in_html(html, 'Kernel Resume', 'ms')
-			line = find_in_html(html, '<div class="stamp">', '</div>')
-			stmp = line.split()
-			if not suspend or not resume or len(stmp) != 8:
+			data = data_from_html(os.path.join(dirname, filename), outpath)
+			if(not data):
 				continue
-			try:
-				dt = datetime.strptime(' '.join(stmp[3:]), '%B %d %Y, %I:%M:%S %p')
-			except:
-				continue
-			tstr = dt.strftime('%Y/%m/%d %H:%M:%S')
-			error = find_in_html(html, '<table class="testfail"><tr><td>', '</td>')
-			result = 'fail' if error else 'pass'
-			ilist = []
-			e = find_in_html(html, 'class="err"[\w=":;\.%\- ]*>', '&rarr;</div>', False)
-			for i in list(set(e)):
-				ilist.append('%sx%d' % (i, e.count(i)) if e.count(i) > 1 else i)
-			data = {
-				'mode': stmp[2],
-				'host': stmp[0],
-				'kernel': stmp[1],
-				'time': tstr,
-				'result': result,
-				'issues': ','.join(ilist),
-				'suspend': suspend,
-				'resume': resume,
-				'url': os.path.relpath(file, outpath),
-			}
 			testruns.append(data)
 	outfile = os.path.join(outpath, 'summary.html')
 	print('Summary file: %s' % outfile)
@@ -5499,13 +5600,10 @@ def configFromFile(file):
 				else:
 					doError('invalid value --> (%s: %s), use "enable/disable"' % (option, value), True)
 			elif(option == 'display'):
-				if value in switchvalues:
-					if value in switchoff:
-						sysvals.display = -1
-					else:
-						sysvals.display = 1
-				else:
-					doError('invalid value --> (%s: %s), use "on/off"' % (option, value), True)
+				disopt = ['on', 'off', 'standby', 'suspend']
+				if value not in disopt:
+					doError('invalid value --> (%s: %s), use %s' % (option, value, disopt), True)
+				sysvals.display = value
 			elif(option == 'gzip'):
 				sysvals.gzip = checkArgBool(option, value)
 			elif(option == 'cgfilter'):
@@ -5521,9 +5619,9 @@ def configFromFile(file):
 				sysvals.cgtest = getArgInt('cgtest', value, 0, 1, False)
 			elif(option == 'cgphase'):
 				d = Data(0)
-				if value not in d.phases:
+				if value not in d.sortedPhases():
 					doError('invalid phase --> (%s: %s), valid phases are %s'\
-						% (option, value, d.phases), True)
+						% (option, value, d.sortedPhases()), True)
 				sysvals.cgphase = value
 			elif(option == 'fadd'):
 				file = sysvals.configFile(value)
@@ -5697,7 +5795,7 @@ def printHelp():
 	print('  [testprep]')
 	print('   -sync        Sync the filesystems before starting the test')
 	print('   -rs on/off   Enable/disable runtime suspend for all devices, restore all after test')
-	print('   -display on/off  Turn the display on or off for the test')
+	print('   -display m   Change the display mode to m for the test (on/off/standby/suspend)')
 	print('  [advanced]')
 	print('   -gzip        Gzip the trace and dmesg logs to save space')
 	print('   -cmd {s}     Run the timeline over a custom command, e.g. "sync -d"')
@@ -5729,6 +5827,7 @@ def printHelp():
 	print('   -status      Test to see if the system is enabled to run this tool')
 	print('   -fpdt        Print out the contents of the ACPI Firmware Performance Data Table')
 	print('   -battery     Print out battery info (if available)')
+	print('   -x<mode>     Test xset by toggling the given mode (on/off/standby/suspend)')
 	print('   -sysinfo     Print out system info extracted from BIOS')
 	print('   -devinfo     Print out the pm settings of all devices which support runtime suspend')
 	print('   -flist       Print the list of functions currently being captured in ftrace')
@@ -5745,7 +5844,9 @@ def printHelp():
 if __name__ == '__main__':
 	genhtml = False
 	cmd = ''
-	simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall', '-devinfo', '-status', '-battery']
+	simplecmds = ['-sysinfo', '-modes', '-fpdt', '-flist', '-flistall',
+		'-devinfo', '-status', '-battery', '-xon', '-xoff', '-xstandby',
+		'-xsuspend', '-xinit', '-xreset', '-xstat']
 	if '-f' in sys.argv:
 		sysvals.cgskip = sysvals.configFile('cgskip.txt')
 	# loop through the command line arguments
@@ -5763,10 +5864,10 @@ if __name__ == '__main__':
 			cmd = arg[1:]
 		elif(arg == '-h'):
 			printHelp()
-			sys.exit()
+			sys.exit(0)
 		elif(arg == '-v'):
 			print("Version %s" % sysvals.version)
-			sys.exit()
+			sys.exit(0)
 		elif(arg == '-x2'):
 			sysvals.execcount = 2
 		elif(arg == '-x2delay'):
@@ -5785,6 +5886,10 @@ if __name__ == '__main__':
 			genhtml = True
 		elif(arg == '-addlogs'):
 			sysvals.dmesglog = sysvals.ftracelog = True
+		elif(arg == '-addlogdmesg'):
+			sysvals.dmesglog = True
+		elif(arg == '-addlogftrace'):
+			sysvals.ftracelog = True
 		elif(arg == '-verbose'):
 			sysvals.verbose = True
 		elif(arg == '-proc'):
@@ -5811,14 +5916,11 @@ if __name__ == '__main__':
 			try:
 				val = args.next()
 			except:
-				doError('-display requires "on" or "off"', True)
-			if val.lower() in switchvalues:
-				if val.lower() in switchoff:
-					sysvals.display = -1
-				else:
-					sysvals.display = 1
-			else:
-				doError('invalid option: %s, use "on/off"' % val, True)
+				doError('-display requires an mode value', True)
+			disopt = ['on', 'off', 'standby', 'suspend']
+			if val.lower() not in disopt:
+				doError('valid display mode values are %s' % disopt, True)
+			sysvals.display = val.lower()
 		elif(arg == '-maxdepth'):
 			sysvals.max_graph_depth = getArgInt('-maxdepth', args, 0, 1000)
 		elif(arg == '-rtcwake'):
@@ -5847,9 +5949,9 @@ if __name__ == '__main__':
 			except:
 				doError('No phase name supplied', True)
 			d = Data(0)
-			if val not in d.phases:
+			if val not in d.phasedef:
 				doError('invalid phase --> (%s: %s), valid phases are %s'\
-					% (arg, val, d.phases), True)
+					% (arg, val, d.phasedef.keys()), True)
 			sysvals.cgphase = val
 		elif(arg == '-cgfilter'):
 			try:
@@ -5951,6 +6053,7 @@ if __name__ == '__main__':
 			except:
 				doError('No result file supplied', True)
 			sysvals.result = val
+			sysvals.signalHandlerInit()
 		else:
 			doError('Invalid argument: '+arg, True)
 
@@ -5975,12 +6078,20 @@ if __name__ == '__main__':
 
 	# just run a utility command and exit
 	if(cmd != ''):
+		ret = 0
 		if(cmd == 'status'):
-			statusCheck(True)
+			if not statusCheck(True):
+				ret = 1
 		elif(cmd == 'fpdt'):
-			getFPDT(True)
+			if not getFPDT(True):
+				ret = 1
 		elif(cmd == 'battery'):
-			print 'AC Connect: %s\nCharge: %d' % getBattery()
+			out = getBattery()
+			if out:
+				print 'AC Connect    : %s\nBattery Charge: %d' % out
+			else:
+				print 'no battery found'
+				ret = 1
 		elif(cmd == 'sysinfo'):
 			sysvals.printSystemInfo(True)
 		elif(cmd == 'devinfo'):
@@ -5993,17 +6104,23 @@ if __name__ == '__main__':
 			sysvals.getFtraceFilterFunctions(False)
 		elif(cmd == 'summary'):
 			runSummary(sysvals.outdir, True, genhtml)
-		sys.exit()
+		elif(cmd in ['xon', 'xoff', 'xstandby', 'xsuspend', 'xinit', 'xreset']):
+			sysvals.verbose = True
+			ret = displayControl(cmd[1:])
+		elif(cmd == 'xstat'):
+			print 'Display Status: %s' % displayControl('stat').upper()
+		sys.exit(ret)
 
 	# if instructed, re-analyze existing data files
 	if(sysvals.notestrun):
 		stamp = rerunTest()
 		sysvals.outputResult(stamp)
-		sys.exit()
+		sys.exit(0)
 
 	# verify that we can run a test
-	if(not statusCheck()):
-		doError('Check FAILED, aborting the test run!')
+	error = statusCheck()
+	if(error):
+		doError(error)
 
 	# extract mem modes and convert
 	mode = sysvals.suspendmode
@@ -6025,8 +6142,8 @@ if __name__ == '__main__':
 
 	setRuntimeSuspend(True)
 	if sysvals.display:
-		call('xset -d :0.0 dpms 0 0 0', shell=True)
-		call('xset -d :0.0 s off', shell=True)
+		displayControl('init')
+	ret = 0
 	if sysvals.multitest['run']:
 		# run multiple tests in a separate subdirectory
 		if not sysvals.outdir:
@@ -6041,17 +6158,18 @@ if __name__ == '__main__':
 			print('TEST (%d/%d) START' % (i+1, sysvals.multitest['count']))
 			fmt = 'suspend-%y%m%d-%H%M%S'
 			sysvals.testdir = os.path.join(sysvals.outdir, datetime.now().strftime(fmt))
-			runTest(i+1)
+			ret = runTest(i+1)
 			print('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count']))
 			sysvals.logmsg = ''
 		if not sysvals.skiphtml:
 			runSummary(sysvals.outdir, False, False)
-		sysvals.sudouser(sysvals.outdir)
+		sysvals.sudoUserchown(sysvals.outdir)
 	else:
 		if sysvals.outdir:
 			sysvals.testdir = sysvals.outdir
 		# run the test in the current directory
-		runTest()
+		ret = runTest()
 	if sysvals.display:
-		call('xset -d :0.0 s reset', shell=True)
+		displayControl('reset')
 	setRuntimeSuspend(False)
+	sys.exit(ret)
-- 
cgit v1.1


From 18d3f8fc0c2c5a55d40d1bedcf46865ecb87d84e Mon Sep 17 00:00:00 2001
From: Todd Brandt <todd.e.brandt@linux.intel.com>
Date: Mon, 8 Oct 2018 15:56:32 -0700
Subject: PM / tools: sleepgraph and bootgraph: upgrade to v5.2

bootgraph & sleepgraph:
- funnel all prints through the pprint function
- remove superfluous print calls, arrange them in single blocks
- flush stdout on every print, enables log capture on hang

sleepgraph:
- in -summary, if all tests have the same host+kernel+mode, add to title
- update verbose device detail print to include machine suspend/resume
- match tKernSus and tKernRes to pm_prepare/restore_console
- fully support multiple suspend/resumes in a single timeline
- enable various disk modes (disk-suspend, disk-test_resume, etc)
- add warnings when -display (xset) fails

Signed-off-by: Todd Brandt <todd.e.brandt@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 tools/power/pm-graph/bootgraph.py                  | 125 ++---
 tools/power/pm-graph/config/cgskip.txt             |   1 +
 .../pm-graph/config/custom-timeline-functions.cfg  |   4 +-
 tools/power/pm-graph/sleepgraph.py                 | 532 ++++++++++++---------
 4 files changed, 364 insertions(+), 298 deletions(-)

diff --git a/tools/power/pm-graph/bootgraph.py b/tools/power/pm-graph/bootgraph.py
index 8ee626c..6dae570 100755
--- a/tools/power/pm-graph/bootgraph.py
+++ b/tools/power/pm-graph/bootgraph.py
@@ -34,6 +34,10 @@ from datetime import datetime, timedelta
 from subprocess import call, Popen, PIPE
 import sleepgraph as aslib
 
+def pprint(msg):
+	print(msg)
+	sys.stdout.flush()
+
 # ----------------- CLASSES --------------------
 
 # Class: SystemValues
@@ -157,11 +161,11 @@ class SystemValues(aslib.SystemValues):
 		return cmdline
 	def manualRebootRequired(self):
 		cmdline = self.kernelParams()
-		print 'To generate a new timeline manually, follow these steps:\n'
-		print '1. Add the CMDLINE string to your kernel command line.'
-		print '2. Reboot the system.'
-		print '3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n'
-		print 'CMDLINE="%s"' % cmdline
+		pprint('To generate a new timeline manually, follow these steps:\n\n'\
+		'1. Add the CMDLINE string to your kernel command line.\n'\
+		'2. Reboot the system.\n'\
+		'3. After reboot, re-run this tool with the same arguments but no command (w/o -reboot or -manual).\n\n'\
+		'CMDLINE="%s"' % cmdline)
 		sys.exit()
 	def blGrub(self):
 		blcmd = ''
@@ -431,7 +435,7 @@ def parseTraceLog(data):
 			if len(cg.list) < 1 or cg.invalid or (cg.end - cg.start == 0):
 				continue
 			if(not cg.postProcess()):
-				print('Sanity check failed for %s-%d' % (proc, pid))
+				pprint('Sanity check failed for %s-%d' % (proc, pid))
 				continue
 			# match cg data to devices
 			devname = data.deviceMatch(pid, cg)
@@ -442,8 +446,8 @@ def parseTraceLog(data):
 				sysvals.vprint('%s callgraph found for %s %s-%d [%f - %f]' %\
 					(kind, cg.name, proc, pid, cg.start, cg.end))
 			elif len(cg.list) > 1000000:
-				print 'WARNING: the callgraph found for %s is massive! (%d lines)' %\
-					(devname, len(cg.list))
+				pprint('WARNING: the callgraph found for %s is massive! (%d lines)' %\
+					(devname, len(cg.list)))
 
 # Function: retrieveLogs
 # Description:
@@ -528,7 +532,7 @@ def createBootGraph(data):
 	tMax = data.end
 	tTotal = tMax - t0
 	if(tTotal == 0):
-		print('ERROR: No timeline data')
+		pprint('ERROR: No timeline data')
 		return False
 	user_mode = '%.0f'%(data.tUserMode*1000)
 	last_init = '%.0f'%(tTotal*1000)
@@ -734,7 +738,7 @@ def updateCron(restore=False):
 		op.close()
 		res = call([cmd, cronfile])
 	except Exception, e:
-		print 'Exception: %s' % str(e)
+		pprint('Exception: %s' % str(e))
 		shutil.move(backfile, cronfile)
 		res = -1
 	if res != 0:
@@ -750,7 +754,7 @@ def updateGrub(restore=False):
 			call(sysvals.blexec, stderr=PIPE, stdout=PIPE,
 				env={'PATH': '.:/sbin:/usr/sbin:/usr/bin:/sbin:/bin'})
 		except Exception, e:
-			print 'Exception: %s\n' % str(e)
+			pprint('Exception: %s\n' % str(e))
 		return
 	# extract the option and create a grub config without it
 	sysvals.rootUser(True)
@@ -797,7 +801,7 @@ def updateGrub(restore=False):
 		res = call(sysvals.blexec)
 		os.remove(grubfile)
 	except Exception, e:
-		print 'Exception: %s' % str(e)
+		pprint('Exception: %s' % str(e))
 		res = -1
 	# cleanup
 	shutil.move(tempfile, grubfile)
@@ -821,7 +825,7 @@ def updateKernelParams(restore=False):
 def doError(msg, help=False):
 	if help == True:
 		printHelp()
-	print 'ERROR: %s\n' % msg
+	pprint('ERROR: %s\n' % msg)
 	sysvals.outputResult({'error':msg})
 	sys.exit()
 
@@ -829,52 +833,51 @@ def doError(msg, help=False):
 # Description:
 #	 print out the help text
 def printHelp():
-	print('')
-	print('%s v%s' % (sysvals.title, sysvals.version))
-	print('Usage: bootgraph <options> <command>')
-	print('')
-	print('Description:')
-	print('  This tool reads in a dmesg log of linux kernel boot and')
-	print('  creates an html representation of the boot timeline up to')
-	print('  the start of the init process.')
-	print('')
-	print('  If no specific command is given the tool reads the current dmesg')
-	print('  and/or ftrace log and creates a timeline')
-	print('')
-	print('  Generates output files in subdirectory: boot-yymmdd-HHMMSS')
-	print('   HTML output:                    <hostname>_boot.html')
-	print('   raw dmesg output:               <hostname>_boot_dmesg.txt')
-	print('   raw ftrace output:              <hostname>_boot_ftrace.txt')
-	print('')
-	print('Options:')
-	print('  -h            Print this help text')
-	print('  -v            Print the current tool version')
-	print('  -verbose      Print extra information during execution and analysis')
-	print('  -addlogs      Add the dmesg log to the html output')
-	print('  -result fn    Export a results table to a text file for parsing.')
-	print('  -o name       Overrides the output subdirectory name when running a new test')
-	print('                default: boot-{date}-{time}')
-	print(' [advanced]')
-	print('  -fstat        Use ftrace to add function detail and statistics (default: disabled)')
-	print('  -f/-callgraph Add callgraph detail, can be very large (default: disabled)')
-	print('  -maxdepth N   limit the callgraph data to N call levels (default: 2)')
-	print('  -mincg ms     Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
-	print('  -timeprec N   Number of significant digits in timestamps (0:S, 3:ms, [6:us])')
-	print('  -expandcg     pre-expand the callgraph data in the html output (default: disabled)')
-	print('  -func list    Limit ftrace to comma-delimited list of functions (default: do_one_initcall)')
-	print('  -cgfilter S   Filter the callgraph output in the timeline')
-	print('  -cgskip file  Callgraph functions to skip, off to disable (default: cgskip.txt)')
-	print('  -bl name      Use the following boot loader for kernel params (default: grub)')
-	print('  -reboot       Reboot the machine automatically and generate a new timeline')
-	print('  -manual       Show the steps to generate a new timeline manually (used with -reboot)')
-	print('')
-	print('Other commands:')
-	print('  -flistall     Print all functions capable of being captured in ftrace')
-	print('  -sysinfo      Print out system info extracted from BIOS')
-	print(' [redo]')
-	print('  -dmesg file   Create HTML output using dmesg input (used with -ftrace)')
-	print('  -ftrace file  Create HTML output using ftrace input (used with -dmesg)')
-	print('')
+	pprint('\n%s v%s\n'\
+	'Usage: bootgraph <options> <command>\n'\
+	'\n'\
+	'Description:\n'\
+	'  This tool reads in a dmesg log of linux kernel boot and\n'\
+	'  creates an html representation of the boot timeline up to\n'\
+	'  the start of the init process.\n'\
+	'\n'\
+	'  If no specific command is given the tool reads the current dmesg\n'\
+	'  and/or ftrace log and creates a timeline\n'\
+	'\n'\
+	'  Generates output files in subdirectory: boot-yymmdd-HHMMSS\n'\
+	'   HTML output:                    <hostname>_boot.html\n'\
+	'   raw dmesg output:               <hostname>_boot_dmesg.txt\n'\
+	'   raw ftrace output:              <hostname>_boot_ftrace.txt\n'\
+	'\n'\
+	'Options:\n'\
+	'  -h            Print this help text\n'\
+	'  -v            Print the current tool version\n'\
+	'  -verbose      Print extra information during execution and analysis\n'\
+	'  -addlogs      Add the dmesg log to the html output\n'\
+	'  -result fn    Export a results table to a text file for parsing.\n'\
+	'  -o name       Overrides the output subdirectory name when running a new test\n'\
+	'                default: boot-{date}-{time}\n'\
+	' [advanced]\n'\
+	'  -fstat        Use ftrace to add function detail and statistics (default: disabled)\n'\
+	'  -f/-callgraph Add callgraph detail, can be very large (default: disabled)\n'\
+	'  -maxdepth N   limit the callgraph data to N call levels (default: 2)\n'\
+	'  -mincg ms     Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)\n'\
+	'  -timeprec N   Number of significant digits in timestamps (0:S, 3:ms, [6:us])\n'\
+	'  -expandcg     pre-expand the callgraph data in the html output (default: disabled)\n'\
+	'  -func list    Limit ftrace to comma-delimited list of functions (default: do_one_initcall)\n'\
+	'  -cgfilter S   Filter the callgraph output in the timeline\n'\
+	'  -cgskip file  Callgraph functions to skip, off to disable (default: cgskip.txt)\n'\
+	'  -bl name      Use the following boot loader for kernel params (default: grub)\n'\
+	'  -reboot       Reboot the machine automatically and generate a new timeline\n'\
+	'  -manual       Show the steps to generate a new timeline manually (used with -reboot)\n'\
+	'\n'\
+	'Other commands:\n'\
+	'  -flistall     Print all functions capable of being captured in ftrace\n'\
+	'  -sysinfo      Print out system info extracted from BIOS\n'\
+	' [redo]\n'\
+	'  -dmesg file   Create HTML output using dmesg input (used with -ftrace)\n'\
+	'  -ftrace file  Create HTML output using ftrace input (used with -dmesg)\n'\
+	'' % (sysvals.title, sysvals.version))
 	return True
 
 # ----------------- MAIN --------------------
@@ -895,7 +898,7 @@ if __name__ == '__main__':
 			printHelp()
 			sys.exit()
 		elif(arg == '-v'):
-			print("Version %s" % sysvals.version)
+			pprint("Version %s" % sysvals.version)
 			sys.exit()
 		elif(arg == '-verbose'):
 			sysvals.verbose = True
@@ -1016,7 +1019,7 @@ if __name__ == '__main__':
 				print f
 		elif cmd == 'checkbl':
 			sysvals.getBootLoader()
-			print 'Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec)
+			pprint('Boot Loader: %s\n%s' % (sysvals.bootloader, sysvals.blexec))
 		elif(cmd == 'sysinfo'):
 			sysvals.printSystemInfo(True)
 		sys.exit()
diff --git a/tools/power/pm-graph/config/cgskip.txt b/tools/power/pm-graph/config/cgskip.txt
index e48d588..9ff88e7 100644
--- a/tools/power/pm-graph/config/cgskip.txt
+++ b/tools/power/pm-graph/config/cgskip.txt
@@ -27,6 +27,7 @@ ktime_get
 # console calls
 printk
 dev_printk
+__dev_printk
 console_unlock
 
 # memory handling
diff --git a/tools/power/pm-graph/config/custom-timeline-functions.cfg b/tools/power/pm-graph/config/custom-timeline-functions.cfg
index f8fcb06..4f80ad7 100644
--- a/tools/power/pm-graph/config/custom-timeline-functions.cfg
+++ b/tools/power/pm-graph/config/custom-timeline-functions.cfg
@@ -105,7 +105,7 @@ override-dev-timeline-functions: true
 #       example: [color=#CC00CC]
 #
 #   arglist: A list of arguments from registers/stack addresses. See URL:
-#            https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst
+#            https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt
 #
 #       example: cpu=%di:s32
 #
@@ -170,7 +170,7 @@ pm_restore_console:
 #       example: [color=#CC00CC]
 #
 #   arglist: A list of arguments from registers/stack addresses. See URL:
-#            https://www.kernel.org/doc/Documentation/trace/kprobetrace.rst
+#            https://www.kernel.org/doc/Documentation/trace/kprobetrace.txt
 #
 #       example: port=+36(%di):s32
 #
diff --git a/tools/power/pm-graph/sleepgraph.py b/tools/power/pm-graph/sleepgraph.py
index 343e800..52618f3 100755
--- a/tools/power/pm-graph/sleepgraph.py
+++ b/tools/power/pm-graph/sleepgraph.py
@@ -62,6 +62,10 @@ import gzip
 from threading import Thread
 from subprocess import call, Popen, PIPE
 
+def pprint(msg):
+	print(msg)
+	sys.stdout.flush()
+
 # ----------------- CLASSES --------------------
 
 # Class: SystemValues
@@ -70,7 +74,7 @@ from subprocess import call, Popen, PIPE
 #	 store system values and test parameters
 class SystemValues:
 	title = 'SleepGraph'
-	version = '5.1'
+	version = '5.2'
 	ansi = False
 	rs = 0
 	display = ''
@@ -111,8 +115,10 @@ class SystemValues:
 	mempath = '/dev/mem'
 	powerfile = '/sys/power/state'
 	mempowerfile = '/sys/power/mem_sleep'
+	diskpowerfile = '/sys/power/disk'
 	suspendmode = 'mem'
 	memmode = ''
+	diskmode = ''
 	hostname = 'localhost'
 	prefix = 'test'
 	teststamp = ''
@@ -139,6 +145,7 @@ class SystemValues:
 	useprocmon = False
 	notestrun = False
 	cgdump = False
+	devdump = False
 	mixedphaseheight = True
 	devprops = dict()
 	predelay = 0
@@ -265,7 +272,7 @@ class SystemValues:
 	def vprint(self, msg):
 		self.logmsg += msg+'\n'
 		if self.verbose or msg.startswith('WARNING:'):
-			print(msg)
+			pprint(msg)
 	def signalHandler(self, signum, frame):
 		if not self.result:
 			return
@@ -290,7 +297,7 @@ class SystemValues:
 			return True
 		if fatal:
 			msg = 'This command requires sysfs mount and root access'
-			print('ERROR: %s\n') % msg
+			pprint('ERROR: %s\n' % msg)
 			self.outputResult({'error':msg})
 			sys.exit(1)
 		return False
@@ -299,7 +306,7 @@ class SystemValues:
 			return True
 		if fatal:
 			msg = 'This command must be run as root'
-			print('ERROR: %s\n') % msg
+			pprint('ERROR: %s\n' % msg)
 			self.outputResult({'error':msg})
 			sys.exit(1)
 		return False
@@ -560,7 +567,7 @@ class SystemValues:
 		if len(self.kprobes) < 1:
 			return
 		if output:
-			print('    kprobe functions in this kernel:')
+			pprint('    kprobe functions in this kernel:')
 		# first test each kprobe
 		rejects = []
 		# sort kprobes: trace, ub-dev, custom, dev
@@ -582,7 +589,7 @@ class SystemValues:
 				else:
 					kpl[2].append(name)
 			if output:
-				print('         %s: %s' % (name, res))
+				pprint('         %s: %s' % (name, res))
 		kplist = kpl[0] + kpl[1] + kpl[2] + kpl[3]
 		# remove all failed ones from the list
 		for name in rejects:
@@ -596,7 +603,7 @@ class SystemValues:
 		if output:
 			check = self.fgetVal('kprobe_events')
 			linesack = (len(check.split('\n')) - 1) / 2
-			print('    kprobe functions enabled: %d/%d' % (linesack, linesout))
+			pprint('    kprobe functions enabled: %d/%d' % (linesack, linesout))
 		self.fsetVal('1', 'events/kprobes/enable')
 	def testKprobe(self, kname, kprobe):
 		self.fsetVal('0', 'events/kprobes/enable')
@@ -664,7 +671,7 @@ class SystemValues:
 		return False
 	def initFtrace(self):
 		self.printSystemInfo(False)
-		print('INITIALIZING FTRACE...')
+		pprint('INITIALIZING FTRACE...')
 		# turn trace off
 		self.fsetVal('0', 'tracing_on')
 		self.cleanupFtrace()
@@ -681,7 +688,8 @@ class SystemValues:
 		if self.bufsize > 0:
 			tgtsize = self.bufsize
 		elif self.usecallgraph or self.usedevsrc:
-			tgtsize = min(self.memfree, 3*1024*1024)
+			bmax = (1*1024*1024) if self.suspendmode == 'disk' else (3*1024*1024)
+			tgtsize = min(self.memfree, bmax)
 		else:
 			tgtsize = 65536
 		while not self.fsetVal('%d' % (tgtsize / cpus), 'buffer_size_kb'):
@@ -690,7 +698,7 @@ class SystemValues:
 			if tgtsize < 65536:
 				tgtsize = int(self.fgetVal('buffer_size_kb')) * cpus
 				break
-		print 'Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus)
+		pprint('Setting trace buffers to %d kB (%d kB per cpu)' % (tgtsize, tgtsize/cpus))
 		# initialize the callgraph trace
 		if(self.usecallgraph):
 			# set trace type
@@ -723,7 +731,7 @@ class SystemValues:
 			if self.usedevsrc:
 				for name in self.dev_tracefuncs:
 					self.defaultKprobe(name, self.dev_tracefuncs[name])
-			print('INITIALIZING KPROBES...')
+			pprint('INITIALIZING KPROBES...')
 			self.addKprobes(self.verbose)
 		if(self.usetraceevents):
 			# turn trace events on
@@ -846,7 +854,7 @@ class DevProps:
 	def out(self, dev):
 		return '%s,%s,%d;' % (dev, self.altname, self.async)
 	def debug(self, dev):
-		print '%s:\n\taltname = %s\n\t  async = %s' % (dev, self.altname, self.async)
+		pprint('%s:\n\taltname = %s\n\t  async = %s' % (dev, self.altname, self.async))
 	def altName(self, dev):
 		if not self.altname or self.altname == dev:
 			return dev
@@ -1217,16 +1225,8 @@ class Data:
 					self.tLow.append('%.0f'%(tL*1000))
 			lp = phase
 	def getTimeValues(self):
-		if 'suspend_machine' in self.dmesg:
-			sktime = (self.dmesg['suspend_machine']['end'] - \
-				self.tKernSus) * 1000
-		else:
-			sktime = (self.tSuspended - self.tKernSus) * 1000
-		if 'resume_machine' in self.dmesg:
-			rktime = (self.tKernRes - \
-				self.dmesg['resume_machine']['start']) * 1000
-		else:
-			rktime = (self.tKernRes - self.tResumed) * 1000
+		sktime = (self.tSuspended - self.tKernSus) * 1000
+		rktime = (self.tKernRes - self.tResumed) * 1000
 		return (sktime, rktime)
 	def setPhase(self, phase, ktime, isbegin, order=-1):
 		if(isbegin):
@@ -1379,14 +1379,38 @@ class Data:
 			if(list[child]['par'] == devname):
 				devlist.append(child)
 		return devlist
+	def maxDeviceNameSize(self, phase):
+		size = 0
+		for name in self.dmesg[phase]['list']:
+			if len(name) > size:
+				size = len(name)
+		return size
 	def printDetails(self):
 		sysvals.vprint('Timeline Details:')
 		sysvals.vprint('          test start: %f' % self.start)
 		sysvals.vprint('kernel suspend start: %f' % self.tKernSus)
+		tS = tR = False
 		for phase in self.sortedPhases():
-			dc = len(self.dmesg[phase]['list'])
-			sysvals.vprint('    %16s: %f - %f (%d devices)' % (phase, \
-				self.dmesg[phase]['start'], self.dmesg[phase]['end'], dc))
+			devlist = self.dmesg[phase]['list']
+			dc, ps, pe = len(devlist), self.dmesg[phase]['start'], self.dmesg[phase]['end']
+			if not tS and ps >= self.tSuspended:
+				sysvals.vprint('   machine suspended: %f' % self.tSuspended)
+				tS = True
+			if not tR and ps >= self.tResumed:
+				sysvals.vprint('     machine resumed: %f' % self.tResumed)
+				tR = True
+			sysvals.vprint('%20s: %f - %f (%d devices)' % (phase, ps, pe, dc))
+			if sysvals.devdump:
+				sysvals.vprint(''.join('-' for i in range(80)))
+				maxname = '%d' % self.maxDeviceNameSize(phase)
+				fmt = '%3d) %'+maxname+'s - %f - %f'
+				c = 1
+				for name in devlist:
+					s = devlist[name]['start']
+					e = devlist[name]['end']
+					sysvals.vprint(fmt % (c, name, s, e))
+					c += 1
+				sysvals.vprint(''.join('-' for i in range(80)))
 		sysvals.vprint('   kernel resume end: %f' % self.tKernRes)
 		sysvals.vprint('            test end: %f' % self.end)
 	def deviceChildrenAllPhases(self, devname):
@@ -1721,13 +1745,13 @@ class FTraceLine:
 		return len(str)/2
 	def debugPrint(self, info=''):
 		if self.isLeaf():
-			print(' -- %12.6f (depth=%02d): %s(); (%.3f us) %s' % (self.time, \
+			pprint(' -- %12.6f (depth=%02d): %s(); (%.3f us) %s' % (self.time, \
 				self.depth, self.name, self.length*1000000, info))
 		elif self.freturn:
-			print(' -- %12.6f (depth=%02d): %s} (%.3f us) %s' % (self.time, \
+			pprint(' -- %12.6f (depth=%02d): %s} (%.3f us) %s' % (self.time, \
 				self.depth, self.name, self.length*1000000, info))
 		else:
-			print(' -- %12.6f (depth=%02d): %s() { (%.3f us) %s' % (self.time, \
+			pprint(' -- %12.6f (depth=%02d): %s() { (%.3f us) %s' % (self.time, \
 				self.depth, self.name, self.length*1000000, info))
 	def startMarker(self):
 		# Is this the starting line of a suspend?
@@ -1868,7 +1892,7 @@ class FTraceCallGraph:
 			if warning and ('[make leaf]', line) not in info:
 				info.append(('', line))
 		if warning:
-			print 'WARNING: ftrace data missing, corrections made:'
+			pprint('WARNING: ftrace data missing, corrections made:')
 			for i in info:
 				t, obj = i
 				if obj:
@@ -1928,10 +1952,10 @@ class FTraceCallGraph:
 		id = 'task %s' % (self.pid)
 		window = '(%f - %f)' % (self.start, line.time)
 		if(self.depth < 0):
-			print('Data misalignment for '+id+\
+			pprint('Data misalignment for '+id+\
 				' (buffer overflow), ignoring this callback')
 		else:
-			print('Too much data for '+id+\
+			pprint('Too much data for '+id+\
 				' '+window+', ignoring this callback')
 	def slice(self, dev):
 		minicg = FTraceCallGraph(dev['pid'], self.sv)
@@ -1984,7 +2008,7 @@ class FTraceCallGraph:
 			elif l.isReturn():
 				if(l.depth not in stack):
 					if self.sv.verbose:
-						print 'Post Process Error: Depth missing'
+						pprint('Post Process Error: Depth missing')
 						l.debugPrint()
 					return False
 				# calculate call length from call/return lines
@@ -2001,7 +2025,7 @@ class FTraceCallGraph:
 			return True
 		elif(cnt < 0):
 			if self.sv.verbose:
-				print 'Post Process Error: Depth is less than 0'
+				pprint('Post Process Error: Depth is less than 0')
 			return False
 		# trace ended before call tree finished
 		return self.repair(cnt)
@@ -2060,20 +2084,20 @@ class FTraceCallGraph:
 			phase, myname = out
 			data.dmesg[phase]['list'][myname]['ftrace'] = self
 	def debugPrint(self, info=''):
-		print('%s pid=%d [%f - %f] %.3f us') % \
+		pprint('%s pid=%d [%f - %f] %.3f us' % \
 			(self.name, self.pid, self.start, self.end,
-			(self.end - self.start)*1000000)
+			(self.end - self.start)*1000000))
 		for l in self.list:
 			if l.isLeaf():
-				print('%f (%02d): %s(); (%.3f us)%s' % (l.time, \
+				pprint('%f (%02d): %s(); (%.3f us)%s' % (l.time, \
 					l.depth, l.name, l.length*1000000, info))
 			elif l.freturn:
-				print('%f (%02d): %s} (%.3f us)%s' % (l.time, \
+				pprint('%f (%02d): %s} (%.3f us)%s' % (l.time, \
 					l.depth, l.name, l.length*1000000, info))
 			else:
-				print('%f (%02d): %s() { (%.3f us)%s' % (l.time, \
+				pprint('%f (%02d): %s() { (%.3f us)%s' % (l.time, \
 					l.depth, l.name, l.length*1000000, info))
-		print(' ')
+		pprint(' ')
 
 class DevItem:
 	def __init__(self, test, phase, dev):
@@ -2658,7 +2682,8 @@ def parseTraceLog(live=False):
 		doError('%s does not exist' % sysvals.ftracefile)
 	if not live:
 		sysvals.setupAllKprobes()
-	krescalls = ['pm_notifier_call_chain', 'pm_restore_console']
+	ksuscalls = ['pm_prepare_console']
+	krescalls = ['pm_restore_console']
 	tracewatch = []
 	if sysvals.usekprobes:
 		tracewatch += ['sync_filesystems', 'freeze_processes', 'syscore_suspend',
@@ -2738,6 +2763,7 @@ def parseTraceLog(live=False):
 			testruns.append(testrun)
 			tp.parseStamp(data, sysvals)
 			data.setStart(t.time)
+			data.first_suspend_prepare = True
 			phase = data.setPhase('suspend_prepare', t.time, True)
 			continue
 		if(not data):
@@ -2797,11 +2823,12 @@ def parseTraceLog(live=False):
 					continue
 				# suspend_prepare start
 				elif(re.match('dpm_prepare\[.*', t.name)):
-					phase = 'suspend_prepare'
-					if not isbegin:
-						data.setPhase(phase, t.time, isbegin)
-					if isbegin and data.tKernSus == 0:
-						data.tKernSus = t.time
+					if isbegin and data.first_suspend_prepare:
+						data.first_suspend_prepare = False
+						if data.tKernSus == 0:
+							data.tKernSus = t.time
+						continue
+					phase = data.setPhase('suspend_prepare', t.time, isbegin)
 					continue
 				# suspend start
 				elif(re.match('dpm_suspend\[.*', t.name)):
@@ -2826,11 +2853,11 @@ def parseTraceLog(live=False):
 					else:
 						phase = data.setPhase('resume_machine', t.time, True)
 						if(sysvals.suspendmode in ['mem', 'disk']):
-							if 'suspend_machine' in data.dmesg:
-								data.dmesg['suspend_machine']['end'] = t.time
+							susp = phase.replace('resume', 'suspend')
+							if susp in data.dmesg:
+								data.dmesg[susp]['end'] = t.time
 							data.tSuspended = t.time
-						if data.tResumed == 0:
-							data.tResumed = t.time
+						data.tResumed = t.time
 					continue
 				# resume_noirq start
 				elif(re.match('dpm_resume_noirq\[.*', t.name)):
@@ -2911,6 +2938,9 @@ def parseTraceLog(live=False):
 					'cdata': kprobedata,
 					'proc': m_proc,
 				})
+				# start of kernel resume
+				if(phase == 'suspend_prepare' and kprobename in ksuscalls):
+					data.tKernSus = t.time
 			elif(t.freturn):
 				if(key not in tp.ktemp) or len(tp.ktemp[key]) < 1:
 					continue
@@ -2962,6 +2992,14 @@ def parseTraceLog(live=False):
 	if sysvals.usedevsrc or sysvals.useprocmon:
 		sysvals.mixedphaseheight = False
 
+	# expand phase boundaries so there are no gaps
+	for data in testdata:
+		lp = data.sortedPhases()[0]
+		for p in data.sortedPhases():
+			if(p != lp and not ('machine' in p and 'machine' in lp)):
+				data.dmesg[lp]['end'] = data.dmesg[p]['start']
+			lp = p
+
 	for i in range(len(testruns)):
 		test = testruns[i]
 		data = test.data
@@ -3044,7 +3082,7 @@ def parseTraceLog(live=False):
 		for p in sorted(phasedef, key=lambda k:phasedef[k]['order']):
 			if p not in data.dmesg:
 				if not terr:
-					print 'TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp)
+					pprint('TEST%s FAILED: %s failed in %s phase' % (tn, sysvals.suspendmode, lp))
 					terr = '%s%s failed in %s phase' % (sysvals.suspendmode, tn, lp)
 					error.append(terr)
 					if data.tSuspended == 0:
@@ -3055,14 +3093,9 @@ def parseTraceLog(live=False):
 				sysvals.vprint('WARNING: phase "%s" is missing!' % p)
 			lp = p
 		if not terr and data.enterfail:
-			print 'test%s FAILED: enter %s failed with %s' % (tn, sysvals.suspendmode, data.enterfail)
+			pprint('test%s FAILED: enter %s failed with %s' % (tn, sysvals.suspendmode, data.enterfail))
 			terr = 'test%s failed to enter %s mode' % (tn, sysvals.suspendmode)
 			error.append(terr)
-		lp = data.sortedPhases()[0]
-		for p in data.sortedPhases():
-			if(p != lp and not ('machine' in p and 'machine' in lp)):
-				data.dmesg[lp]['end'] = data.dmesg[p]['start']
-			lp = p
 		if data.tSuspended == 0:
 			data.tSuspended = data.tKernRes
 		if data.tResumed == 0:
@@ -3151,7 +3184,7 @@ def loadKernelLog():
 	if data:
 		testruns.append(data)
 	if len(testruns) < 1:
-		print('ERROR: dmesg log has no suspend/resume data: %s' \
+		pprint('ERROR: dmesg log has no suspend/resume data: %s' \
 			% sysvals.dmesgfile)
 
 	# fix lines with same timestamp/function with the call and return swapped
@@ -3405,7 +3438,7 @@ def parseKernelLog(data):
 	for p in sorted(phasedef, key=lambda k:phasedef[k]['order']):
 		if p not in data.dmesg:
 			if not terr:
-				print 'TEST FAILED: %s failed in %s phase' % (sysvals.suspendmode, lp)
+				pprint('TEST FAILED: %s failed in %s phase' % (sysvals.suspendmode, lp))
 				terr = '%s failed in %s phase' % (sysvals.suspendmode, lp)
 				if data.tSuspended == 0:
 					data.tSuspended = data.dmesg[lp]['end']
@@ -3503,7 +3536,7 @@ def addCallgraphs(sv, hf, data):
 #	 Create summary html file for a series of tests
 # Arguments:
 #	 testruns: array of Data objects from parseTraceLog
-def createHTMLSummarySimple(testruns, htmlfile, folder):
+def createHTMLSummarySimple(testruns, htmlfile, title):
 	# write the html header first (html head, css code, up to body start)
 	html = '<!DOCTYPE html>\n<html>\n<head>\n\
 	<meta http-equiv="content-type" content="text/html; charset=UTF-8">\n\
@@ -3583,7 +3616,7 @@ def createHTMLSummarySimple(testruns, htmlfile, folder):
 	for ilk in sorted(cnt, reverse=True):
 		if cnt[ilk] > 0:
 			desc.append('%d %s' % (cnt[ilk], ilk))
-	html += '<div class="stamp">%s (%d tests: %s)</div>\n' % (folder, len(testruns), ', '.join(desc))
+	html += '<div class="stamp">%s (%d tests: %s)</div>\n' % (title, len(testruns), ', '.join(desc))
 	th = '\t<th>{0}</th>\n'
 	td = '\t<td>{0}</td>\n'
 	tdh = '\t<td{1}>{0}</td>\n'
@@ -3685,7 +3718,7 @@ def ordinal(value):
 #	 True if the html file was created, false if it failed
 def createHTML(testruns, testfail):
 	if len(testruns) < 1:
-		print('ERROR: Not enough test data to build a timeline')
+		pprint('ERROR: Not enough test data to build a timeline')
 		return
 
 	kerror = False
@@ -4535,18 +4568,18 @@ def setRuntimeSuspend(before=True):
 			sv.rstgt, sv.rsval, sv.rsdir = 'on', 'auto', 'enabled'
 		else:
 			sv.rstgt, sv.rsval, sv.rsdir = 'auto', 'on', 'disabled'
-		print('CONFIGURING RUNTIME SUSPEND...')
+		pprint('CONFIGURING RUNTIME SUSPEND...')
 		sv.rslist = deviceInfo(sv.rstgt)
 		for i in sv.rslist:
 			sv.setVal(sv.rsval, i)
-		print('runtime suspend %s on all devices (%d changed)' % (sv.rsdir, len(sv.rslist)))
-		print('waiting 5 seconds...')
+		pprint('runtime suspend %s on all devices (%d changed)' % (sv.rsdir, len(sv.rslist)))
+		pprint('waiting 5 seconds...')
 		time.sleep(5)
 	else:
 		# runtime suspend re-enable or re-disable
 		for i in sv.rslist:
 			sv.setVal(sv.rstgt, i)
-		print('runtime suspend settings restored on %d devices' % len(sv.rslist))
+		pprint('runtime suspend settings restored on %d devices' % len(sv.rslist))
 
 # Function: executeSuspend
 # Description:
@@ -4559,17 +4592,17 @@ def executeSuspend():
 	battery = True if getBattery() else False
 	# run these commands to prepare the system for suspend
 	if sysvals.display:
-		print('SET DISPLAY TO %s' % sysvals.display.upper())
+		pprint('SET DISPLAY TO %s' % sysvals.display.upper())
 		displayControl(sysvals.display)
 		time.sleep(1)
 	if sysvals.sync:
-		print('SYNCING FILESYSTEMS')
+		pprint('SYNCING FILESYSTEMS')
 		call('sync', shell=True)
 	# mark the start point in the kernel ring buffer just as we start
 	sysvals.initdmesg()
 	# start ftrace
 	if(sysvals.usecallgraph or sysvals.usetraceevents):
-		print('START TRACING')
+		pprint('START TRACING')
 		sysvals.fsetVal('1', 'tracing_on')
 		if sysvals.useprocmon:
 			pm.start()
@@ -4582,16 +4615,16 @@ def executeSuspend():
 			sysvals.fsetVal('WAIT END', 'trace_marker')
 		# start message
 		if sysvals.testcommand != '':
-			print('COMMAND START')
+			pprint('COMMAND START')
 		else:
 			if(sysvals.rtcwake):
-				print('SUSPEND START')
+				pprint('SUSPEND START')
 			else:
-				print('SUSPEND START (press a key to resume)')
+				pprint('SUSPEND START (press a key to resume)')
 		bat1 = getBattery() if battery else False
 		# set rtcwake
 		if(sysvals.rtcwake):
-			print('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
+			pprint('will issue an rtcwake in %d seconds' % sysvals.rtcwaketime)
 			sysvals.rtcWakeAlarmOn()
 		# start of suspend trace marker
 		if(sysvals.usecallgraph or sysvals.usetraceevents):
@@ -4614,6 +4647,11 @@ def executeSuspend():
 				pf = open(sysvals.mempowerfile, 'w')
 				pf.write(sysvals.memmode)
 				pf.close()
+			if sysvals.diskmode and os.path.exists(sysvals.diskpowerfile):
+				mode = 'disk'
+				pf = open(sysvals.diskpowerfile, 'w')
+				pf.write(sysvals.diskmode)
+				pf.close()
 			pf = open(sysvals.powerfile, 'w')
 			pf.write(mode)
 			# execution will pause here
@@ -4629,7 +4667,7 @@ def executeSuspend():
 			time.sleep(sysvals.postdelay/1000.0)
 			sysvals.fsetVal('WAIT END', 'trace_marker')
 		# return from suspend
-		print('RESUME COMPLETE')
+		pprint('RESUME COMPLETE')
 		if(sysvals.usecallgraph or sysvals.usetraceevents):
 			sysvals.fsetVal('RESUME COMPLETE', 'trace_marker')
 		if(sysvals.suspendmode == 'mem' or sysvals.suspendmode == 'command'):
@@ -4644,11 +4682,11 @@ def executeSuspend():
 			pm.stop()
 		sysvals.fsetVal('0', 'tracing_on')
 	# grab a copy of the dmesg output
-	print('CAPTURING DMESG')
+	pprint('CAPTURING DMESG')
 	sysvals.getdmesg(testdata)
 	# grab a copy of the ftrace output
 	if(sysvals.usecallgraph or sysvals.usetraceevents):
-		print('CAPTURING TRACE')
+		pprint('CAPTURING TRACE')
 		op = sysvals.writeDatafileHeader(sysvals.ftracefile, testdata)
 		fp = open(tp+'trace', 'r')
 		for line in fp:
@@ -4692,15 +4730,15 @@ def yesno(val):
 #	 a list of USB device names to sysvals for better timeline readability
 def deviceInfo(output=''):
 	if not output:
-		print('LEGEND')
-		print('---------------------------------------------------------------------------------------------')
-		print('  A = async/sync PM queue (A/S)               C = runtime active children')
-		print('  R = runtime suspend enabled/disabled (E/D)  rACTIVE = runtime active (min/sec)')
-		print('  S = runtime status active/suspended (A/S)   rSUSPEND = runtime suspend (min/sec)')
-		print('  U = runtime usage count')
-		print('---------------------------------------------------------------------------------------------')
-		print('DEVICE                     NAME                       A R S U C    rACTIVE   rSUSPEND')
-		print('---------------------------------------------------------------------------------------------')
+		pprint('LEGEND\n'\
+		'---------------------------------------------------------------------------------------------\n'\
+		'  A = async/sync PM queue (A/S)               C = runtime active children\n'\
+		'  R = runtime suspend enabled/disabled (E/D)  rACTIVE = runtime active (min/sec)\n'\
+		'  S = runtime status active/suspended (A/S)   rSUSPEND = runtime suspend (min/sec)\n'\
+		'  U = runtime usage count\n'\
+		'---------------------------------------------------------------------------------------------\n'\
+		'DEVICE                     NAME                       A R S U C    rACTIVE   rSUSPEND\n'\
+		'---------------------------------------------------------------------------------------------')
 
 	res = []
 	tgtval = 'runtime_status'
@@ -4889,6 +4927,11 @@ def getModes():
 		fp.close()
 		if 'mem' in modes and not deep:
 			modes.remove('mem')
+	if('disk' in modes and os.path.exists(sysvals.diskpowerfile)):
+		fp = open(sysvals.diskpowerfile, 'r')
+		for m in string.split(fp.read()):
+			modes.append('disk-%s' % m.strip('[]'))
+		fp.close()
 	return modes
 
 # Function: dmidecode
@@ -5037,16 +5080,21 @@ def displayControl(cmd):
 		xset = 'sudo -u %s %s' % (sysvals.sudouser, xset)
 	if cmd == 'init':
 		ret = call(xset.format('dpms 0 0 0'), shell=True)
-		ret = call(xset.format('s off'), shell=True)
+		if not ret:
+			ret = call(xset.format('s off'), shell=True)
 	elif cmd == 'reset':
 		ret = call(xset.format('s reset'), shell=True)
 	elif cmd in ['on', 'off', 'standby', 'suspend']:
 		b4 = displayControl('stat')
 		ret = call(xset.format('dpms force %s' % cmd), shell=True)
-		curr = displayControl('stat')
-		sysvals.vprint('Display Switched: %s -> %s' % (b4, curr))
-		if curr != cmd:
-			sysvals.vprint('WARNING: Display failed to change to %s' % cmd)
+		if not ret:
+			curr = displayControl('stat')
+			sysvals.vprint('Display Switched: %s -> %s' % (b4, curr))
+			if curr != cmd:
+				sysvals.vprint('WARNING: Display failed to change to %s' % cmd)
+		if ret:
+			sysvals.vprint('WARNING: Display failed to change to %s with xset' % cmd)
+			return ret
 	elif cmd == 'stat':
 		fp = Popen(xset.format('q').split(' '), stdout=PIPE).stdout
 		ret = 'unknown'
@@ -5102,18 +5150,19 @@ def getFPDT(output):
 
 	table = struct.unpack('4sIBB6s8sI4sI', buf[0:36])
 	if(output):
-		print('')
-		print('Firmware Performance Data Table (%s)' % table[0])
-		print('                  Signature : %s' % table[0])
-		print('               Table Length : %u' % table[1])
-		print('                   Revision : %u' % table[2])
-		print('                   Checksum : 0x%x' % table[3])
-		print('                     OEM ID : %s' % table[4])
-		print('               OEM Table ID : %s' % table[5])
-		print('               OEM Revision : %u' % table[6])
-		print('                 Creator ID : %s' % table[7])
-		print('           Creator Revision : 0x%x' % table[8])
-		print('')
+		pprint('\n'\
+		'Firmware Performance Data Table (%s)\n'\
+		'                  Signature : %s\n'\
+		'               Table Length : %u\n'\
+		'                   Revision : %u\n'\
+		'                   Checksum : 0x%x\n'\
+		'                     OEM ID : %s\n'\
+		'               OEM Table ID : %s\n'\
+		'               OEM Revision : %u\n'\
+		'                 Creator ID : %s\n'\
+		'           Creator Revision : 0x%x\n'\
+		'' % (table[0], table[0], table[1], table[2], table[3],
+			table[4], table[5], table[6], table[7], table[8]))
 
 	if(table[0] != 'FPDT'):
 		if(output):
@@ -5139,22 +5188,24 @@ def getFPDT(output):
 			first = fp.read(8)
 		except:
 			if(output):
-				print('Bad address 0x%x in %s' % (addr, sysvals.mempath))
+				pprint('Bad address 0x%x in %s' % (addr, sysvals.mempath))
 			return [0, 0]
 		rechead = struct.unpack('4sI', first)
 		recdata = fp.read(rechead[1]-8)
 		if(rechead[0] == 'FBPT'):
 			record = struct.unpack('HBBIQQQQQ', recdata)
 			if(output):
-				print('%s (%s)' % (rectype[header[0]], rechead[0]))
-				print('                  Reset END : %u ns' % record[4])
-				print('  OS Loader LoadImage Start : %u ns' % record[5])
-				print(' OS Loader StartImage Start : %u ns' % record[6])
-				print('     ExitBootServices Entry : %u ns' % record[7])
-				print('      ExitBootServices Exit : %u ns' % record[8])
+				pprint('%s (%s)\n'\
+				'                  Reset END : %u ns\n'\
+				'  OS Loader LoadImage Start : %u ns\n'\
+				' OS Loader StartImage Start : %u ns\n'\
+				'     ExitBootServices Entry : %u ns\n'\
+				'      ExitBootServices Exit : %u ns'\
+				'' % (rectype[header[0]], rechead[0], record[4], record[5],
+					record[6], record[7], record[8]))
 		elif(rechead[0] == 'S3PT'):
 			if(output):
-				print('%s (%s)' % (rectype[header[0]], rechead[0]))
+				pprint('%s (%s)' % (rectype[header[0]], rechead[0]))
 			j = 0
 			while(j < len(recdata)):
 				prechead = struct.unpack('HBB', recdata[j:j+4])
@@ -5164,27 +5215,26 @@ def getFPDT(output):
 					record = struct.unpack('IIQQ', recdata[j:j+prechead[1]])
 					fwData[1] = record[2]
 					if(output):
-						print('    %s' % prectype[prechead[0]])
-						print('               Resume Count : %u' % \
-							record[1])
-						print('                 FullResume : %u ns' % \
-							record[2])
-						print('              AverageResume : %u ns' % \
-							record[3])
+						pprint('    %s\n'\
+						'               Resume Count : %u\n'\
+						'                 FullResume : %u ns\n'\
+						'              AverageResume : %u ns'\
+						'' % (prectype[prechead[0]], record[1],
+								record[2], record[3]))
 				elif(prechead[0] == 1):
 					record = struct.unpack('QQ', recdata[j+4:j+prechead[1]])
 					fwData[0] = record[1] - record[0]
 					if(output):
-						print('    %s' % prectype[prechead[0]])
-						print('               SuspendStart : %u ns' % \
-							record[0])
-						print('                 SuspendEnd : %u ns' % \
-							record[1])
-						print('                SuspendTime : %u ns' % \
-							fwData[0])
+						pprint('    %s\n'\
+						'               SuspendStart : %u ns\n'\
+						'                 SuspendEnd : %u ns\n'\
+						'                SuspendTime : %u ns'\
+						'' % (prectype[prechead[0]], record[0],
+								record[1], fwData[0]))
+
 				j += prechead[1]
 		if(output):
-			print('')
+			pprint('')
 		i += header[1]
 	fp.close()
 	return fwData
@@ -5198,22 +5248,22 @@ def getFPDT(output):
 def statusCheck(probecheck=False):
 	status = ''
 
-	print('Checking this system (%s)...' % platform.node())
+	pprint('Checking this system (%s)...' % platform.node())
 
 	# check we have root access
 	res = sysvals.colorText('NO (No features of this tool will work!)')
 	if(sysvals.rootCheck(False)):
 		res = 'YES'
-	print('    have root access: %s' % res)
+	pprint('    have root access: %s' % res)
 	if(res != 'YES'):
-		print('    Try running this script with sudo')
+		pprint('    Try running this script with sudo')
 		return 'missing root access'
 
 	# check sysfs is mounted
 	res = sysvals.colorText('NO (No features of this tool will work!)')
 	if(os.path.exists(sysvals.powerfile)):
 		res = 'YES'
-	print('    is sysfs mounted: %s' % res)
+	pprint('    is sysfs mounted: %s' % res)
 	if(res != 'YES'):
 		return 'sysfs is missing'
 
@@ -5225,10 +5275,10 @@ def statusCheck(probecheck=False):
 			res = 'YES'
 		else:
 			status = '%s mode is not supported' % sysvals.suspendmode
-		print('    is "%s" a valid power mode: %s' % (sysvals.suspendmode, res))
+		pprint('    is "%s" a valid power mode: %s' % (sysvals.suspendmode, res))
 		if(res == 'NO'):
-			print('      valid power modes are: %s' % modes)
-			print('      please choose one with -m')
+			pprint('      valid power modes are: %s' % modes)
+			pprint('      please choose one with -m')
 
 	# check if ftrace is available
 	res = sysvals.colorText('NO')
@@ -5237,7 +5287,7 @@ def statusCheck(probecheck=False):
 		res = 'YES'
 	elif(sysvals.usecallgraph):
 		status = 'ftrace is not properly supported'
-	print('    is ftrace supported: %s' % res)
+	pprint('    is ftrace supported: %s' % res)
 
 	# check if kprobes are available
 	res = sysvals.colorText('NO')
@@ -5246,7 +5296,7 @@ def statusCheck(probecheck=False):
 		res = 'YES'
 	else:
 		sysvals.usedevsrc = False
-	print('    are kprobes supported: %s' % res)
+	pprint('    are kprobes supported: %s' % res)
 
 	# what data source are we using
 	res = 'DMESG'
@@ -5257,7 +5307,7 @@ def statusCheck(probecheck=False):
 				sysvals.usetraceevents = False
 		if(sysvals.usetraceevents):
 			res = 'FTRACE (all trace events found)'
-	print('    timeline data source: %s' % res)
+	pprint('    timeline data source: %s' % res)
 
 	# check if rtcwake
 	res = sysvals.colorText('NO')
@@ -5265,7 +5315,7 @@ def statusCheck(probecheck=False):
 		res = 'YES'
 	elif(sysvals.rtcwake):
 		status = 'rtcwake is not properly supported'
-	print('    is rtcwake supported: %s' % res)
+	pprint('    is rtcwake supported: %s' % res)
 
 	if not probecheck:
 		return status
@@ -5290,7 +5340,7 @@ def statusCheck(probecheck=False):
 def doError(msg, help=False):
 	if(help == True):
 		printHelp()
-	print('ERROR: %s\n') % msg
+	pprint('ERROR: %s\n' % msg)
 	sysvals.outputResult({'error':msg})
 	sys.exit(1)
 
@@ -5333,7 +5383,7 @@ def getArgFloat(name, args, min, max, main=True):
 	return val
 
 def processData(live=False):
-	print('PROCESSING DATA')
+	pprint('PROCESSING DATA')
 	error = ''
 	if(sysvals.usetraceevents):
 		testruns, error = parseTraceLog(live)
@@ -5359,10 +5409,11 @@ def processData(live=False):
 			data.debugPrint()
 		sys.exit(0)
 	if len(testruns) < 1:
+		pprint('ERROR: Not enough test data to build a timeline')
 		return (testruns, {'error': 'timeline generation failed'})
 	sysvals.vprint('Creating the html timeline (%s)...' % sysvals.htmlfile)
 	createHTML(testruns, error)
-	print('DONE')
+	pprint('DONE')
 	data = testruns[0]
 	stamp = data.stamp
 	stamp['suspend'], stamp['resume'] = data.getTimeValues()
@@ -5516,7 +5567,7 @@ def data_from_html(file, outpath, devlist=False):
 def runSummary(subdir, local=True, genhtml=False):
 	inpath = os.path.abspath(subdir)
 	outpath = os.path.abspath('.') if local else inpath
-	print('Generating a summary of folder "%s"' % inpath)
+	pprint('Generating a summary of folder "%s"' % inpath)
 	if genhtml:
 		for dirname, dirnames, filenames in os.walk(subdir):
 			sysvals.dmesgfile = sysvals.ftracefile = sysvals.htmlfile = ''
@@ -5528,11 +5579,12 @@ def runSummary(subdir, local=True, genhtml=False):
 			sysvals.setOutputFile()
 			if sysvals.ftracefile and sysvals.htmlfile and \
 				not os.path.exists(sysvals.htmlfile):
-				print('FTRACE: %s' % sysvals.ftracefile)
+				pprint('FTRACE: %s' % sysvals.ftracefile)
 				if sysvals.dmesgfile:
-					print('DMESG : %s' % sysvals.dmesgfile)
+					pprint('DMESG : %s' % sysvals.dmesgfile)
 				rerunTest()
 	testruns = []
+	desc = {'host':[],'mode':[],'kernel':[]}
 	for dirname, dirnames, filenames in os.walk(subdir):
 		for filename in filenames:
 			if(not re.match('.*.html', filename)):
@@ -5541,9 +5593,16 @@ def runSummary(subdir, local=True, genhtml=False):
 			if(not data):
 				continue
 			testruns.append(data)
+			for key in desc:
+				if data[key] not in desc[key]:
+					desc[key].append(data[key])
 	outfile = os.path.join(outpath, 'summary.html')
-	print('Summary file: %s' % outfile)
-	createHTMLSummarySimple(testruns, outfile, inpath)
+	pprint('Summary file: %s' % outfile)
+	if len(desc['host']) == len(desc['mode']) == len(desc['kernel']) == 1:
+		title = '%s %s %s' % (desc['host'][0], desc['kernel'][0], desc['mode'][0])
+	else:
+		title = inpath
+	createHTMLSummarySimple(testruns, outfile, title)
 
 # Function: checkArgBool
 # Description:
@@ -5758,85 +5817,86 @@ def configFromFile(file):
 # Description:
 #	 print out the help text
 def printHelp():
-	print('')
-	print('%s v%s' % (sysvals.title, sysvals.version))
-	print('Usage: sudo sleepgraph <options> <commands>')
-	print('')
-	print('Description:')
-	print('  This tool is designed to assist kernel and OS developers in optimizing')
-	print('  their linux stack\'s suspend/resume time. Using a kernel image built')
-	print('  with a few extra options enabled, the tool will execute a suspend and')
-	print('  capture dmesg and ftrace data until resume is complete. This data is')
-	print('  transformed into a device timeline and an optional callgraph to give')
-	print('  a detailed view of which devices/subsystems are taking the most')
-	print('  time in suspend/resume.')
-	print('')
-	print('  If no specific command is given, the default behavior is to initiate')
-	print('  a suspend/resume and capture the dmesg/ftrace output as an html timeline.')
-	print('')
-	print('  Generates output files in subdirectory: suspend-yymmdd-HHMMSS')
-	print('   HTML output:                    <hostname>_<mode>.html')
-	print('   raw dmesg output:               <hostname>_<mode>_dmesg.txt')
-	print('   raw ftrace output:              <hostname>_<mode>_ftrace.txt')
-	print('')
-	print('Options:')
-	print('   -h           Print this help text')
-	print('   -v           Print the current tool version')
-	print('   -config fn   Pull arguments and config options from file fn')
-	print('   -verbose     Print extra information during execution and analysis')
-	print('   -m mode      Mode to initiate for suspend (default: %s)') % (sysvals.suspendmode)
-	print('   -o name      Overrides the output subdirectory name when running a new test')
-	print('                default: suspend-{date}-{time}')
-	print('   -rtcwake t   Wakeup t seconds after suspend, set t to "off" to disable (default: 15)')
-	print('   -addlogs     Add the dmesg and ftrace logs to the html output')
-	print('   -srgap       Add a visible gap in the timeline between sus/res (default: disabled)')
-	print('   -skiphtml    Run the test and capture the trace logs, but skip the timeline (default: disabled)')
-	print('   -result fn   Export a results table to a text file for parsing.')
-	print('  [testprep]')
-	print('   -sync        Sync the filesystems before starting the test')
-	print('   -rs on/off   Enable/disable runtime suspend for all devices, restore all after test')
-	print('   -display m   Change the display mode to m for the test (on/off/standby/suspend)')
-	print('  [advanced]')
-	print('   -gzip        Gzip the trace and dmesg logs to save space')
-	print('   -cmd {s}     Run the timeline over a custom command, e.g. "sync -d"')
-	print('   -proc        Add usermode process info into the timeline (default: disabled)')
-	print('   -dev         Add kernel function calls and threads to the timeline (default: disabled)')
-	print('   -x2          Run two suspend/resumes back to back (default: disabled)')
-	print('   -x2delay t   Include t ms delay between multiple test runs (default: 0 ms)')
-	print('   -predelay t  Include t ms delay before 1st suspend (default: 0 ms)')
-	print('   -postdelay t Include t ms delay after last resume (default: 0 ms)')
-	print('   -mindev ms   Discard all device blocks shorter than ms milliseconds (e.g. 0.001 for us)')
-	print('   -multi n d   Execute <n> consecutive tests at <d> seconds intervals. The outputs will')
-	print('                be created in a new subdirectory with a summary page.')
-	print('  [debug]')
-	print('   -f           Use ftrace to create device callgraphs (default: disabled)')
-	print('   -maxdepth N  limit the callgraph data to N call levels (default: 0=all)')
-	print('   -expandcg    pre-expand the callgraph data in the html output (default: disabled)')
-	print('   -fadd file   Add functions to be graphed in the timeline from a list in a text file')
-	print('   -filter "d1,d2,..." Filter out all but this comma-delimited list of device names')
-	print('   -mincg  ms   Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)')
-	print('   -cgphase P   Only show callgraph data for phase P (e.g. suspend_late)')
-	print('   -cgtest N    Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)')
-	print('   -timeprec N  Number of significant digits in timestamps (0:S, [3:ms], 6:us)')
-	print('   -cgfilter S  Filter the callgraph output in the timeline')
-	print('   -cgskip file Callgraph functions to skip, off to disable (default: cgskip.txt)')
-	print('   -bufsize N   Set trace buffer size to N kilo-bytes (default: all of free memory)')
-	print('')
-	print('Other commands:')
-	print('   -modes       List available suspend modes')
-	print('   -status      Test to see if the system is enabled to run this tool')
-	print('   -fpdt        Print out the contents of the ACPI Firmware Performance Data Table')
-	print('   -battery     Print out battery info (if available)')
-	print('   -x<mode>     Test xset by toggling the given mode (on/off/standby/suspend)')
-	print('   -sysinfo     Print out system info extracted from BIOS')
-	print('   -devinfo     Print out the pm settings of all devices which support runtime suspend')
-	print('   -flist       Print the list of functions currently being captured in ftrace')
-	print('   -flistall    Print all functions capable of being captured in ftrace')
-	print('   -summary dir Create a summary of tests in this dir [-genhtml builds missing html]')
-	print('  [redo]')
-	print('   -ftrace ftracefile  Create HTML output using ftrace input (used with -dmesg)')
-	print('   -dmesg dmesgfile    Create HTML output using dmesg (used with -ftrace)')
-	print('')
+	pprint('\n%s v%s\n'\
+	'Usage: sudo sleepgraph <options> <commands>\n'\
+	'\n'\
+	'Description:\n'\
+	'  This tool is designed to assist kernel and OS developers in optimizing\n'\
+	'  their linux stack\'s suspend/resume time. Using a kernel image built\n'\
+	'  with a few extra options enabled, the tool will execute a suspend and\n'\
+	'  capture dmesg and ftrace data until resume is complete. This data is\n'\
+	'  transformed into a device timeline and an optional callgraph to give\n'\
+	'  a detailed view of which devices/subsystems are taking the most\n'\
+	'  time in suspend/resume.\n'\
+	'\n'\
+	'  If no specific command is given, the default behavior is to initiate\n'\
+	'  a suspend/resume and capture the dmesg/ftrace output as an html timeline.\n'\
+	'\n'\
+	'  Generates output files in subdirectory: suspend-yymmdd-HHMMSS\n'\
+	'   HTML output:                    <hostname>_<mode>.html\n'\
+	'   raw dmesg output:               <hostname>_<mode>_dmesg.txt\n'\
+	'   raw ftrace output:              <hostname>_<mode>_ftrace.txt\n'\
+	'\n'\
+	'Options:\n'\
+	'   -h           Print this help text\n'\
+	'   -v           Print the current tool version\n'\
+	'   -config fn   Pull arguments and config options from file fn\n'\
+	'   -verbose     Print extra information during execution and analysis\n'\
+	'   -m mode      Mode to initiate for suspend (default: %s)\n'\
+	'   -o name      Overrides the output subdirectory name when running a new test\n'\
+	'                default: suspend-{date}-{time}\n'\
+	'   -rtcwake t   Wakeup t seconds after suspend, set t to "off" to disable (default: 15)\n'\
+	'   -addlogs     Add the dmesg and ftrace logs to the html output\n'\
+	'   -srgap       Add a visible gap in the timeline between sus/res (default: disabled)\n'\
+	'   -skiphtml    Run the test and capture the trace logs, but skip the timeline (default: disabled)\n'\
+	'   -result fn   Export a results table to a text file for parsing.\n'\
+	'  [testprep]\n'\
+	'   -sync        Sync the filesystems before starting the test\n'\
+	'   -rs on/off   Enable/disable runtime suspend for all devices, restore all after test\n'\
+	'   -display m   Change the display mode to m for the test (on/off/standby/suspend)\n'\
+	'  [advanced]\n'\
+	'   -gzip        Gzip the trace and dmesg logs to save space\n'\
+	'   -cmd {s}     Run the timeline over a custom command, e.g. "sync -d"\n'\
+	'   -proc        Add usermode process info into the timeline (default: disabled)\n'\
+	'   -dev         Add kernel function calls and threads to the timeline (default: disabled)\n'\
+	'   -x2          Run two suspend/resumes back to back (default: disabled)\n'\
+	'   -x2delay t   Include t ms delay between multiple test runs (default: 0 ms)\n'\
+	'   -predelay t  Include t ms delay before 1st suspend (default: 0 ms)\n'\
+	'   -postdelay t Include t ms delay after last resume (default: 0 ms)\n'\
+	'   -mindev ms   Discard all device blocks shorter than ms milliseconds (e.g. 0.001 for us)\n'\
+	'   -multi n d   Execute <n> consecutive tests at <d> seconds intervals. The outputs will\n'\
+	'                be created in a new subdirectory with a summary page.\n'\
+	'  [debug]\n'\
+	'   -f           Use ftrace to create device callgraphs (default: disabled)\n'\
+	'   -maxdepth N  limit the callgraph data to N call levels (default: 0=all)\n'\
+	'   -expandcg    pre-expand the callgraph data in the html output (default: disabled)\n'\
+	'   -fadd file   Add functions to be graphed in the timeline from a list in a text file\n'\
+	'   -filter "d1,d2,..." Filter out all but this comma-delimited list of device names\n'\
+	'   -mincg  ms   Discard all callgraphs shorter than ms milliseconds (e.g. 0.001 for us)\n'\
+	'   -cgphase P   Only show callgraph data for phase P (e.g. suspend_late)\n'\
+	'   -cgtest N    Only show callgraph data for test N (e.g. 0 or 1 in an x2 run)\n'\
+	'   -timeprec N  Number of significant digits in timestamps (0:S, [3:ms], 6:us)\n'\
+	'   -cgfilter S  Filter the callgraph output in the timeline\n'\
+	'   -cgskip file Callgraph functions to skip, off to disable (default: cgskip.txt)\n'\
+	'   -bufsize N   Set trace buffer size to N kilo-bytes (default: all of free memory)\n'\
+	'   -devdump     Print out all the raw device data for each phase\n'\
+	'   -cgdump      Print out all the raw callgraph data\n'\
+	'\n'\
+	'Other commands:\n'\
+	'   -modes       List available suspend modes\n'\
+	'   -status      Test to see if the system is enabled to run this tool\n'\
+	'   -fpdt        Print out the contents of the ACPI Firmware Performance Data Table\n'\
+	'   -battery     Print out battery info (if available)\n'\
+	'   -x<mode>     Test xset by toggling the given mode (on/off/standby/suspend)\n'\
+	'   -sysinfo     Print out system info extracted from BIOS\n'\
+	'   -devinfo     Print out the pm settings of all devices which support runtime suspend\n'\
+	'   -flist       Print the list of functions currently being captured in ftrace\n'\
+	'   -flistall    Print all functions capable of being captured in ftrace\n'\
+	'   -summary dir Create a summary of tests in this dir [-genhtml builds missing html]\n'\
+	'  [redo]\n'\
+	'   -ftrace ftracefile  Create HTML output using ftrace input (used with -dmesg)\n'\
+	'   -dmesg dmesgfile    Create HTML output using dmesg (used with -ftrace)\n'\
+	'' % (sysvals.title, sysvals.version, sysvals.suspendmode))
 	return True
 
 # ----------------- MAIN --------------------
@@ -5866,7 +5926,7 @@ if __name__ == '__main__':
 			printHelp()
 			sys.exit(0)
 		elif(arg == '-v'):
-			print("Version %s" % sysvals.version)
+			pprint("Version %s" % sysvals.version)
 			sys.exit(0)
 		elif(arg == '-x2'):
 			sysvals.execcount = 2
@@ -5882,6 +5942,8 @@ if __name__ == '__main__':
 			sysvals.skiphtml = True
 		elif(arg == '-cgdump'):
 			sysvals.cgdump = True
+		elif(arg == '-devdump'):
+			sysvals.devdump = True
 		elif(arg == '-genhtml'):
 			genhtml = True
 		elif(arg == '-addlogs'):
@@ -6088,9 +6150,9 @@ if __name__ == '__main__':
 		elif(cmd == 'battery'):
 			out = getBattery()
 			if out:
-				print 'AC Connect    : %s\nBattery Charge: %d' % out
+				pprint('AC Connect    : %s\nBattery Charge: %d' % out)
 			else:
-				print 'no battery found'
+				pprint('no battery found')
 				ret = 1
 		elif(cmd == 'sysinfo'):
 			sysvals.printSystemInfo(True)
@@ -6108,7 +6170,7 @@ if __name__ == '__main__':
 			sysvals.verbose = True
 			ret = displayControl(cmd[1:])
 		elif(cmd == 'xstat'):
-			print 'Display Status: %s' % displayControl('stat').upper()
+			pprint('Display Status: %s' % displayControl('stat').upper())
 		sys.exit(ret)
 
 	# if instructed, re-analyze existing data files
@@ -6122,13 +6184,10 @@ if __name__ == '__main__':
 	if(error):
 		doError(error)
 
-	# extract mem modes and convert
+	# extract mem/disk extra modes and convert
 	mode = sysvals.suspendmode
-	if 'mem' == mode[:3]:
-		if '-' in mode:
-			memmode = mode.split('-')[-1]
-		else:
-			memmode = 'deep'
+	if mode.startswith('mem'):
+		memmode = mode.split('-', 1)[-1] if '-' in mode else 'deep'
 		if memmode == 'shallow':
 			mode = 'standby'
 		elif memmode ==  's2idle':
@@ -6137,6 +6196,9 @@ if __name__ == '__main__':
 			mode = 'mem'
 		sysvals.memmode = memmode
 		sysvals.suspendmode = mode
+	if mode.startswith('disk-'):
+		sysvals.diskmode = mode.split('-', 1)[-1]
+		sysvals.suspendmode = 'disk'
 
 	sysvals.systemInfo(dmidecode(sysvals.mempath))
 
@@ -6153,13 +6215,13 @@ if __name__ == '__main__':
 			os.mkdir(sysvals.outdir)
 		for i in range(sysvals.multitest['count']):
 			if(i != 0):
-				print('Waiting %d seconds...' % (sysvals.multitest['delay']))
+				pprint('Waiting %d seconds...' % (sysvals.multitest['delay']))
 				time.sleep(sysvals.multitest['delay'])
-			print('TEST (%d/%d) START' % (i+1, sysvals.multitest['count']))
+			pprint('TEST (%d/%d) START' % (i+1, sysvals.multitest['count']))
 			fmt = 'suspend-%y%m%d-%H%M%S'
 			sysvals.testdir = os.path.join(sysvals.outdir, datetime.now().strftime(fmt))
 			ret = runTest(i+1)
-			print('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count']))
+			pprint('TEST (%d/%d) COMPLETE' % (i+1, sysvals.multitest['count']))
 			sysvals.logmsg = ''
 		if not sysvals.skiphtml:
 			runSummary(sysvals.outdir, False, False)
-- 
cgit v1.1


From bde091ece2ad3f78d0896870f041bc52761ea3c1 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Wed, 10 Oct 2018 14:16:38 +0200
Subject: cpuidle: menu: Simplify checks related to the polling state

After some recent menu governor changes, the promotion of the
"polling" state to a physical one is mostly controlled by the
latency limit (resulting from the "interactivity" factor) and
not by the time to the closest timer event, so it should be
sufficient to check the exit latency of that state for this
purpose (of course, its target residency still needs to be
within the next timer event range for energy-efficiency).

Also, the physical state the "polling" one is promoted to need not
be the next one in principle (in case the next state is disabled,
for example).

For these reasons, simplify the checks made to decide whether or
not to promote the "polling" state to a physical one and update
the target idle duration when it is promoted in case the residency
of the new state turns out to be above the tick boundary (in which
case there is no reason to stop the tick).

Tested-by: Doug Smythies <dsmythies@telus.net>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/menu.c | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index b754296..4ed8004 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -371,12 +371,12 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 		if (s->target_residency > predicted_us) {
 			/*
 			 * Use a physical idle state, not busy polling, unless
-			 * a timer is going to trigger really really soon.
+			 * a timer is going to trigger soon enough.
 			 */
 			if ((drv->states[idx].flags & CPUIDLE_FLAG_POLLING) &&
-			    i == idx + 1 && latency_req > s->exit_latency &&
-			    data->next_timer_us > max_t(unsigned int, 20,
-							s->target_residency)) {
+			    s->exit_latency <= latency_req &&
+			    s->target_residency <= data->next_timer_us) {
+				predicted_us = s->target_residency;
 				idx = i;
 				break;
 			}
-- 
cgit v1.1


From 29523f095397637edca60c627bc3e5c25a02c40f Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 15 Oct 2018 10:37:19 -0700
Subject: ACPI / CPPC: Add support for guaranteed performance

The Continuous Performance Control package may contain an optional
guaranteed performance field.

Add support to read guaranteed performance from _CPC.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/acpi/cppc_acpi.c | 8 ++++++--
 include/acpi/cppc_acpi.h | 1 +
 2 files changed, 7 insertions(+), 2 deletions(-)

diff --git a/drivers/acpi/cppc_acpi.c b/drivers/acpi/cppc_acpi.c
index d9ce4b1..217a782 100644
--- a/drivers/acpi/cppc_acpi.c
+++ b/drivers/acpi/cppc_acpi.c
@@ -1061,9 +1061,9 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps)
 {
 	struct cpc_desc *cpc_desc = per_cpu(cpc_desc_ptr, cpunum);
 	struct cpc_register_resource *highest_reg, *lowest_reg,
-		*lowest_non_linear_reg, *nominal_reg,
+		*lowest_non_linear_reg, *nominal_reg, *guaranteed_reg,
 		*low_freq_reg = NULL, *nom_freq_reg = NULL;
-	u64 high, low, nom, min_nonlinear, low_f = 0, nom_f = 0;
+	u64 high, low, guaranteed, nom, min_nonlinear, low_f = 0, nom_f = 0;
 	int pcc_ss_id = per_cpu(cpu_pcc_subspace_idx, cpunum);
 	struct cppc_pcc_data *pcc_ss_data = NULL;
 	int ret = 0, regs_in_pcc = 0;
@@ -1079,6 +1079,7 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps)
 	nominal_reg = &cpc_desc->cpc_regs[NOMINAL_PERF];
 	low_freq_reg = &cpc_desc->cpc_regs[LOWEST_FREQ];
 	nom_freq_reg = &cpc_desc->cpc_regs[NOMINAL_FREQ];
+	guaranteed_reg = &cpc_desc->cpc_regs[GUARANTEED_PERF];
 
 	/* Are any of the regs PCC ?*/
 	if (CPC_IN_PCC(highest_reg) || CPC_IN_PCC(lowest_reg) ||
@@ -1107,6 +1108,9 @@ int cppc_get_perf_caps(int cpunum, struct cppc_perf_caps *perf_caps)
 	cpc_read(cpunum, nominal_reg, &nom);
 	perf_caps->nominal_perf = nom;
 
+	cpc_read(cpunum, guaranteed_reg, &guaranteed);
+	perf_caps->guaranteed_perf = guaranteed;
+
 	cpc_read(cpunum, lowest_non_linear_reg, &min_nonlinear);
 	perf_caps->lowest_nonlinear_perf = min_nonlinear;
 
diff --git a/include/acpi/cppc_acpi.h b/include/acpi/cppc_acpi.h
index 8e0b825..cf59e62 100644
--- a/include/acpi/cppc_acpi.h
+++ b/include/acpi/cppc_acpi.h
@@ -104,6 +104,7 @@ enum cppc_regs {
  * today.
  */
 struct cppc_perf_caps {
+	u32 guaranteed_perf;
 	u32 highest_perf;
 	u32 nominal_perf;
 	u32 lowest_perf;
-- 
cgit v1.1


From 86d333a8cc7f66c2314ab1e147834a1cd95ec2de Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 15 Oct 2018 10:37:20 -0700
Subject: cpufreq: intel_pstate: Add base_frequency attribute

Expose base_frequency to user space via cpufreq sysfs when HWP is in
use.

This HWP base frequency is read from the ACPI _CPC object if present,
or from the HWP Capabilities MSR otherwise.

On the majority of the HWP platforms the _CPC object will point to
the HWP Capabilities MSR using the "Functional Fixed Hardware"
address space type.  The address space type also can simply be
ACPI_TYPE_INTEGER, however, in which case the platform firmware
can set its value at the initialization time based on the system
constraints.

Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
[ rjw: Changelog ]
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/intel_pstate.c | 38 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 38 insertions(+)

diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index b6a1aad..2a99e2f 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -373,10 +373,28 @@ static void intel_pstate_set_itmt_prio(int cpu)
 		}
 	}
 }
+
+static int intel_pstate_get_cppc_guranteed(int cpu)
+{
+	struct cppc_perf_caps cppc_perf;
+	int ret;
+
+	ret = cppc_get_perf_caps(cpu, &cppc_perf);
+	if (ret)
+		return ret;
+
+	return cppc_perf.guaranteed_perf;
+}
+
 #else
 static void intel_pstate_set_itmt_prio(int cpu)
 {
 }
+
+static int intel_pstate_get_cppc_guranteed(int cpu)
+{
+	return -ENOTSUPP;
+}
 #endif
 
 static void intel_pstate_init_acpi_perf_limits(struct cpufreq_policy *policy)
@@ -699,9 +717,29 @@ static ssize_t show_energy_performance_preference(
 
 cpufreq_freq_attr_rw(energy_performance_preference);
 
+static ssize_t show_base_frequency(struct cpufreq_policy *policy, char *buf)
+{
+	struct cpudata *cpu;
+	u64 cap;
+	int ratio;
+
+	ratio = intel_pstate_get_cppc_guranteed(policy->cpu);
+	if (ratio <= 0) {
+		rdmsrl_on_cpu(policy->cpu, MSR_HWP_CAPABILITIES, &cap);
+		ratio = HWP_GUARANTEED_PERF(cap);
+	}
+
+	cpu = all_cpu_data[policy->cpu];
+
+	return sprintf(buf, "%d\n", ratio * cpu->pstate.scaling);
+}
+
+cpufreq_freq_attr_ro(base_frequency);
+
 static struct freq_attr *hwp_cpufreq_attrs[] = {
 	&energy_performance_preference,
 	&energy_performance_available_preferences,
+	&base_frequency,
 	NULL,
 };
 
-- 
cgit v1.1


From 4b73d334c50dcde25437be52a01e2f5cbf5f9e21 Mon Sep 17 00:00:00 2001
From: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Date: Mon, 15 Oct 2018 10:37:21 -0700
Subject: Documentation: intel_pstate: Add base_frequency information

Updated documentation to explain base_frequency attribute.

Suggested-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Signed-off-by: Srinivas Pandruvada <srinivas.pandruvada@linux.intel.com>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 Documentation/admin-guide/pm/intel_pstate.rst | 7 +++++++
 1 file changed, 7 insertions(+)

diff --git a/Documentation/admin-guide/pm/intel_pstate.rst b/Documentation/admin-guide/pm/intel_pstate.rst
index 8f1d3de..ac6f5c5 100644
--- a/Documentation/admin-guide/pm/intel_pstate.rst
+++ b/Documentation/admin-guide/pm/intel_pstate.rst
@@ -465,6 +465,13 @@ Next, the following policy attributes have special meaning if
 	policy for the time interval between the last two invocations of the
 	driver's utilization update callback by the CPU scheduler for that CPU.
 
+One more policy attribute is present if the `HWP feature is enabled in the
+processor <Active Mode With HWP_>`_:
+
+``base_frequency``
+	Shows the base frequency of the CPU. Any frequency above this will be
+	in the turbo frequency range.
+
 The meaning of these attributes in the `passive mode <Passive Mode_>`_ is the
 same as for other scaling drivers.
 
-- 
cgit v1.1


From da5e79bc70b84971d2b3a55fb252e34e51d81d48 Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 15 Oct 2018 23:21:05 +0200
Subject: cpufreq: conservative: Take limits changes into account properly

If the policy limits change between invocations of cs_dbs_update(),
the requested frequency value stored in dbs_info may not be updated
and the function may use a stale value of it next time.  Moreover, if
idle periods are takem into account by cs_dbs_update(), the requested
frequency value stored in dbs_info may be below the min policy limit,
which is incorrect.

To fix these problems, always update the requested frequency value
in dbs_info along with the local copy of it when the previous
requested frequency is beyond the policy limits and avoid decreasing
the requested frequency below the min policy limit when taking
idle periods into account.

Fixes: abb6627910a1 (cpufreq: conservative: Fix next frequency selection)
Fixes: 00bfe05889e9 (cpufreq: conservative: Decrease frequency faster for deferred updates)
Reported-by: Waldemar Rymarkiewicz <waldemarx.rymarkiewicz@intel.com>
Cc: All applicable <stable@vger.kernel.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
Acked-by: Waldemar Rymarkiewicz <waldemarx.rymarkiewicz@intel.com>
Acked-by: Viresh Kumar <viresh.kumar@linaro.org>
---
 drivers/cpufreq/cpufreq_conservative.c | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c
index f20f20a..4268f87 100644
--- a/drivers/cpufreq/cpufreq_conservative.c
+++ b/drivers/cpufreq/cpufreq_conservative.c
@@ -80,8 +80,10 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
 	 * changed in the meantime, so fall back to current frequency in that
 	 * case.
 	 */
-	if (requested_freq > policy->max || requested_freq < policy->min)
+	if (requested_freq > policy->max || requested_freq < policy->min) {
 		requested_freq = policy->cur;
+		dbs_info->requested_freq = requested_freq;
+	}
 
 	freq_step = get_freq_step(cs_tuners, policy);
 
@@ -92,7 +94,7 @@ static unsigned int cs_dbs_update(struct cpufreq_policy *policy)
 	if (policy_dbs->idle_periods < UINT_MAX) {
 		unsigned int freq_steps = policy_dbs->idle_periods * freq_step;
 
-		if (requested_freq > freq_steps)
+		if (requested_freq > policy->min + freq_steps)
 			requested_freq -= freq_steps;
 		else
 			requested_freq = policy->min;
-- 
cgit v1.1


From a3ade38d5a8b101eed7b8ebfd427534404943b8a Mon Sep 17 00:00:00 2001
From: Christoph Hellwig <hch@lst.de>
Date: Sat, 13 Oct 2018 17:17:00 +0200
Subject: cpufreq: tegra186: don't pass GFP_DMA32 to dma_alloc_coherent()

The DMA API does its own zone decisions based on the coherent_dma_mask.

Signed-off-by: Christoph Hellwig <hch@lst.de>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpufreq/tegra186-cpufreq.c | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/drivers/cpufreq/tegra186-cpufreq.c b/drivers/cpufreq/tegra186-cpufreq.c
index 1f59966..f1e0902 100644
--- a/drivers/cpufreq/tegra186-cpufreq.c
+++ b/drivers/cpufreq/tegra186-cpufreq.c
@@ -121,7 +121,7 @@ static struct cpufreq_frequency_table *init_vhint_table(
 	void *virt;
 
 	virt = dma_alloc_coherent(bpmp->dev, sizeof(*data), &phys,
-				  GFP_KERNEL | GFP_DMA32);
+				  GFP_KERNEL);
 	if (!virt)
 		return ERR_PTR(-ENOMEM);
 
-- 
cgit v1.1


From 12b65eadf0bd41cf0dbf460f13bcd310a81afe2b Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 15 Oct 2018 13:51:45 +0200
Subject: cpuidle: menu: Drop redundant comparison

Since the correction factor cannot be greater than RESOLUTION * DECAY,
the result of the predicted_us computation in menu_select() cannot be
greater than data->next_timer_us, so it is not necessary to compare
the "typical interval" value coming from get_typical_interval() with
data->next_timer_us separately.

It is sufficient to copmare predicted_us with the return value of
get_typical_interval() directly, so do that and drop the now
redundant expected_interval variable.

No intentional changes of behavior.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/menu.c | 7 +------
 1 file changed, 1 insertion(+), 6 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 4ed8004..3d28321 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -287,7 +287,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	int i;
 	int idx;
 	unsigned int interactivity_req;
-	unsigned int expected_interval;
 	unsigned int predicted_us;
 	unsigned long nr_iowaiters, cpu_load;
 	ktime_t delta_next;
@@ -323,14 +322,10 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	predicted_us = DIV_ROUND_CLOSEST_ULL((uint64_t)data->next_timer_us *
 					 data->correction_factor[data->bucket],
 					 RESOLUTION * DECAY);
-
-	expected_interval = get_typical_interval(data);
-	expected_interval = min(expected_interval, data->next_timer_us);
-
 	/*
 	 * Use the lowest expected idle interval to pick the idle state.
 	 */
-	predicted_us = min(predicted_us, expected_interval);
+	predicted_us = min(predicted_us, get_typical_interval(data));
 
 	if (tick_nohz_tick_stopped()) {
 		/*
-- 
cgit v1.1


From f1c8e410cdac5df42a7806e49efde2859a10fecd Mon Sep 17 00:00:00 2001
From: "Rafael J. Wysocki" <rafael.j.wysocki@intel.com>
Date: Mon, 15 Oct 2018 13:53:25 +0200
Subject: cpuidle: menu: Avoid computations when result will be discarded

If the minimum interval taken into account in the average computation
loop in get_typical_interval() is less than the expected idle
duration determined so far, the resultant average cannot be greater
than that value as well and the entire return result of the function
is going to be discarded anyway going forward.

In that case, it is a waste of time to carry out the remaining
computations in get_typical_interval(), so avoid that by returning
early if the minimum interval is not below the expected idle duration.

No intentional changes of behavior.

Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/cpuidle/governors/menu.c | 19 ++++++++++++++++---
 1 file changed, 16 insertions(+), 3 deletions(-)

diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index 3d28321..575a68f 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -196,10 +196,11 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev);
  * of points is below a threshold. If it is... then use the
  * average of these 8 points as the estimated value.
  */
-static unsigned int get_typical_interval(struct menu_device *data)
+static unsigned int get_typical_interval(struct menu_device *data,
+					 unsigned int predicted_us)
 {
 	int i, divisor;
-	unsigned int max, thresh, avg;
+	unsigned int min, max, thresh, avg;
 	uint64_t sum, variance;
 
 	thresh = UINT_MAX; /* Discard outliers above this value */
@@ -207,6 +208,7 @@ static unsigned int get_typical_interval(struct menu_device *data)
 again:
 
 	/* First calculate the average of past intervals */
+	min = UINT_MAX;
 	max = 0;
 	sum = 0;
 	divisor = 0;
@@ -217,8 +219,19 @@ again:
 			divisor++;
 			if (value > max)
 				max = value;
+
+			if (value < min)
+				min = value;
 		}
 	}
+
+	/*
+	 * If the result of the computation is going to be discarded anyway,
+	 * avoid the computation altogether.
+	 */
+	if (min >= predicted_us)
+		return UINT_MAX;
+
 	if (divisor == INTERVALS)
 		avg = sum >> INTERVAL_SHIFT;
 	else
@@ -325,7 +338,7 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev,
 	/*
 	 * Use the lowest expected idle interval to pick the idle state.
 	 */
-	predicted_us = min(predicted_us, get_typical_interval(data));
+	predicted_us = min(predicted_us, get_typical_interval(data, predicted_us));
 
 	if (tick_nohz_tick_stopped()) {
 		/*
-- 
cgit v1.1


From 2c361684803e0129007806b1ce42cbf9c52f8cd0 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Oct 2018 16:38:14 +0200
Subject: PM / Domains: Don't treat zero found compatible idle states as an
 error

Instead of returning -EINVAL from of_genpd_parse_idle_states() in case none
compatible states was found, let's return 0 to indicate success. Assign
also the out-parameter *states to NULL and *n to 0, to indicate to the
caller that zero states have been found/allocated.

This enables the caller of of_genpd_parse_idle_states() to easier act on
the returned error code.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Lina Iyer <ilina@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index 4b57141..e1bbddb 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -2478,8 +2478,8 @@ static int genpd_iterate_idle_states(struct device_node *dn,
  *
  * Returns the device states parsed from the OF node. The memory for the states
  * is allocated by this function and is the responsibility of the caller to
- * free the memory after use. If no domain idle states is found it returns
- * -EINVAL and in case of errors, a negative error code.
+ * free the memory after use. If any or zero compatible domain idle states is
+ * found it returns 0 and in case of errors, a negative error code is returned.
  */
 int of_genpd_parse_idle_states(struct device_node *dn,
 			struct genpd_power_state **states, int *n)
@@ -2488,8 +2488,14 @@ int of_genpd_parse_idle_states(struct device_node *dn,
 	int ret;
 
 	ret = genpd_iterate_idle_states(dn, NULL);
-	if (ret <= 0)
-		return ret < 0 ? ret : -EINVAL;
+	if (ret < 0)
+		return ret;
+
+	if (!ret) {
+		*states = NULL;
+		*n = 0;
+		return 0;
+	}
 
 	st = kcalloc(ret, sizeof(*st), GFP_KERNEL);
 	if (!st)
-- 
cgit v1.1


From 2c9b7f8772033cc8bafbd4eefe2ca605bf3eb094 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Oct 2018 16:38:15 +0200
Subject: PM / Domains: Deal with multiple states but no governor in genpd

A caller of pm_genpd_init() that provides some states for the genpd via the
->states pointer in the struct generic_pm_domain, should also provide a
governor. This because it's the job of the governor to pick a state that
satisfies the constraints.

Therefore, let's print a warning to inform the user about such bogus
configuration and avoid to bail out, by instead picking the shallowest
state before genpd invokes the ->power_off() callback.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Reviewed-by: Lina Iyer <ilina@codeaurora.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 drivers/base/power/domain.c | 6 ++++++
 1 file changed, 6 insertions(+)

diff --git a/drivers/base/power/domain.c b/drivers/base/power/domain.c
index e1bbddb..7f38a92 100644
--- a/drivers/base/power/domain.c
+++ b/drivers/base/power/domain.c
@@ -467,6 +467,10 @@ static int genpd_power_off(struct generic_pm_domain *genpd, bool one_dev_on,
 			return -EAGAIN;
 	}
 
+	/* Default to shallowest state. */
+	if (!genpd->gov)
+		genpd->state_idx = 0;
+
 	if (genpd->power_off) {
 		int ret;
 
@@ -1687,6 +1691,8 @@ int pm_genpd_init(struct generic_pm_domain *genpd,
 		ret = genpd_set_default_power_state(genpd);
 		if (ret)
 			return ret;
+	} else if (!gov) {
+		pr_warn("%s : no governor for states\n", genpd->name);
 	}
 
 	device_initialize(&genpd->dev);
-- 
cgit v1.1


From e5089c2c73a157997eb6ecca982adea9d0673075 Mon Sep 17 00:00:00 2001
From: Ulf Hansson <ulf.hansson@linaro.org>
Date: Wed, 3 Oct 2018 16:38:16 +0200
Subject: PM / Domains: Document flags for genpd

The current documented description of the GENPD_FLAG_* flags, are too
simplified, so let's extend them.

Signed-off-by: Ulf Hansson <ulf.hansson@linaro.org>
Signed-off-by: Rafael J. Wysocki <rafael.j.wysocki@intel.com>
---
 include/linux/pm_domain.h | 35 ++++++++++++++++++++++++++++++-----
 1 file changed, 30 insertions(+), 5 deletions(-)

diff --git a/include/linux/pm_domain.h b/include/linux/pm_domain.h
index 776c546..3b5d728 100644
--- a/include/linux/pm_domain.h
+++ b/include/linux/pm_domain.h
@@ -17,11 +17,36 @@
 #include <linux/notifier.h>
 #include <linux/spinlock.h>
 
-/* Defines used for the flags field in the struct generic_pm_domain */
-#define GENPD_FLAG_PM_CLK	 (1U << 0) /* PM domain uses PM clk */
-#define GENPD_FLAG_IRQ_SAFE	 (1U << 1) /* PM domain operates in atomic */
-#define GENPD_FLAG_ALWAYS_ON	 (1U << 2) /* PM domain is always powered on */
-#define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3) /* Keep devices active if wakeup */
+/*
+ * Flags to control the behaviour of a genpd.
+ *
+ * These flags may be set in the struct generic_pm_domain's flags field by a
+ * genpd backend driver. The flags must be set before it calls pm_genpd_init(),
+ * which initializes a genpd.
+ *
+ * GENPD_FLAG_PM_CLK:		Instructs genpd to use the PM clk framework,
+ *				while powering on/off attached devices.
+ *
+ * GENPD_FLAG_IRQ_SAFE:		This informs genpd that its backend callbacks,
+ *				->power_on|off(), doesn't sleep. Hence, these
+ *				can be invoked from within atomic context, which
+ *				enables genpd to power on/off the PM domain,
+ *				even when pm_runtime_is_irq_safe() returns true,
+ *				for any of its attached devices. Note that, a
+ *				genpd having this flag set, requires its
+ *				masterdomains to also have it set.
+ *
+ * GENPD_FLAG_ALWAYS_ON:	Instructs genpd to always keep the PM domain
+ *				powered on.
+ *
+ * GENPD_FLAG_ACTIVE_WAKEUP:	Instructs genpd to keep the PM domain powered
+ *				on, in case any of its attached devices is used
+ *				in the wakeup path to serve system wakeups.
+ */
+#define GENPD_FLAG_PM_CLK	 (1U << 0)
+#define GENPD_FLAG_IRQ_SAFE	 (1U << 1)
+#define GENPD_FLAG_ALWAYS_ON	 (1U << 2)
+#define GENPD_FLAG_ACTIVE_WAKEUP (1U << 3)
 
 enum gpd_status {
 	GPD_STATE_ACTIVE = 0,	/* PM domain is active */
-- 
cgit v1.1