summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
authorZhang Rui <rui.zhang@intel.com>2017-05-05 20:30:09 +0800
committerZhang Rui <rui.zhang@intel.com>2017-05-05 20:30:09 +0800
commitbb4d5e38decf3f3c6bf170d19603949abb4d4b99 (patch)
tree7c2c83431a91b238511120a7a137daffb053185f
parenta6128f47f7940d8388ca7c8623fbe24e52f8fae6 (diff)
parentef1d87e06ab4d3f9a95f02517ecc50902dc233a7 (diff)
downloadop-kernel-dev-bb4d5e38decf3f3c6bf170d19603949abb4d4b99.zip
op-kernel-dev-bb4d5e38decf3f3c6bf170d19603949abb4d4b99.tar.gz
Merge branch 'backup-thermal-shutdown' into next
-rw-r--r--Documentation/thermal/sysfs-api.txt21
-rw-r--r--drivers/thermal/Kconfig17
-rw-r--r--drivers/thermal/thermal_core.c64
3 files changed, 101 insertions, 1 deletions
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt
index ef473dc..bb9a0a5 100644
--- a/Documentation/thermal/sysfs-api.txt
+++ b/Documentation/thermal/sysfs-api.txt
@@ -582,3 +582,24 @@ platform data is provided, this uses the step_wise throttling policy.
This function serves as an arbitrator to set the state of a cooling
device. It sets the cooling device to the deepest cooling state if
possible.
+
+6. thermal_emergency_poweroff:
+
+On an event of critical trip temperature crossing. Thermal framework
+allows the system to shutdown gracefully by calling orderly_poweroff().
+In the event of a failure of orderly_poweroff() to shut down the system
+we are in danger of keeping the system alive at undesirably high
+temperatures. To mitigate this high risk scenario we program a work
+queue to fire after a pre-determined number of seconds to start
+an emergency shutdown of the device using the kernel_power_off()
+function. In case kernel_power_off() fails then finally
+emergency_restart() is called in the worst case.
+
+The delay should be carefully profiled so as to give adequate time for
+orderly_poweroff(). In case of failure of an orderly_poweroff() the
+emergency poweroff kicks in after the delay has elapsed and shuts down
+the system.
+
+If set to 0 emergency poweroff will not be supported. So a carefully
+profiled non-zero positive value is a must for emergerncy poweroff to be
+triggered.
diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig
index 776b343..74ef51d 100644
--- a/drivers/thermal/Kconfig
+++ b/drivers/thermal/Kconfig
@@ -15,6 +15,23 @@ menuconfig THERMAL
if THERMAL
+config THERMAL_EMERGENCY_POWEROFF_DELAY_MS
+ int "Emergency poweroff delay in milli-seconds"
+ depends on THERMAL
+ default 0
+ help
+ Thermal subsystem will issue a graceful shutdown when
+ critical temperatures are reached using orderly_poweroff(). In
+ case of failure of an orderly_poweroff(), the thermal emergency
+ poweroff kicks in after a delay has elapsed and shuts down the system.
+ This config is number of milliseconds to delay before emergency
+ poweroff kicks in. Similarly to the critical trip point,
+ the delay should be carefully profiled so as to give adequate
+ time for orderly_poweroff() to finish on regular execution.
+ If set to 0 emergency poweroff will not be supported.
+
+ In doubt, leave as 0.
+
config THERMAL_HWMON
bool
prompt "Expose thermal sensors as hwmon device"
diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c
index 11f0675..b21b9cc 100644
--- a/drivers/thermal/thermal_core.c
+++ b/drivers/thermal/thermal_core.c
@@ -45,8 +45,10 @@ static LIST_HEAD(thermal_governor_list);
static DEFINE_MUTEX(thermal_list_lock);
static DEFINE_MUTEX(thermal_governor_lock);
+static DEFINE_MUTEX(poweroff_lock);
static atomic_t in_suspend;
+static bool power_off_triggered;
static struct thermal_governor *def_governor;
@@ -322,6 +324,54 @@ static void handle_non_critical_trips(struct thermal_zone_device *tz,
def_governor->throttle(tz, trip);
}
+/**
+ * thermal_emergency_poweroff_func - emergency poweroff work after a known delay
+ * @work: work_struct associated with the emergency poweroff function
+ *
+ * This function is called in very critical situations to force
+ * a kernel poweroff after a configurable timeout value.
+ */
+static void thermal_emergency_poweroff_func(struct work_struct *work)
+{
+ /*
+ * We have reached here after the emergency thermal shutdown
+ * Waiting period has expired. This means orderly_poweroff has
+ * not been able to shut off the system for some reason.
+ * Try to shut down the system immediately using kernel_power_off
+ * if populated
+ */
+ WARN(1, "Attempting kernel_power_off: Temperature too high\n");
+ kernel_power_off();
+
+ /*
+ * Worst of the worst case trigger emergency restart
+ */
+ WARN(1, "Attempting emergency_restart: Temperature too high\n");
+ emergency_restart();
+}
+
+static DECLARE_DELAYED_WORK(thermal_emergency_poweroff_work,
+ thermal_emergency_poweroff_func);
+
+/**
+ * thermal_emergency_poweroff - Trigger an emergency system poweroff
+ *
+ * This may be called from any critical situation to trigger a system shutdown
+ * after a known period of time. By default this is not scheduled.
+ */
+void thermal_emergency_poweroff(void)
+{
+ int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS;
+ /*
+ * poweroff_delay_ms must be a carefully profiled positive value.
+ * Its a must for thermal_emergency_poweroff_work to be scheduled
+ */
+ if (poweroff_delay_ms <= 0)
+ return;
+ schedule_delayed_work(&thermal_emergency_poweroff_work,
+ msecs_to_jiffies(poweroff_delay_ms));
+}
+
static void handle_critical_trips(struct thermal_zone_device *tz,
int trip, enum thermal_trip_type trip_type)
{
@@ -342,7 +392,17 @@ static void handle_critical_trips(struct thermal_zone_device *tz,
dev_emerg(&tz->device,
"critical temperature reached(%d C),shutting down\n",
tz->temperature / 1000);
- orderly_poweroff(true);
+ mutex_lock(&poweroff_lock);
+ if (!power_off_triggered) {
+ /*
+ * Queue a backup emergency shutdown in the event of
+ * orderly_poweroff failure
+ */
+ thermal_emergency_poweroff();
+ orderly_poweroff(true);
+ power_off_triggered = true;
+ }
+ mutex_unlock(&poweroff_lock);
}
}
@@ -1463,6 +1523,7 @@ static int __init thermal_init(void)
{
int result;
+ mutex_init(&poweroff_lock);
result = thermal_register_governors();
if (result)
goto error;
@@ -1497,6 +1558,7 @@ error:
ida_destroy(&thermal_cdev_ida);
mutex_destroy(&thermal_list_lock);
mutex_destroy(&thermal_governor_lock);
+ mutex_destroy(&poweroff_lock);
return result;
}
OpenPOWER on IntegriCloud