diff options
-rw-r--r-- | Documentation/thermal/sysfs-api.txt | 21 | ||||
-rw-r--r-- | drivers/thermal/Kconfig | 17 | ||||
-rw-r--r-- | drivers/thermal/thermal_core.c | 64 |
3 files changed, 101 insertions, 1 deletions
diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index ef473dc..bb9a0a5 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -582,3 +582,24 @@ platform data is provided, this uses the step_wise throttling policy. This function serves as an arbitrator to set the state of a cooling device. It sets the cooling device to the deepest cooling state if possible. + +6. thermal_emergency_poweroff: + +On an event of critical trip temperature crossing. Thermal framework +allows the system to shutdown gracefully by calling orderly_poweroff(). +In the event of a failure of orderly_poweroff() to shut down the system +we are in danger of keeping the system alive at undesirably high +temperatures. To mitigate this high risk scenario we program a work +queue to fire after a pre-determined number of seconds to start +an emergency shutdown of the device using the kernel_power_off() +function. In case kernel_power_off() fails then finally +emergency_restart() is called in the worst case. + +The delay should be carefully profiled so as to give adequate time for +orderly_poweroff(). In case of failure of an orderly_poweroff() the +emergency poweroff kicks in after the delay has elapsed and shuts down +the system. + +If set to 0 emergency poweroff will not be supported. So a carefully +profiled non-zero positive value is a must for emergerncy poweroff to be +triggered. diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index 776b343..74ef51d 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -15,6 +15,23 @@ menuconfig THERMAL if THERMAL +config THERMAL_EMERGENCY_POWEROFF_DELAY_MS + int "Emergency poweroff delay in milli-seconds" + depends on THERMAL + default 0 + help + Thermal subsystem will issue a graceful shutdown when + critical temperatures are reached using orderly_poweroff(). In + case of failure of an orderly_poweroff(), the thermal emergency + poweroff kicks in after a delay has elapsed and shuts down the system. + This config is number of milliseconds to delay before emergency + poweroff kicks in. Similarly to the critical trip point, + the delay should be carefully profiled so as to give adequate + time for orderly_poweroff() to finish on regular execution. + If set to 0 emergency poweroff will not be supported. + + In doubt, leave as 0. + config THERMAL_HWMON bool prompt "Expose thermal sensors as hwmon device" diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 11f0675..b21b9cc 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -45,8 +45,10 @@ static LIST_HEAD(thermal_governor_list); static DEFINE_MUTEX(thermal_list_lock); static DEFINE_MUTEX(thermal_governor_lock); +static DEFINE_MUTEX(poweroff_lock); static atomic_t in_suspend; +static bool power_off_triggered; static struct thermal_governor *def_governor; @@ -322,6 +324,54 @@ static void handle_non_critical_trips(struct thermal_zone_device *tz, def_governor->throttle(tz, trip); } +/** + * thermal_emergency_poweroff_func - emergency poweroff work after a known delay + * @work: work_struct associated with the emergency poweroff function + * + * This function is called in very critical situations to force + * a kernel poweroff after a configurable timeout value. + */ +static void thermal_emergency_poweroff_func(struct work_struct *work) +{ + /* + * We have reached here after the emergency thermal shutdown + * Waiting period has expired. This means orderly_poweroff has + * not been able to shut off the system for some reason. + * Try to shut down the system immediately using kernel_power_off + * if populated + */ + WARN(1, "Attempting kernel_power_off: Temperature too high\n"); + kernel_power_off(); + + /* + * Worst of the worst case trigger emergency restart + */ + WARN(1, "Attempting emergency_restart: Temperature too high\n"); + emergency_restart(); +} + +static DECLARE_DELAYED_WORK(thermal_emergency_poweroff_work, + thermal_emergency_poweroff_func); + +/** + * thermal_emergency_poweroff - Trigger an emergency system poweroff + * + * This may be called from any critical situation to trigger a system shutdown + * after a known period of time. By default this is not scheduled. + */ +void thermal_emergency_poweroff(void) +{ + int poweroff_delay_ms = CONFIG_THERMAL_EMERGENCY_POWEROFF_DELAY_MS; + /* + * poweroff_delay_ms must be a carefully profiled positive value. + * Its a must for thermal_emergency_poweroff_work to be scheduled + */ + if (poweroff_delay_ms <= 0) + return; + schedule_delayed_work(&thermal_emergency_poweroff_work, + msecs_to_jiffies(poweroff_delay_ms)); +} + static void handle_critical_trips(struct thermal_zone_device *tz, int trip, enum thermal_trip_type trip_type) { @@ -342,7 +392,17 @@ static void handle_critical_trips(struct thermal_zone_device *tz, dev_emerg(&tz->device, "critical temperature reached(%d C),shutting down\n", tz->temperature / 1000); - orderly_poweroff(true); + mutex_lock(&poweroff_lock); + if (!power_off_triggered) { + /* + * Queue a backup emergency shutdown in the event of + * orderly_poweroff failure + */ + thermal_emergency_poweroff(); + orderly_poweroff(true); + power_off_triggered = true; + } + mutex_unlock(&poweroff_lock); } } @@ -1463,6 +1523,7 @@ static int __init thermal_init(void) { int result; + mutex_init(&poweroff_lock); result = thermal_register_governors(); if (result) goto error; @@ -1497,6 +1558,7 @@ error: ida_destroy(&thermal_cdev_ida); mutex_destroy(&thermal_list_lock); mutex_destroy(&thermal_governor_lock); + mutex_destroy(&poweroff_lock); return result; } |