/* * kernel/power/main.c - PM subsystem core functionality. * * Copyright (c) 2003 Patrick Mochel * Copyright (c) 2003 Open Source Development Lab * * This file is released under the GPLv2 * */ #include <linux/module.h> #include <linux/suspend.h> #include <linux/kobject.h> #include <linux/string.h> #include <linux/delay.h> #include <linux/errno.h> #include <linux/kmod.h> #include <linux/init.h> #include <linux/console.h> #include <linux/cpu.h> #include <linux/resume-trace.h> #include <linux/freezer.h> #include <linux/vmstat.h> #include <linux/syscalls.h> #include <linux/ftrace.h> #include "power.h" DEFINE_MUTEX(pm_mutex); unsigned int pm_flags; EXPORT_SYMBOL(pm_flags); #ifdef CONFIG_PM_SLEEP /* Routines for PM-transition notifications */ static BLOCKING_NOTIFIER_HEAD(pm_chain_head); int register_pm_notifier(struct notifier_block *nb) { return blocking_notifier_chain_register(&pm_chain_head, nb); } EXPORT_SYMBOL_GPL(register_pm_notifier); int unregister_pm_notifier(struct notifier_block *nb) { return blocking_notifier_chain_unregister(&pm_chain_head, nb); } EXPORT_SYMBOL_GPL(unregister_pm_notifier); int pm_notifier_call_chain(unsigned long val) { return (blocking_notifier_call_chain(&pm_chain_head, val, NULL) == NOTIFY_BAD) ? -EINVAL : 0; } #ifdef CONFIG_PM_DEBUG int pm_test_level = TEST_NONE; static int suspend_test(int level) { if (pm_test_level == level) { printk(KERN_INFO "suspend debug: Waiting for 5 seconds.\n"); mdelay(5000); return 1; } return 0; } static const char * const pm_tests[__TEST_AFTER_LAST] = { [TEST_NONE] = "none", [TEST_CORE] = "core", [TEST_CPUS] = "processors", [TEST_PLATFORM] = "platform", [TEST_DEVICES] = "devices", [TEST_FREEZER] = "freezer", }; static ssize_t pm_test_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { char *s = buf; int level; for (level = TEST_FIRST; level <= TEST_MAX; level++) if (pm_tests[level]) { if (level == pm_test_level) s += sprintf(s, "[%s] ", pm_tests[level]); else s += sprintf(s, "%s ", pm_tests[level]); } if (s != buf) /* convert the last space to a newline */ *(s-1) = '\n'; return (s - buf); } static ssize_t pm_test_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { const char * const *s; int level; char *p; int len; int error = -EINVAL; p = memchr(buf, '\n', n); len = p ? p - buf : n; mutex_lock(&pm_mutex); level = TEST_FIRST; for (s = &pm_tests[level]; level <= TEST_MAX; s++, level++) if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) { pm_test_level = level; error = 0; break; } mutex_unlock(&pm_mutex); return error ? error : n; } power_attr(pm_test); #else /* !CONFIG_PM_DEBUG */ static inline int suspend_test(int level) { return 0; } #endif /* !CONFIG_PM_DEBUG */ #endif /* CONFIG_PM_SLEEP */ #ifdef CONFIG_SUSPEND #ifdef CONFIG_PM_TEST_SUSPEND /* * We test the system suspend code by setting an RTC wakealarm a short * time in the future, then suspending. Suspending the devices won't * normally take long ... some systems only need a few milliseconds. * * The time it takes is system-specific though, so when we test this * during system bootup we allow a LOT of time. */ #define TEST_SUSPEND_SECONDS 5 static unsigned long suspend_test_start_time; static void suspend_test_start(void) { /* FIXME Use better timebase than "jiffies", ideally a clocksource. * What we want is a hardware counter that will work correctly even * during the irqs-are-off stages of the suspend/resume cycle... */ suspend_test_start_time = jiffies; } static void suspend_test_finish(const char *label) { long nj = jiffies - suspend_test_start_time; unsigned msec; msec = jiffies_to_msecs(abs(nj)); pr_info("PM: %s took %d.%03d seconds\n", label, msec / 1000, msec % 1000); /* Warning on suspend means the RTC alarm period needs to be * larger -- the system was sooo slooowwww to suspend that the * alarm (should have) fired before the system went to sleep! * * Warning on either suspend or resume also means the system * has some performance issues. The stack dump of a WARN_ON * is more likely to get the right attention than a printk... */ WARN_ON(msec > (TEST_SUSPEND_SECONDS * 1000)); } #else static void suspend_test_start(void) { } static void suspend_test_finish(const char *label) { } #endif /* This is just an arbitrary number */ #define FREE_PAGE_NUMBER (100) static struct platform_suspend_ops *suspend_ops; /** * suspend_set_ops - Set the global suspend method table. * @ops: Pointer to ops structure. */ void suspend_set_ops(struct platform_suspend_ops *ops) { mutex_lock(&pm_mutex); suspend_ops = ops; mutex_unlock(&pm_mutex); } /** * suspend_valid_only_mem - generic memory-only valid callback * * Platform drivers that implement mem suspend only and only need * to check for that in their .valid callback can use this instead * of rolling their own .valid callback. */ int suspend_valid_only_mem(suspend_state_t state) { return state == PM_SUSPEND_MEM; } /** * suspend_prepare - Do prep work before entering low-power state. * * This is common code that is called for each state that we're entering. * Run suspend notifiers, allocate a console and stop all processes. */ static int suspend_prepare(void) { int error; unsigned int free_pages; if (!suspend_ops || !suspend_ops->enter) return -EPERM; pm_prepare_console(); error = pm_notifier_call_chain(PM_SUSPEND_PREPARE); if (error) goto Finish; error = usermodehelper_disable(); if (error) goto Finish; if (suspend_freeze_processes()) { error = -EAGAIN; goto Thaw; } free_pages = global_page_state(NR_FREE_PAGES); if (free_pages < FREE_PAGE_NUMBER) { pr_debug("PM: free some memory\n"); shrink_all_memory(FREE_PAGE_NUMBER - free_pages); if (nr_free_pages() < FREE_PAGE_NUMBER) { error = -ENOMEM; printk(KERN_ERR "PM: No enough memory\n"); } } if (!error) return 0; Thaw: suspend_thaw_processes(); usermodehelper_enable(); Finish: pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); return error; } /* default implementation */ void __attribute__ ((weak)) arch_suspend_disable_irqs(void) { local_irq_disable(); } /* default implementation */ void __attribute__ ((weak)) arch_suspend_enable_irqs(void) { local_irq_enable(); } /** * suspend_enter - enter the desired system sleep state. * @state: state to enter * * This function should be called after devices have been suspended. */ static int suspend_enter(suspend_state_t state) { int error = 0; device_pm_lock(); arch_suspend_disable_irqs(); BUG_ON(!irqs_disabled()); if ((error = device_power_down(PMSG_SUSPEND))) { printk(KERN_ERR "PM: Some devices failed to power down\n"); goto Done; } if (!suspend_test(TEST_CORE)) error = suspend_ops->enter(state); device_power_up(PMSG_RESUME); Done: arch_suspend_enable_irqs(); BUG_ON(irqs_disabled()); device_pm_unlock(); return error; } /** * suspend_devices_and_enter - suspend devices and enter the desired system * sleep state. * @state: state to enter */ int suspend_devices_and_enter(suspend_state_t state) { int error, ftrace_save; if (!suspend_ops) return -ENOSYS; if (suspend_ops->begin) { error = suspend_ops->begin(state); if (error) goto Close; } suspend_console(); ftrace_save = __ftrace_enabled_save(); suspend_test_start(); error = device_suspend(PMSG_SUSPEND); if (error) { printk(KERN_ERR "PM: Some devices failed to suspend\n"); goto Recover_platform; } suspend_test_finish("suspend devices"); if (suspend_test(TEST_DEVICES)) goto Recover_platform; if (suspend_ops->prepare) { error = suspend_ops->prepare(); if (error) goto Resume_devices; } if (suspend_test(TEST_PLATFORM)) goto Finish; error = disable_nonboot_cpus(); if (!error && !suspend_test(TEST_CPUS)) suspend_enter(state); enable_nonboot_cpus(); Finish: if (suspend_ops->finish) suspend_ops->finish(); Resume_devices: suspend_test_start(); device_resume(PMSG_RESUME); suspend_test_finish("resume devices"); __ftrace_enabled_restore(ftrace_save); resume_console(); Close: if (suspend_ops->end) suspend_ops->end(); return error; Recover_platform: if (suspend_ops->recover) suspend_ops->recover(); goto Resume_devices; } /** * suspend_finish - Do final work before exiting suspend sequence. * * Call platform code to clean up, restart processes, and free the * console that we've allocated. This is not called for suspend-to-disk. */ static void suspend_finish(void) { suspend_thaw_processes(); usermodehelper_enable(); pm_notifier_call_chain(PM_POST_SUSPEND); pm_restore_console(); } static const char * const pm_states[PM_SUSPEND_MAX] = { [PM_SUSPEND_STANDBY] = "standby", [PM_SUSPEND_MEM] = "mem", }; static inline int valid_state(suspend_state_t state) { /* All states need lowlevel support and need to be valid * to the lowlevel implementation, no valid callback * implies that none are valid. */ if (!suspend_ops || !suspend_ops->valid || !suspend_ops->valid(state)) return 0; return 1; } /** * enter_state - Do common work of entering low-power state. * @state: pm_state structure for state we're entering. * * Make sure we're the only ones trying to enter a sleep state. Fail * if someone has beat us to it, since we don't want anything weird to * happen when we wake up. * Then, do the setup for suspend, enter the state, and cleaup (after * we've woken up). */ static int enter_state(suspend_state_t state) { int error; if (!valid_state(state)) return -ENODEV; if (!mutex_trylock(&pm_mutex)) return -EBUSY; printk(KERN_INFO "PM: Syncing filesystems ... "); sys_sync(); printk("done.\n"); pr_debug("PM: Preparing system for %s sleep\n", pm_states[state]); error = suspend_prepare(); if (error) goto Unlock; if (suspend_test(TEST_FREEZER)) goto Finish; pr_debug("PM: Entering %s sleep\n", pm_states[state]); error = suspend_devices_and_enter(state); Finish: pr_debug("PM: Finishing wakeup.\n"); suspend_finish(); Unlock: mutex_unlock(&pm_mutex); return error; } /** * pm_suspend - Externally visible function for suspending system. * @state: Enumerated value of state to enter. * * Determine whether or not value is within range, get state * structure, and enter (above). */ int pm_suspend(suspend_state_t state) { if (state > PM_SUSPEND_ON && state <= PM_SUSPEND_MAX) return enter_state(state); return -EINVAL; } EXPORT_SYMBOL(pm_suspend); #endif /* CONFIG_SUSPEND */ struct kobject *power_kobj; /** * state - control system power state. * * show() returns what states are supported, which is hard-coded to * 'standby' (Power-On Suspend), 'mem' (Suspend-to-RAM), and * 'disk' (Suspend-to-Disk). * * store() accepts one of those strings, translates it into the * proper enumerated value, and initiates a suspend transition. */ static ssize_t state_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { char *s = buf; #ifdef CONFIG_SUSPEND int i; for (i = 0; i < PM_SUSPEND_MAX; i++) { if (pm_states[i] && valid_state(i)) s += sprintf(s,"%s ", pm_states[i]); } #endif #ifdef CONFIG_HIBERNATION s += sprintf(s, "%s\n", "disk"); #else if (s != buf) /* convert the last space to a newline */ *(s-1) = '\n'; #endif return (s - buf); } static ssize_t state_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { #ifdef CONFIG_SUSPEND suspend_state_t state = PM_SUSPEND_STANDBY; const char * const *s; #endif char *p; int len; int error = -EINVAL; p = memchr(buf, '\n', n); len = p ? p - buf : n; /* First, check if we are requested to hibernate */ if (len == 4 && !strncmp(buf, "disk", len)) { error = hibernate(); goto Exit; } #ifdef CONFIG_SUSPEND for (s = &pm_states[state]; state < PM_SUSPEND_MAX; s++, state++) { if (*s && len == strlen(*s) && !strncmp(buf, *s, len)) break; } if (state < PM_SUSPEND_MAX && *s) error = enter_state(state); #endif Exit: return error ? error : n; } power_attr(state); #ifdef CONFIG_PM_TRACE int pm_trace_enabled; static ssize_t pm_trace_show(struct kobject *kobj, struct kobj_attribute *attr, char *buf) { return sprintf(buf, "%d\n", pm_trace_enabled); } static ssize_t pm_trace_store(struct kobject *kobj, struct kobj_attribute *attr, const char *buf, size_t n) { int val; if (sscanf(buf, "%d", &val) == 1) { pm_trace_enabled = !!val; return n; } return -EINVAL; } power_attr(pm_trace); #endif /* CONFIG_PM_TRACE */ static struct attribute * g[] = { &state_attr.attr, #ifdef CONFIG_PM_TRACE &pm_trace_attr.attr, #endif #if defined(CONFIG_PM_SLEEP) && defined(CONFIG_PM_DEBUG) &pm_test_attr.attr, #endif NULL, }; static struct attribute_group attr_group = { .attrs = g, }; static int __init pm_init(void) { power_kobj = kobject_create_and_add("power", NULL); if (!power_kobj) return -ENOMEM; return sysfs_create_group(power_kobj, &attr_group); } core_initcall(pm_init); #ifdef CONFIG_PM_TEST_SUSPEND #include <linux/rtc.h> /* * To test system suspend, we need a hands-off mechanism to resume the * system. RTCs wake alarms are a common self-contained mechanism. */ static void __init test_wakealarm(struct rtc_device *rtc, suspend_state_t state) { static char err_readtime[] __initdata = KERN_ERR "PM: can't read %s time, err %d\n"; static char err_wakealarm [] __initdata = KERN_ERR "PM: can't set %s wakealarm, err %d\n"; static char err_suspend[] __initdata = KERN_ERR "PM: suspend test failed, error %d\n"; static char info_test[] __initdata = KERN_INFO "PM: test RTC wakeup from '%s' suspend\n"; unsigned long now; struct rtc_wkalrm alm; int status; /* this may fail if the RTC hasn't been initialized */ status = rtc_read_time(rtc, &alm.time); if (status < 0) { printk(err_readtime, rtc->dev.bus_id, status); return; } rtc_tm_to_time(&alm.time, &now); memset(&alm, 0, sizeof alm); rtc_time_to_tm(now + TEST_SUSPEND_SECONDS, &alm.time); alm.enabled = true; status = rtc_set_alarm(rtc, &alm); if (status < 0) { printk(err_wakealarm, rtc->dev.bus_id, status); return; } if (state == PM_SUSPEND_MEM) { printk(info_test, pm_states[state]); status = pm_suspend(state); if (status == -ENODEV) state = PM_SUSPEND_STANDBY; } if (state == PM_SUSPEND_STANDBY) { printk(info_test, pm_states[state]); status = pm_suspend(state); } if (status < 0) printk(err_suspend, status); /* Some platforms can't detect that the alarm triggered the * wakeup, or (accordingly) disable it after it afterwards. * It's supposed to give oneshot behavior; cope. */ alm.enabled = false; rtc_set_alarm(rtc, &alm); } static int __init has_wakealarm(struct device *dev, void *name_ptr) { struct rtc_device *candidate = to_rtc_device(dev); if (!candidate->ops->set_alarm) return 0; if (!device_may_wakeup(candidate->dev.parent)) return 0; *(char **)name_ptr = dev->bus_id; return 1; } /* * Kernel options like "test_suspend=mem" force suspend/resume sanity tests * at startup time. They're normally disabled, for faster boot and because * we can't know which states really work on this particular system. */ static suspend_state_t test_state __initdata = PM_SUSPEND_ON; static char warn_bad_state[] __initdata = KERN_WARNING "PM: can't test '%s' suspend state\n"; static int __init setup_test_suspend(char *value) { unsigned i; /* "=mem" ==> "mem" */ value++; for (i = 0; i < PM_SUSPEND_MAX; i++) { if (!pm_states[i]) continue; if (strcmp(pm_states[i], value) != 0) continue; test_state = (__force suspend_state_t) i; return 0; } printk(warn_bad_state, value); return 0; } __setup("test_suspend", setup_test_suspend); static int __init test_suspend(void) { static char warn_no_rtc[] __initdata = KERN_WARNING "PM: no wakealarm-capable RTC driver is ready\n"; char *pony = NULL; struct rtc_device *rtc = NULL; /* PM is initialized by now; is that state testable? */ if (test_state == PM_SUSPEND_ON) goto done; if (!valid_state(test_state)) { printk(warn_bad_state, pm_states[test_state]); goto done; } /* RTCs have initialized by now too ... can we use one? */ class_find_device(rtc_class, NULL, &pony, has_wakealarm); if (pony) rtc = rtc_class_open(pony); if (!rtc) { printk(warn_no_rtc); goto done; } /* go for it */ test_wakealarm(rtc, test_state); rtc_class_close(rtc); done: return 0; } late_initcall(test_suspend); #endif /* CONFIG_PM_TEST_SUSPEND */