Diffstat (limited to 'drivers/cpuidle')
-rw-r--r--   drivers/cpuidle/Kconfig              |  20
-rw-r--r--   drivers/cpuidle/Kconfig.arm          |  29
-rw-r--r--   drivers/cpuidle/Makefile             |   9
-rw-r--r--   drivers/cpuidle/coupled.c            | 129
-rw-r--r--   drivers/cpuidle/cpuidle-calxeda.c    |   2
-rw-r--r--   drivers/cpuidle/cpuidle-kirkwood.c   |   5
-rw-r--r--   drivers/cpuidle/cpuidle-ux500.c      | 131
-rw-r--r--   drivers/cpuidle/cpuidle.c            |  94
-rw-r--r--   drivers/cpuidle/governors/ladder.c   |  12
-rw-r--r--   drivers/cpuidle/governors/menu.c     | 214
-rw-r--r--   drivers/cpuidle/sysfs.c              | 101
11 files changed, 467 insertions, 279 deletions
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index 0e2cd5c..b3fb81d 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -1,5 +1,6 @@ +menu "CPU Idle" -menuconfig CPU_IDLE +config CPU_IDLE bool "CPU idle PM support" default y if ACPI || PPC_PSERIES select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE) @@ -29,20 +30,13 @@ config CPU_IDLE_GOV_MENU bool "Menu governor (for tickless system)" default y -config CPU_IDLE_CALXEDA - bool "CPU Idle Driver for Calxeda processors" - depends on ARCH_HIGHBANK - select ARM_CPU_SUSPEND - help - Select this to enable cpuidle on Calxeda processors. - -config CPU_IDLE_ZYNQ - bool "CPU Idle Driver for Xilinx Zynq processors" - depends on ARCH_ZYNQ - help - Select this to enable cpuidle on Xilinx Zynq processors. +menu "ARM CPU Idle Drivers" +depends on ARM +source "drivers/cpuidle/Kconfig.arm" +endmenu endif config ARCH_NEEDS_CPU_IDLE_COUPLED def_bool n +endmenu diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm new file mode 100644 index 0000000..b3302193 --- /dev/null +++ b/drivers/cpuidle/Kconfig.arm @@ -0,0 +1,29 @@ +# +# ARM CPU Idle drivers +# + +config ARM_HIGHBANK_CPUIDLE + bool "CPU Idle Driver for Calxeda processors" + depends on ARCH_HIGHBANK + select ARM_CPU_SUSPEND + help + Select this to enable cpuidle on Calxeda processors. + +config ARM_KIRKWOOD_CPUIDLE + bool "CPU Idle Driver for Marvell Kirkwood SoCs" + depends on ARCH_KIRKWOOD + help + This adds the CPU Idle driver for Marvell Kirkwood SoCs. + +config ARM_ZYNQ_CPUIDLE + bool "CPU Idle Driver for Xilinx Zynq processors" + depends on ARCH_ZYNQ + help + Select this to enable cpuidle on Xilinx Zynq processors. + +config ARM_U8500_CPUIDLE + bool "Cpu Idle Driver for the ST-E u8500 processors" + depends on ARCH_U8500 + help + Select this to enable cpuidle for ST-E u8500 processors + diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 8767a7b..0b9d200 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -5,6 +5,9 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o -obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o -obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o -obj-$(CONFIG_CPU_IDLE_ZYNQ) += cpuidle-zynq.o +################################################################################## +# ARM SoC drivers +obj-$(CONFIG_ARM_HIGHBANK_CPUIDLE) += cpuidle-calxeda.o +obj-$(CONFIG_ARM_KIRKWOOD_CPUIDLE) += cpuidle-kirkwood.o +obj-$(CONFIG_ARM_ZYNQ_CPUIDLE) += cpuidle-zynq.o +obj-$(CONFIG_ARM_U8500_CPUIDLE) += cpuidle-ux500.o diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 2a297f8..f8a8636 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -106,6 +106,7 @@ struct cpuidle_coupled { cpumask_t coupled_cpus; int requested_state[NR_CPUS]; atomic_t ready_waiting_counts; + atomic_t abort_barrier; int online_count; int refcnt; int prevent; @@ -122,12 +123,19 @@ static DEFINE_MUTEX(cpuidle_coupled_lock); static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb); /* - * The cpuidle_coupled_poked_mask mask is used to avoid calling + * The cpuidle_coupled_poke_pending mask is used to avoid calling * __smp_call_function_single with the per cpu call_single_data struct already * in use. 
This prevents a deadlock where two cpus are waiting for each others * call_single_data struct to be available */ -static cpumask_t cpuidle_coupled_poked_mask; +static cpumask_t cpuidle_coupled_poke_pending; + +/* + * The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked + * once to minimize entering the ready loop with a poke pending, which would + * require aborting and retrying. + */ +static cpumask_t cpuidle_coupled_poked; /** * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus @@ -291,10 +299,11 @@ static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev, return state; } -static void cpuidle_coupled_poked(void *info) +static void cpuidle_coupled_handle_poke(void *info) { int cpu = (unsigned long)info; - cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask); + cpumask_set_cpu(cpu, &cpuidle_coupled_poked); + cpumask_clear_cpu(cpu, &cpuidle_coupled_poke_pending); } /** @@ -313,7 +322,7 @@ static void cpuidle_coupled_poke(int cpu) { struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); - if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask)) + if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) __smp_call_function_single(cpu, csd, 0); } @@ -340,30 +349,19 @@ static void cpuidle_coupled_poke_others(int this_cpu, * @coupled: the struct coupled that contains the current cpu * @next_state: the index in drv->states of the requested state for this cpu * - * Updates the requested idle state for the specified cpuidle device, - * poking all coupled cpus out of idle if necessary to let them see the new - * state. + * Updates the requested idle state for the specified cpuidle device. + * Returns the number of waiting cpus. */ -static void cpuidle_coupled_set_waiting(int cpu, +static int cpuidle_coupled_set_waiting(int cpu, struct cpuidle_coupled *coupled, int next_state) { - int w; - coupled->requested_state[cpu] = next_state; /* - * If this is the last cpu to enter the waiting state, poke - * all the other cpus out of their waiting state so they can - * enter a deeper state. This can race with one of the cpus - * exiting the waiting state due to an interrupt and - * decrementing waiting_count, see comment below. - * * The atomic_inc_return provides a write barrier to order the write * to requested_state with the later write that increments ready_count. */ - w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; - if (w == coupled->online_count) - cpuidle_coupled_poke_others(cpu, coupled); + return atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; } /** @@ -410,19 +408,33 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled) * been processed and the poke bit has been cleared. * * Other interrupts may also be processed while interrupts are enabled, so - * need_resched() must be tested after turning interrupts off again to make sure + * need_resched() must be tested after this function returns to make sure * the interrupt didn't schedule work that should take the cpu out of idle. * - * Returns 0 if need_resched was false, -EINTR if need_resched was true. + * Returns 0 if no poke was pending, 1 if a poke was cleared. */ static int cpuidle_coupled_clear_pokes(int cpu) { + if (!cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) + return 0; + local_irq_enable(); - while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask)) + while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) cpu_relax(); local_irq_disable(); - return need_resched() ? 
-EINTR : 0; + return 1; +} + +static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled) +{ + cpumask_t cpus; + int ret; + + cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus); + ret = cpumask_and(&cpus, &cpuidle_coupled_poke_pending, &cpus); + + return ret; } /** @@ -449,31 +461,56 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, { int entered_state = -1; struct cpuidle_coupled *coupled = dev->coupled; + int w; if (!coupled) return -EINVAL; while (coupled->prevent) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { local_irq_enable(); return entered_state; } entered_state = cpuidle_enter_state(dev, drv, dev->safe_state_index); + local_irq_disable(); } /* Read barrier ensures online_count is read after prevent is cleared */ smp_rmb(); - cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); +reset: + cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked); + + w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); + /* + * If this is the last cpu to enter the waiting state, poke + * all the other cpus out of their waiting state so they can + * enter a deeper state. This can race with one of the cpus + * exiting the waiting state due to an interrupt and + * decrementing waiting_count, see comment below. + */ + if (w == coupled->online_count) { + cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked); + cpuidle_coupled_poke_others(dev->cpu, coupled); + } retry: /* * Wait for all coupled cpus to be idle, using the deepest state - * allowed for a single cpu. + * allowed for a single cpu. If this was not the poking cpu, wait + * for at least one poke before leaving to avoid a race where + * two cpus could arrive at the waiting loop at the same time, + * but the first of the two to arrive could skip the loop without + * processing the pokes from the last to arrive. */ - while (!cpuidle_coupled_cpus_waiting(coupled)) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + while (!cpuidle_coupled_cpus_waiting(coupled) || + !cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) { + if (cpuidle_coupled_clear_pokes(dev->cpu)) + continue; + + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } @@ -485,14 +522,22 @@ retry: entered_state = cpuidle_enter_state(dev, drv, dev->safe_state_index); + local_irq_disable(); } - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } /* + * Make sure final poke status for this cpu is visible before setting + * cpu as ready. + */ + smp_wmb(); + + /* * All coupled cpus are probably idle. There is a small chance that * one of the other cpus just became active. Increment the ready count, * and spin until all coupled cpus have incremented the counter. Once a @@ -511,6 +556,28 @@ retry: cpu_relax(); } + /* + * Make sure read of all cpus ready is done before reading pending pokes + */ + smp_rmb(); + + /* + * There is a small chance that a cpu left and reentered idle after this + * cpu saw that all cpus were waiting. The cpu that reentered idle will + * have sent this cpu a poke, which will still be pending after the + * ready loop. The pending interrupt may be lost by the interrupt + * controller when entering the deep idle state. 
It's not possible to + * clear a pending interrupt without turning interrupts on and handling + * it, and it's too late to turn on interrupts here, so reset the + * coupled idle state of all cpus and retry. + */ + if (cpuidle_coupled_any_pokes_pending(coupled)) { + cpuidle_coupled_set_done(dev->cpu, coupled); + /* Wait for all cpus to see the pending pokes */ + cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier); + goto reset; + } + /* all cpus have acked the coupled state */ next_state = cpuidle_coupled_get_state(dev, coupled); @@ -596,7 +663,7 @@ have_coupled: coupled->refcnt++; csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu); - csd->func = cpuidle_coupled_poked; + csd->func = cpuidle_coupled_handle_poke; csd->info = (void *)(unsigned long)dev->cpu; return 0; diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index 0e6e408..3460584 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -35,7 +35,7 @@ #include <asm/cp15.h> extern void highbank_set_cpu_jump(int cpu, void *jump_addr); -extern void *scu_base_addr; +extern void __iomem *scu_base_addr; static noinline void calxeda_idle_restore(void) { diff --git a/drivers/cpuidle/cpuidle-kirkwood.c b/drivers/cpuidle/cpuidle-kirkwood.c index 521b0a7..41ba843 100644 --- a/drivers/cpuidle/cpuidle-kirkwood.c +++ b/drivers/cpuidle/cpuidle-kirkwood.c @@ -60,9 +60,6 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev) struct resource *res; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) - return -EINVAL; - ddr_operation_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(ddr_operation_base)) return PTR_ERR(ddr_operation_base); @@ -70,7 +67,7 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev) return cpuidle_register(&kirkwood_idle_driver, NULL); } -int kirkwood_cpuidle_remove(struct platform_device *pdev) +static int kirkwood_cpuidle_remove(struct platform_device *pdev) { cpuidle_unregister(&kirkwood_idle_driver); return 0; diff --git a/drivers/cpuidle/cpuidle-ux500.c b/drivers/cpuidle/cpuidle-ux500.c new file mode 100644 index 0000000..e056465 --- /dev/null +++ b/drivers/cpuidle/cpuidle-ux500.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2012 Linaro : Daniel Lezcano <daniel.lezcano@linaro.org> (IBM) + * + * Based on the work of Rickard Andersson <rickard.andersson@stericsson.com> + * and Jonas Aaberg <jonas.aberg@stericsson.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/cpuidle.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/smp.h> +#include <linux/mfd/dbx500-prcmu.h> +#include <linux/platform_data/arm-ux500-pm.h> +#include <linux/platform_device.h> + +#include <asm/cpuidle.h> +#include <asm/proc-fns.h> + +static atomic_t master = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(master_lock); + +static inline int ux500_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + int this_cpu = smp_processor_id(); + bool recouple = false; + + if (atomic_inc_return(&master) == num_online_cpus()) { + + /* With this lock, we prevent the other cpu to exit and enter + * this function again and become the master */ + if (!spin_trylock(&master_lock)) + goto wfi; + + /* decouple the gic from the A9 cores */ + if (prcmu_gic_decouple()) { + spin_unlock(&master_lock); + goto out; + } + + /* If an error occur, we will have to recouple the gic + * manually */ + recouple = true; + + /* At this state, as the gic is decoupled, if the other + * cpu is in WFI, we have the guarantee it won't be wake + * up, so we can safely go to retention */ + if (!prcmu_is_cpu_in_wfi(this_cpu ? 0 : 1)) + goto out; + + /* The prcmu will be in charge of watching the interrupts + * and wake up the cpus */ + if (prcmu_copy_gic_settings()) + goto out; + + /* Check in the meantime an interrupt did + * not occur on the gic ... */ + if (prcmu_gic_pending_irq()) + goto out; + + /* ... and the prcmu */ + if (prcmu_pending_irq()) + goto out; + + /* Go to the retention state, the prcmu will wait for the + * cpu to go WFI and this is what happens after exiting this + * 'master' critical section */ + if (prcmu_set_power_state(PRCMU_AP_IDLE, true, true)) + goto out; + + /* When we switch to retention, the prcmu is in charge + * of recoupling the gic automatically */ + recouple = false; + + spin_unlock(&master_lock); + } +wfi: + cpu_do_idle(); +out: + atomic_dec(&master); + + if (recouple) { + prcmu_gic_recouple(); + spin_unlock(&master_lock); + } + + return index; +} + +static struct cpuidle_driver ux500_idle_driver = { + .name = "ux500_idle", + .owner = THIS_MODULE, + .states = { + ARM_CPUIDLE_WFI_STATE, + { + .enter = ux500_enter_idle, + .exit_latency = 70, + .target_residency = 260, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "ApIdle", + .desc = "ARM Retention", + }, + }, + .safe_state_index = 0, + .state_count = 2, +}; + +static int __init dbx500_cpuidle_probe(struct platform_device *pdev) +{ + /* Configure wake up reasons */ + prcmu_enable_wakeups(PRCMU_WAKEUP(ARM) | PRCMU_WAKEUP(RTC) | + PRCMU_WAKEUP(ABB)); + + return cpuidle_register(&ux500_idle_driver, NULL); +} + +static struct platform_driver dbx500_cpuidle_plat_driver = { + .driver = { + .name = "cpuidle-dbx500", + .owner = THIS_MODULE, + }, + .probe = dbx500_cpuidle_probe, +}; + +module_platform_driver(dbx500_cpuidle_plat_driver); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index fdc432f..d75040d 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -42,8 +42,6 @@ void disable_cpuidle(void) off = 1; } -static int __cpuidle_register_device(struct cpuidle_device *dev); - /** * cpuidle_play_dead - cpu off-lining * @@ -278,7 +276,7 @@ static void poll_idle_init(struct cpuidle_driver *drv) {} */ int cpuidle_enable_device(struct cpuidle_device *dev) { - int ret, i; + int ret; struct cpuidle_driver *drv; if (!dev) @@ -292,15 +290,12 @@ int cpuidle_enable_device(struct cpuidle_device 
*dev) if (!drv || !cpuidle_curr_governor) return -EIO; + if (!dev->registered) + return -EINVAL; + if (!dev->state_count) dev->state_count = drv->state_count; - if (dev->registered == 0) { - ret = __cpuidle_register_device(dev); - if (ret) - return ret; - } - poll_idle_init(drv); ret = cpuidle_add_device_sysfs(dev); @@ -311,12 +306,6 @@ int cpuidle_enable_device(struct cpuidle_device *dev) (ret = cpuidle_curr_governor->enable(drv, dev))) goto fail_sysfs; - for (i = 0; i < dev->state_count; i++) { - dev->states_usage[i].usage = 0; - dev->states_usage[i].time = 0; - } - dev->last_residency = 0; - smp_wmb(); dev->enabled = 1; @@ -360,6 +349,23 @@ void cpuidle_disable_device(struct cpuidle_device *dev) EXPORT_SYMBOL_GPL(cpuidle_disable_device); +static void __cpuidle_unregister_device(struct cpuidle_device *dev) +{ + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + + list_del(&dev->device_list); + per_cpu(cpuidle_devices, dev->cpu) = NULL; + module_put(drv->owner); +} + +static int __cpuidle_device_init(struct cpuidle_device *dev) +{ + memset(dev->states_usage, 0, sizeof(dev->states_usage)); + dev->last_residency = 0; + + return 0; +} + /** * __cpuidle_register_device - internal register function called before register * and enable routines @@ -377,24 +383,15 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); - ret = cpuidle_add_sysfs(dev); - if (ret) - goto err_sysfs; ret = cpuidle_coupled_register_device(dev); - if (ret) - goto err_coupled; + if (ret) { + __cpuidle_unregister_device(dev); + return ret; + } dev->registered = 1; return 0; - -err_coupled: - cpuidle_remove_sysfs(dev); -err_sysfs: - list_del(&dev->device_list); - per_cpu(cpuidle_devices, dev->cpu) = NULL; - module_put(drv->owner); - return ret; } /** @@ -403,25 +400,44 @@ err_sysfs: */ int cpuidle_register_device(struct cpuidle_device *dev) { - int ret; + int ret = -EBUSY; if (!dev) return -EINVAL; mutex_lock(&cpuidle_lock); - if ((ret = __cpuidle_register_device(dev))) { - mutex_unlock(&cpuidle_lock); - return ret; - } + if (dev->registered) + goto out_unlock; + + ret = __cpuidle_device_init(dev); + if (ret) + goto out_unlock; + + ret = __cpuidle_register_device(dev); + if (ret) + goto out_unlock; + + ret = cpuidle_add_sysfs(dev); + if (ret) + goto out_unregister; + + ret = cpuidle_enable_device(dev); + if (ret) + goto out_sysfs; - cpuidle_enable_device(dev); cpuidle_install_idle_handler(); +out_unlock: mutex_unlock(&cpuidle_lock); - return 0; + return ret; +out_sysfs: + cpuidle_remove_sysfs(dev); +out_unregister: + __cpuidle_unregister_device(dev); + goto out_unlock; } EXPORT_SYMBOL_GPL(cpuidle_register_device); @@ -432,8 +448,6 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device); */ void cpuidle_unregister_device(struct cpuidle_device *dev) { - struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); - if (dev->registered == 0) return; @@ -442,14 +456,12 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_disable_device(dev); cpuidle_remove_sysfs(dev); - list_del(&dev->device_list); - per_cpu(cpuidle_devices, dev->cpu) = NULL; + + __cpuidle_unregister_device(dev); cpuidle_coupled_unregister_device(dev); cpuidle_resume_and_unlock(); - - module_put(drv->owner); } EXPORT_SYMBOL_GPL(cpuidle_unregister_device); diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 9b78405..9f08e8c 100644 --- a/drivers/cpuidle/governors/ladder.c +++ 
b/drivers/cpuidle/governors/ladder.c @@ -192,14 +192,4 @@ static int __init init_ladder(void) return cpuidle_register_governor(&ladder_governor); } -/** - * exit_ladder - exits the governor - */ -static void __exit exit_ladder(void) -{ - cpuidle_unregister_governor(&ladder_governor); -} - -MODULE_LICENSE("GPL"); -module_init(init_ladder); -module_exit(exit_ladder); +postcore_initcall(init_ladder); diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index fe343a0..cf7f2f0 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -21,6 +21,15 @@ #include <linux/math64.h> #include <linux/module.h> +/* + * Please note when changing the tuning values: + * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of + * a scaling operation multiplication may overflow on 32 bit platforms. + * In that case, #define RESOLUTION as ULL to get 64 bit result: + * #define RESOLUTION 1024ULL + * + * The default values do not overflow. + */ #define BUCKETS 12 #define INTERVALS 8 #define RESOLUTION 1024 @@ -28,13 +37,6 @@ #define MAX_INTERESTING 50000 #define STDDEV_THRESH 400 -/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */ -#define MAX_DEVIATION 60 - -static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); -static DEFINE_PER_CPU(int, hrtimer_status); -/* menu hrtimer mode */ -enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; /* * Concepts and ideas behind the menu governor @@ -116,23 +118,16 @@ enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; * */ -/* - * The C-state residency is so long that is is worthwhile to exit - * from the shallow C-state and re-enter into a deeper C-state. - */ -static unsigned int perfect_cstate_ms __read_mostly = 30; -module_param(perfect_cstate_ms, uint, 0000); - struct menu_device { int last_state_idx; int needs_update; unsigned int expected_us; - u64 predicted_us; + unsigned int predicted_us; unsigned int exit_us; unsigned int bucket; - u64 correction_factor[BUCKETS]; - u32 intervals[INTERVALS]; + unsigned int correction_factor[BUCKETS]; + unsigned int intervals[INTERVALS]; int interval_ptr; }; @@ -205,59 +200,28 @@ static u64 div_round64(u64 dividend, u32 divisor) return div_u64(dividend + (divisor / 2), divisor); } -/* Cancel the hrtimer if it is not triggered yet */ -void menu_hrtimer_cancel(void) -{ - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); - - /* The timer is still not time out*/ - if (per_cpu(hrtimer_status, cpu)) { - hrtimer_cancel(hrtmr); - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - } -} -EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); - -/* Call back for hrtimer is triggered */ -static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) -{ - int cpu = smp_processor_id(); - struct menu_device *data = &per_cpu(menu_devices, cpu); - - /* In general case, the expected residency is much larger than - * deepest C-state target residency, but prediction logic still - * predicts a small predicted residency, so the prediction - * history is totally broken if the timer is triggered. - * So reset the correction factor. - */ - if (per_cpu(hrtimer_status, cpu) == MENU_HRTIMER_GENERAL) - data->correction_factor[data->bucket] = RESOLUTION * DECAY; - - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - - return HRTIMER_NORESTART; -} - /* * Try detecting repeating patterns by keeping track of the last 8 * intervals, and checking if the standard deviation of that set * of points is below a threshold. If it is... 
then use the * average of these 8 points as the estimated value. */ -static u32 get_typical_interval(struct menu_device *data) +static void get_typical_interval(struct menu_device *data) { - int i = 0, divisor = 0; - uint64_t max = 0, avg = 0, stddev = 0; - int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */ - unsigned int ret = 0; + int i, divisor; + unsigned int max, thresh; + uint64_t avg, stddev; + + thresh = UINT_MAX; /* Discard outliers above this value */ again: - /* first calculate average and standard deviation of the past */ - max = avg = divisor = stddev = 0; + /* First calculate the average of past intervals */ + max = 0; + avg = 0; + divisor = 0; for (i = 0; i < INTERVALS; i++) { - int64_t value = data->intervals[i]; + unsigned int value = data->intervals[i]; if (value <= thresh) { avg += value; divisor++; @@ -267,15 +231,38 @@ again: } do_div(avg, divisor); + /* Then try to determine standard deviation */ + stddev = 0; for (i = 0; i < INTERVALS; i++) { - int64_t value = data->intervals[i]; + unsigned int value = data->intervals[i]; if (value <= thresh) { int64_t diff = value - avg; stddev += diff * diff; } } do_div(stddev, divisor); - stddev = int_sqrt(stddev); + /* + * The typical interval is obtained when standard deviation is small + * or standard deviation is small compared to the average interval. + * + * int_sqrt() formal parameter type is unsigned long. When the + * greatest difference to an outlier exceeds ~65 ms * sqrt(divisor) + * the resulting squared standard deviation exceeds the input domain + * of int_sqrt on platforms where unsigned long is 32 bits in size. + * In such case reject the candidate average. + * + * Use this result only if there is no timer to wake us up sooner. + */ + if (likely(stddev <= ULONG_MAX)) { + stddev = int_sqrt(stddev); + if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) + || stddev <= 20) { + if (data->expected_us > avg) + data->predicted_us = avg; + return; + } + } + /* * If we have outliers to the upside in our distribution, discard * those by setting the threshold to exclude these outliers, then @@ -284,23 +271,12 @@ again: * * This can deal with workloads that have long pauses interspersed * with sporadic activity with a bunch of short pauses. - * - * The typical interval is obtained when standard deviation is small - * or standard deviation is small compared to the average interval. */ - if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) - || stddev <= 20) { - data->predicted_us = avg; - ret = 1; - return ret; - - } else if ((divisor * 4) > INTERVALS * 3) { - /* Exclude the max interval */ - thresh = max - 1; - goto again; - } + if ((divisor * 4) <= INTERVALS * 3) + return; - return ret; + thresh = max - 1; + goto again; } /** @@ -315,9 +291,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int i; int multiplier; struct timespec t; - int repeat = 0, low_predicted = 0; - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); if (data->needs_update) { menu_update(drv, dev); @@ -348,11 +321,16 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (data->correction_factor[data->bucket] == 0) data->correction_factor[data->bucket] = RESOLUTION * DECAY; - /* Make sure to round up for half microseconds */ - data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], + /* + * Force the result of multiplication to be 64 bits even if both + * operands are 32 bits. 
+ * Make sure to round up for half microseconds. + */ + data->predicted_us = div_round64((uint64_t)data->expected_us * + data->correction_factor[data->bucket], RESOLUTION * DECAY); - repeat = get_typical_interval(data); + get_typical_interval(data); /* * We want to default to C1 (hlt), not to busy polling @@ -373,10 +351,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (s->disabled || su->disable) continue; - if (s->target_residency > data->predicted_us) { - low_predicted = 1; + if (s->target_residency > data->predicted_us) continue; - } if (s->exit_latency > latency_req) continue; if (s->exit_latency * multiplier > data->predicted_us) @@ -386,44 +362,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->exit_us = s->exit_latency; } - /* not deepest C-state chosen for low predicted residency */ - if (low_predicted) { - unsigned int timer_us = 0; - unsigned int perfect_us = 0; - - /* - * Set a timer to detect whether this sleep is much - * longer than repeat mode predicted. If the timer - * triggers, the code will evaluate whether to put - * the CPU into a deeper C-state. - * The timer is cancelled on CPU wakeup. - */ - timer_us = 2 * (data->predicted_us + MAX_DEVIATION); - - perfect_us = perfect_cstate_ms * 1000; - - if (repeat && (4 * timer_us < data->expected_us)) { - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In repeat case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; - } else if (perfect_us < data->expected_us) { - /* - * The next timer is long. This could be because - * we did not make a useful prediction. - * In that case, it makes sense to re-enter - * into a deeper C-state after some time. - */ - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In general case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL; - } - - } - return data->last_state_idx; } @@ -455,7 +393,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) unsigned int last_idle_us = cpuidle_get_last_residency(dev); struct cpuidle_state *target = &drv->states[last_idx]; unsigned int measured_us; - u64 new_factor; + unsigned int new_factor; /* * Ugh, this idle state doesn't support residency measurements, so we @@ -476,10 +414,9 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) measured_us -= data->exit_us; - /* update our correction ratio */ - - new_factor = data->correction_factor[data->bucket] - * (DECAY - 1) / DECAY; + /* Update our correction ratio */ + new_factor = data->correction_factor[data->bucket]; + new_factor -= new_factor / DECAY; if (data->expected_us > 0 && measured_us < MAX_INTERESTING) new_factor += RESOLUTION * measured_us / data->expected_us; @@ -492,9 +429,11 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* * We don't want 0 as factor; we always want at least - * a tiny bit of estimated time. + * a tiny bit of estimated time. Fortunately, due to rounding, + * new_factor will stay nonzero regardless of measured_us values + * and the compiler can eliminate this test as long as DECAY > 1. 
*/ - if (new_factor == 0) + if (DECAY == 1 && unlikely(new_factor == 0)) new_factor = 1; data->correction_factor[data->bucket] = new_factor; @@ -514,9 +453,6 @@ static int menu_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &per_cpu(menu_devices, dev->cpu); - struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu); - hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - t->function = menu_hrtimer_notify; memset(data, 0, sizeof(struct menu_device)); @@ -540,14 +476,4 @@ static int __init init_menu(void) return cpuidle_register_governor(&menu_governor); } -/** - * exit_menu - exits the governor - */ -static void __exit exit_menu(void) -{ - cpuidle_unregister_governor(&menu_governor); -} - -MODULE_LICENSE("GPL"); -module_init(init_menu); -module_exit(exit_menu); +postcore_initcall(init_menu); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 428754a..8739cc0 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -11,8 +11,10 @@ #include <linux/sysfs.h> #include <linux/slab.h> #include <linux/cpu.h> +#include <linux/completion.h> #include <linux/capability.h> #include <linux/device.h> +#include <linux/kobject.h> #include "cpuidle.h" @@ -33,7 +35,8 @@ static ssize_t show_available_governors(struct device *dev, mutex_lock(&cpuidle_lock); list_for_each_entry(tmp, &cpuidle_governors, governor_list) { - if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2)) + if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - + CPUIDLE_NAME_LEN - 2)) goto out; i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name); } @@ -166,13 +169,28 @@ struct cpuidle_attr { #define define_one_rw(_name, show, store) \ static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store) -#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj) #define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr) -static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf) + +struct cpuidle_device_kobj { + struct cpuidle_device *dev; + struct completion kobj_unregister; + struct kobject kobj; +}; + +static inline struct cpuidle_device *to_cpuidle_device(struct kobject *kobj) +{ + struct cpuidle_device_kobj *kdev = + container_of(kobj, struct cpuidle_device_kobj, kobj); + + return kdev->dev; +} + +static ssize_t cpuidle_show(struct kobject *kobj, struct attribute *attr, + char *buf) { int ret = -EIO; - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); - struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + struct cpuidle_device *dev = to_cpuidle_device(kobj); + struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr); if (cattr->show) { mutex_lock(&cpuidle_lock); @@ -182,12 +200,12 @@ static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char return ret; } -static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr, - const char * buf, size_t count) +static ssize_t cpuidle_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) { int ret = -EIO; - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); - struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + struct cpuidle_device *dev = to_cpuidle_device(kobj); + struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr); if (cattr->store) { mutex_lock(&cpuidle_lock); @@ -204,9 +222,10 @@ static const struct sysfs_ops cpuidle_sysfs_ops = { static void cpuidle_sysfs_release(struct kobject *kobj) { - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); + 
struct cpuidle_device_kobj *kdev = + container_of(kobj, struct cpuidle_device_kobj, kobj); - complete(&dev->kobj_unregister); + complete(&kdev->kobj_unregister); } static struct kobj_type ktype_cpuidle = { @@ -237,8 +256,8 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ #define define_store_state_ull_function(_name) \ static ssize_t store_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, \ - const char *buf, size_t size) \ + struct cpuidle_state_usage *state_usage, \ + const char *buf, size_t size) \ { \ unsigned long long value; \ int err; \ @@ -256,14 +275,16 @@ static ssize_t store_state_##_name(struct cpuidle_state *state, \ #define define_show_state_ull_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, char *buf) \ + struct cpuidle_state_usage *state_usage, \ + char *buf) \ { \ return sprintf(buf, "%llu\n", state_usage->_name);\ } #define define_show_state_str_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, char *buf) \ + struct cpuidle_state_usage *state_usage, \ + char *buf) \ { \ if (state->_name[0] == '\0')\ return sprintf(buf, "<null>\n");\ @@ -309,8 +330,9 @@ struct cpuidle_state_kobj { #define kobj_to_state(k) (kobj_to_state_obj(k)->state) #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage) #define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr) -static ssize_t cpuidle_state_show(struct kobject * kobj, - struct attribute * attr ,char * buf) + +static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr, + char * buf) { int ret = -EIO; struct cpuidle_state *state = kobj_to_state(kobj); @@ -323,8 +345,8 @@ static ssize_t cpuidle_state_show(struct kobject * kobj, return ret; } -static ssize_t cpuidle_state_store(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t size) +static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) { int ret = -EIO; struct cpuidle_state *state = kobj_to_state(kobj); @@ -371,6 +393,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) { int i, ret = -ENOMEM; struct cpuidle_state_kobj *kobj; + struct cpuidle_device_kobj *kdev = device->kobj_dev; struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); /* state statistics */ @@ -383,7 +406,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) init_completion(&kobj->kobj_unregister); ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, - &device->kobj, "state%d", i); + &kdev->kobj, "state%d", i); if (ret) { kfree(kobj); goto error_state; @@ -449,8 +472,8 @@ static void cpuidle_driver_sysfs_release(struct kobject *kobj) complete(&driver_kobj->kobj_unregister); } -static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute * attr, - char * buf) +static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute *attr, + char *buf) { int ret = -EIO; struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); @@ -500,6 +523,7 @@ static struct kobj_type ktype_driver_cpuidle = { static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) { struct cpuidle_driver_kobj *kdrv; + struct cpuidle_device_kobj *kdev = dev->kobj_dev; struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int ret; @@ -511,7 +535,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) 
init_completion(&kdrv->kobj_unregister); ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle, - &dev->kobj, "driver"); + &kdev->kobj, "driver"); if (ret) { kfree(kdrv); return ret; @@ -580,16 +604,28 @@ void cpuidle_remove_device_sysfs(struct cpuidle_device *device) */ int cpuidle_add_sysfs(struct cpuidle_device *dev) { + struct cpuidle_device_kobj *kdev; struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; - init_completion(&dev->kobj_unregister); + kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); + if (!kdev) + return -ENOMEM; + kdev->dev = dev; + dev->kobj_dev = kdev; - error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj, - "cpuidle"); - if (!error) - kobject_uevent(&dev->kobj, KOBJ_ADD); - return error; + init_completion(&kdev->kobj_unregister); + + error = kobject_init_and_add(&kdev->kobj, &ktype_cpuidle, &cpu_dev->kobj, + "cpuidle"); + if (error) { + kfree(kdev); + return error; + } + + kobject_uevent(&kdev->kobj, KOBJ_ADD); + + return 0; } /** @@ -598,6 +634,9 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) */ void cpuidle_remove_sysfs(struct cpuidle_device *dev) { - kobject_put(&dev->kobj); - wait_for_completion(&dev->kobj_unregister); + struct cpuidle_device_kobj *kdev = dev->kobj_dev; + + kobject_put(&kdev->kobj); + wait_for_completion(&kdev->kobj_unregister); + kfree(kdev); } |
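
The coupled.c changes above hinge on ready_waiting_counts, one atomic value that packs how many coupled cpus are waiting and how many are ready, which is why cpuidle_coupled_set_waiting() can now simply return atomic_inc_return(...) & WAITING_MASK and leave the "last cpu to wait pokes the others" decision to the caller. Below is a rough user-space model of such a packed counter; the 16-bit field split and the helper names are assumptions made for illustration, not copied from coupled.c.

/* User-space model of a packed waiting/ready counter in the style of
 * coupled.c. The 16-bit split below is an assumed layout, illustration
 * only; it is not taken from the kernel source. */
#include <stdio.h>
#include <stdatomic.h>

#define WAITING_BITS	16
#define WAITING_MASK	((1u << WAITING_BITS) - 1)
#define READY_ONE	(1u << WAITING_BITS)	/* one "ready" increment */

static atomic_uint ready_waiting_counts;

/* A cpu announces it is waiting; returns the new waiting count. */
static unsigned int set_waiting(void)
{
	return (atomic_fetch_add(&ready_waiting_counts, 1) + 1) & WAITING_MASK;
}

/* A waiting cpu promotes itself to ready; returns the new ready count. */
static unsigned int set_ready(void)
{
	return (atomic_fetch_add(&ready_waiting_counts, READY_ONE) + READY_ONE)
		>> WAITING_BITS;
}

int main(void)
{
	int online = 4, cpu;

	for (cpu = 0; cpu < online; cpu++) {
		unsigned int w = set_waiting();

		/* Mirrors the patch: only the last cpu to start waiting
		 * pokes the others so they can pick a deeper state. */
		if (w == (unsigned int)online)
			printf("cpu%d is the last to wait, poke the others\n", cpu);
	}
	for (cpu = 0; cpu < online; cpu++)
		printf("ready count is now %u\n", set_ready());

	return 0;
}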
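
In menu.c, get_typical_interval() now detects repeating wakeup patterns by averaging the last eight observed intervals, taking their standard deviation, and, when the spread is too wide, discarding the largest outlier and retrying. The stand-alone sketch below follows the same thresholds; int_sqrt() is re-implemented as a plain bisection helper, and the kernel's additional check that the result is only used when no timer will wake the cpu sooner (data->expected_us > avg) is left out, so treat it as a sketch of the detection loop only.

/* Stand-alone sketch of the repeating-interval detection done by
 * get_typical_interval() in menu.c; same thresholds, user-space only. */
#include <stdio.h>
#include <stdint.h>
#include <limits.h>

#define INTERVALS 8

/* Plain bisection integer square root, standing in for the kernel's
 * int_sqrt(); the algorithm differs, the result does not. */
static unsigned long int_sqrt(unsigned long x)
{
	unsigned long r = 0, bit = 1UL << (sizeof(unsigned long) * 8 - 2);

	while (bit > x)
		bit >>= 2;
	while (bit) {
		if (x >= r + bit) {
			x -= r + bit;
			r = (r >> 1) + bit;
		} else {
			r >>= 1;
		}
		bit >>= 2;
	}
	return r;
}

/* Returns the typical interval in microseconds, or 0 when no stable
 * pattern is found (the "give up" paths of the patch). */
static unsigned int typical_interval(const unsigned int intervals[INTERVALS])
{
	unsigned int thresh = UINT_MAX;	/* discard outliers above this */

	for (;;) {
		uint64_t avg = 0, stddev = 0;
		unsigned int max = 0;
		int i, divisor = 0;

		/* First the average of the samples below the threshold */
		for (i = 0; i < INTERVALS; i++) {
			unsigned int value = intervals[i];
			if (value <= thresh) {
				avg += value;
				divisor++;
				if (value > max)
					max = value;
			}
		}
		avg /= divisor;

		/* Then their standard deviation */
		for (i = 0; i < INTERVALS; i++) {
			unsigned int value = intervals[i];
			if (value <= thresh) {
				int64_t diff = (int64_t)value - (int64_t)avg;
				stddev += (uint64_t)(diff * diff);
			}
		}
		stddev /= divisor;

		/* As in the patch: the squared deviation may exceed the
		 * input domain of a 32-bit int_sqrt(), reject it then. */
		if (stddev <= ULONG_MAX) {
			stddev = int_sqrt((unsigned long)stddev);
			if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
			    || stddev <= 20)
				return (unsigned int)avg;
		}

		/* Not enough samples left to keep discarding outliers */
		if (divisor * 4 <= INTERVALS * 3)
			return 0;

		thresh = max - 1;	/* drop the largest sample and retry */
	}
}

int main(void)
{
	unsigned int samples[INTERVALS] = {
		990, 1010, 1005, 995, 60000, 1000, 1002, 998 };

	printf("typical interval: %u us\n", typical_interval(samples));
	return 0;
}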
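
The other menu.c change worth illustrating is the correction factor handling: the running ratio is now kept in an unsigned int, decayed with new_factor -= new_factor / DECAY, and the prediction multiplies expected_us by the factor in 64 bits before dividing by RESOLUTION * DECAY. A minimal user-space sketch of that arithmetic follows; the DECAY value of 8 is an assumption taken from menu.c rather than from the hunks shown above, and only DECAY > 1 matters for the nonzero argument, as the patch comment notes.

/* Minimal user-space sketch of the menu governor's correction factor
 * arithmetic after this series; not kernel code. DECAY is assumed to
 * be 8 here; only DECAY > 1 matters for the argument in the comments. */
#include <stdio.h>
#include <stdint.h>

#define RESOLUTION	1024
#define DECAY		8
#define MAX_INTERESTING	50000

/* Round-to-nearest 64-bit division, as div_round64() does in menu.c. */
static uint64_t div_round64(uint64_t dividend, uint32_t divisor)
{
	return (dividend + divisor / 2) / divisor;
}

int main(void)
{
	unsigned int factor = RESOLUTION * DECAY;	/* "neutral" starting ratio */
	unsigned int expected_us = 5000;		/* next predicted sleep length */
	unsigned int measured_us = 1200;		/* what actually happened */

	/* Decay the running ratio. With integer division and DECAY > 1,
	 * factor - factor / DECAY never drops below 1 for factor >= 1,
	 * so the old "if (new_factor == 0)" fixup becomes dead code. */
	factor -= factor / DECAY;
	if (expected_us > 0 && measured_us < MAX_INTERESTING)
		factor += RESOLUTION * measured_us / expected_us;

	/* The prediction forces a 64-bit multiply, as the patch does,
	 * because expected_us * factor can exceed UINT_MAX on 32 bit. */
	uint64_t predicted_us = div_round64((uint64_t)expected_us * factor,
					    RESOLUTION * DECAY);

	printf("factor=%u predicted_us=%llu\n",
	       factor, (unsigned long long)predicted_us);
	return 0;
}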
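
Finally, the sysfs.c rework moves the kobject out of struct cpuidle_device into a separately allocated struct cpuidle_device_kobj and recovers the device through container_of() in the show/store callbacks. The sketch below shows that wrapper-plus-container_of pattern with user-space stand-ins for the kobject and sysfs machinery; the struct contents are reduced to what the patch adds.

/* Sketch of the wrapper-object pattern used by the sysfs.c rework: the
 * kobject lives in a separately allocated wrapper and the device is
 * recovered through container_of(). Plain user-space stand-ins only. */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kobject { const char *name; };		/* stand-in for the kernel type */

struct cpuidle_device_kobj;

struct cpuidle_device {
	int cpu;
	struct cpuidle_device_kobj *kobj_dev;	/* back pointer to the wrapper */
};

struct cpuidle_device_kobj {
	struct cpuidle_device *dev;
	struct kobject kobj;
};

/* What a sysfs show/store callback does: from the kobject handed in,
 * walk back to the wrapper, then follow its dev pointer. */
static struct cpuidle_device *to_cpuidle_device(struct kobject *kobj)
{
	struct cpuidle_device_kobj *kdev =
		container_of(kobj, struct cpuidle_device_kobj, kobj);

	return kdev->dev;
}

int main(void)
{
	struct cpuidle_device dev = { .cpu = 1 };
	struct cpuidle_device_kobj *kdev = calloc(1, sizeof(*kdev));

	if (!kdev)
		return 1;

	kdev->dev = &dev;
	kdev->kobj.name = "cpuidle";
	dev.kobj_dev = kdev;

	printf("cpu%d reached via kobject '%s'\n",
	       to_cpuidle_device(&kdev->kobj)->cpu, kdev->kobj.name);
	free(kdev);
	return 0;
}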