Diffstat (limited to 'drivers/cpuidle')
-rw-r--r--  drivers/cpuidle/Kconfig              20
-rw-r--r--  drivers/cpuidle/Kconfig.arm          29
-rw-r--r--  drivers/cpuidle/Makefile              9
-rw-r--r--  drivers/cpuidle/coupled.c           129
-rw-r--r--  drivers/cpuidle/cpuidle-calxeda.c     2
-rw-r--r--  drivers/cpuidle/cpuidle-kirkwood.c    5
-rw-r--r--  drivers/cpuidle/cpuidle-ux500.c     131
-rw-r--r--  drivers/cpuidle/cpuidle.c            94
-rw-r--r--  drivers/cpuidle/governors/ladder.c   12
-rw-r--r--  drivers/cpuidle/governors/menu.c    214
-rw-r--r--  drivers/cpuidle/sysfs.c             101
11 files changed, 467 insertions(+), 279 deletions(-)
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig
index 0e2cd5c..b3fb81d 100644
--- a/drivers/cpuidle/Kconfig
+++ b/drivers/cpuidle/Kconfig
@@ -1,5 +1,6 @@
+menu "CPU Idle"
-menuconfig CPU_IDLE
+config CPU_IDLE
bool "CPU idle PM support"
default y if ACPI || PPC_PSERIES
select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE)
@@ -29,20 +30,13 @@ config CPU_IDLE_GOV_MENU
bool "Menu governor (for tickless system)"
default y
-config CPU_IDLE_CALXEDA
- bool "CPU Idle Driver for Calxeda processors"
- depends on ARCH_HIGHBANK
- select ARM_CPU_SUSPEND
- help
- Select this to enable cpuidle on Calxeda processors.
-
-config CPU_IDLE_ZYNQ
- bool "CPU Idle Driver for Xilinx Zynq processors"
- depends on ARCH_ZYNQ
- help
- Select this to enable cpuidle on Xilinx Zynq processors.
+menu "ARM CPU Idle Drivers"
+depends on ARM
+source "drivers/cpuidle/Kconfig.arm"
+endmenu
endif
config ARCH_NEEDS_CPU_IDLE_COUPLED
def_bool n
+endmenu
diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm
new file mode 100644
index 0000000..b3302193
--- /dev/null
+++ b/drivers/cpuidle/Kconfig.arm
@@ -0,0 +1,29 @@
+#
+# ARM CPU Idle drivers
+#
+
+config ARM_HIGHBANK_CPUIDLE
+ bool "CPU Idle Driver for Calxeda processors"
+ depends on ARCH_HIGHBANK
+ select ARM_CPU_SUSPEND
+ help
+ Select this to enable cpuidle on Calxeda processors.
+
+config ARM_KIRKWOOD_CPUIDLE
+ bool "CPU Idle Driver for Marvell Kirkwood SoCs"
+ depends on ARCH_KIRKWOOD
+ help
+ This adds the CPU Idle driver for Marvell Kirkwood SoCs.
+
+config ARM_ZYNQ_CPUIDLE
+ bool "CPU Idle Driver for Xilinx Zynq processors"
+ depends on ARCH_ZYNQ
+ help
+ Select this to enable cpuidle on Xilinx Zynq processors.
+
+config ARM_U8500_CPUIDLE
+ bool "Cpu Idle Driver for the ST-E u8500 processors"
+ depends on ARCH_U8500
+ help
+ Select this to enable cpuidle for ST-E u8500 processors
+
diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile
index 8767a7b..0b9d200 100644
--- a/drivers/cpuidle/Makefile
+++ b/drivers/cpuidle/Makefile
@@ -5,6 +5,9 @@
obj-y += cpuidle.o driver.o governor.o sysfs.o governors/
obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o
-obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o
-obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o
-obj-$(CONFIG_CPU_IDLE_ZYNQ) += cpuidle-zynq.o
+##################################################################################
+# ARM SoC drivers
+obj-$(CONFIG_ARM_HIGHBANK_CPUIDLE) += cpuidle-calxeda.o
+obj-$(CONFIG_ARM_KIRKWOOD_CPUIDLE) += cpuidle-kirkwood.o
+obj-$(CONFIG_ARM_ZYNQ_CPUIDLE) += cpuidle-zynq.o
+obj-$(CONFIG_ARM_U8500_CPUIDLE) += cpuidle-ux500.o
diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c
index 2a297f8..f8a8636 100644
--- a/drivers/cpuidle/coupled.c
+++ b/drivers/cpuidle/coupled.c
@@ -106,6 +106,7 @@ struct cpuidle_coupled {
cpumask_t coupled_cpus;
int requested_state[NR_CPUS];
atomic_t ready_waiting_counts;
+ atomic_t abort_barrier;
int online_count;
int refcnt;
int prevent;
@@ -122,12 +123,19 @@ static DEFINE_MUTEX(cpuidle_coupled_lock);
static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb);
/*
- * The cpuidle_coupled_poked_mask mask is used to avoid calling
+ * The cpuidle_coupled_poke_pending mask is used to avoid calling
* __smp_call_function_single with the per cpu call_single_data struct already
* in use. This prevents a deadlock where two cpus are waiting for each others
* call_single_data struct to be available
*/
-static cpumask_t cpuidle_coupled_poked_mask;
+static cpumask_t cpuidle_coupled_poke_pending;
+
+/*
+ * The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked
+ * once to minimize entering the ready loop with a poke pending, which would
+ * require aborting and retrying.
+ */
+static cpumask_t cpuidle_coupled_poked;
/**
* cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus
@@ -291,10 +299,11 @@ static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev,
return state;
}
-static void cpuidle_coupled_poked(void *info)
+static void cpuidle_coupled_handle_poke(void *info)
{
int cpu = (unsigned long)info;
- cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask);
+ cpumask_set_cpu(cpu, &cpuidle_coupled_poked);
+ cpumask_clear_cpu(cpu, &cpuidle_coupled_poke_pending);
}
/**
@@ -313,7 +322,7 @@ static void cpuidle_coupled_poke(int cpu)
{
struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu);
- if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask))
+ if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending))
__smp_call_function_single(cpu, csd, 0);
}
@@ -340,30 +349,19 @@ static void cpuidle_coupled_poke_others(int this_cpu,
* @coupled: the struct coupled that contains the current cpu
* @next_state: the index in drv->states of the requested state for this cpu
*
- * Updates the requested idle state for the specified cpuidle device,
- * poking all coupled cpus out of idle if necessary to let them see the new
- * state.
+ * Updates the requested idle state for the specified cpuidle device.
+ * Returns the number of waiting cpus.
*/
-static void cpuidle_coupled_set_waiting(int cpu,
+static int cpuidle_coupled_set_waiting(int cpu,
struct cpuidle_coupled *coupled, int next_state)
{
- int w;
-
coupled->requested_state[cpu] = next_state;
/*
- * If this is the last cpu to enter the waiting state, poke
- * all the other cpus out of their waiting state so they can
- * enter a deeper state. This can race with one of the cpus
- * exiting the waiting state due to an interrupt and
- * decrementing waiting_count, see comment below.
- *
* The atomic_inc_return provides a write barrier to order the write
* to requested_state with the later write that increments ready_count.
*/
- w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK;
- if (w == coupled->online_count)
- cpuidle_coupled_poke_others(cpu, coupled);
+ return atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK;
}
/**
@@ -410,19 +408,33 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled)
* been processed and the poke bit has been cleared.
*
* Other interrupts may also be processed while interrupts are enabled, so
- * need_resched() must be tested after turning interrupts off again to make sure
+ * need_resched() must be tested after this function returns to make sure
* the interrupt didn't schedule work that should take the cpu out of idle.
*
- * Returns 0 if need_resched was false, -EINTR if need_resched was true.
+ * Returns 0 if no poke was pending, 1 if a poke was cleared.
*/
static int cpuidle_coupled_clear_pokes(int cpu)
{
+ if (!cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending))
+ return 0;
+
local_irq_enable();
- while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask))
+ while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending))
cpu_relax();
local_irq_disable();
- return need_resched() ? -EINTR : 0;
+ return 1;
+}
+
+static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled)
+{
+ cpumask_t cpus;
+ int ret;
+
+ cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus);
+ ret = cpumask_and(&cpus, &cpuidle_coupled_poke_pending, &cpus);
+
+ return ret;
}
/**
@@ -449,31 +461,56 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev,
{
int entered_state = -1;
struct cpuidle_coupled *coupled = dev->coupled;
+ int w;
if (!coupled)
return -EINVAL;
while (coupled->prevent) {
- if (cpuidle_coupled_clear_pokes(dev->cpu)) {
+ cpuidle_coupled_clear_pokes(dev->cpu);
+ if (need_resched()) {
local_irq_enable();
return entered_state;
}
entered_state = cpuidle_enter_state(dev, drv,
dev->safe_state_index);
+ local_irq_disable();
}
/* Read barrier ensures online_count is read after prevent is cleared */
smp_rmb();
- cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state);
+reset:
+ cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked);
+
+ w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state);
+ /*
+ * If this is the last cpu to enter the waiting state, poke
+ * all the other cpus out of their waiting state so they can
+ * enter a deeper state. This can race with one of the cpus
+ * exiting the waiting state due to an interrupt and
+ * decrementing waiting_count, see comment below.
+ */
+ if (w == coupled->online_count) {
+ cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked);
+ cpuidle_coupled_poke_others(dev->cpu, coupled);
+ }
retry:
/*
* Wait for all coupled cpus to be idle, using the deepest state
- * allowed for a single cpu.
+ * allowed for a single cpu. If this was not the poking cpu, wait
+ * for at least one poke before leaving to avoid a race where
+ * two cpus could arrive at the waiting loop at the same time,
+ * but the first of the two to arrive could skip the loop without
+ * processing the pokes from the last to arrive.
*/
- while (!cpuidle_coupled_cpus_waiting(coupled)) {
- if (cpuidle_coupled_clear_pokes(dev->cpu)) {
+ while (!cpuidle_coupled_cpus_waiting(coupled) ||
+ !cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) {
+ if (cpuidle_coupled_clear_pokes(dev->cpu))
+ continue;
+
+ if (need_resched()) {
cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
goto out;
}
@@ -485,14 +522,22 @@ retry:
entered_state = cpuidle_enter_state(dev, drv,
dev->safe_state_index);
+ local_irq_disable();
}
- if (cpuidle_coupled_clear_pokes(dev->cpu)) {
+ cpuidle_coupled_clear_pokes(dev->cpu);
+ if (need_resched()) {
cpuidle_coupled_set_not_waiting(dev->cpu, coupled);
goto out;
}
/*
+ * Make sure final poke status for this cpu is visible before setting
+ * cpu as ready.
+ */
+ smp_wmb();
+
+ /*
* All coupled cpus are probably idle. There is a small chance that
* one of the other cpus just became active. Increment the ready count,
* and spin until all coupled cpus have incremented the counter. Once a
@@ -511,6 +556,28 @@ retry:
cpu_relax();
}
+ /*
+ * Make sure read of all cpus ready is done before reading pending pokes
+ */
+ smp_rmb();
+
+ /*
+ * There is a small chance that a cpu left and reentered idle after this
+ * cpu saw that all cpus were waiting. The cpu that reentered idle will
+ * have sent this cpu a poke, which will still be pending after the
+ * ready loop. The pending interrupt may be lost by the interrupt
+ * controller when entering the deep idle state. It's not possible to
+ * clear a pending interrupt without turning interrupts on and handling
+ * it, and it's too late to turn on interrupts here, so reset the
+ * coupled idle state of all cpus and retry.
+ */
+ if (cpuidle_coupled_any_pokes_pending(coupled)) {
+ cpuidle_coupled_set_done(dev->cpu, coupled);
+ /* Wait for all cpus to see the pending pokes */
+ cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier);
+ goto reset;
+ }
+
/* all cpus have acked the coupled state */
next_state = cpuidle_coupled_get_state(dev, coupled);
@@ -596,7 +663,7 @@ have_coupled:
coupled->refcnt++;
csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu);
- csd->func = cpuidle_coupled_poked;
+ csd->func = cpuidle_coupled_handle_poke;
csd->info = (void *)(unsigned long)dev->cpu;
return 0;
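
As an aside, the coupled.c hunks above make cpuidle_coupled_set_waiting() return the new waiting count so the caller can decide whether it was the last CPU to arrive and must poke the others. Below is a minimal, simplified userspace sketch of that "last one to arrive" check, using a plain counter instead of the driver's packed ready/waiting atomic; the thread setup and names are illustrative only, not part of the commit.

#include <pthread.h>
#include <stdatomic.h>
#include <stdio.h>

#define NCPUS 4

static atomic_int waiting_count;

/* Each "cpu" announces that it is waiting; the last one to arrive is the
 * one that would poke the others out of their shallow idle state. */
static void *enter_waiting(void *arg)
{
        long cpu = (long)arg;
        int w = atomic_fetch_add(&waiting_count, 1) + 1;  /* like atomic_inc_return() */

        if (w == NCPUS)
                printf("cpu %ld arrived last: poke the other cpus\n", cpu);
        else
                printf("cpu %ld is waiting (%d of %d)\n", cpu, w, NCPUS);
        return NULL;
}

int main(void)
{
        pthread_t threads[NCPUS];

        for (long i = 0; i < NCPUS; i++)
                pthread_create(&threads[i], NULL, enter_waiting, (void *)i);
        for (int i = 0; i < NCPUS; i++)
                pthread_join(threads[i], NULL);
        return 0;
}

Build with cc -pthread; the poke itself (an IPI via __smp_call_function_single in the driver) is obviously out of scope for a userspace sketch.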
diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c
index 0e6e408..3460584 100644
--- a/drivers/cpuidle/cpuidle-calxeda.c
+++ b/drivers/cpuidle/cpuidle-calxeda.c
@@ -35,7 +35,7 @@
#include <asm/cp15.h>
extern void highbank_set_cpu_jump(int cpu, void *jump_addr);
-extern void *scu_base_addr;
+extern void __iomem *scu_base_addr;
static noinline void calxeda_idle_restore(void)
{
diff --git a/drivers/cpuidle/cpuidle-kirkwood.c b/drivers/cpuidle/cpuidle-kirkwood.c
index 521b0a7..41ba843 100644
--- a/drivers/cpuidle/cpuidle-kirkwood.c
+++ b/drivers/cpuidle/cpuidle-kirkwood.c
@@ -60,9 +60,6 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev)
struct resource *res;
res = platform_get_resource(pdev, IORESOURCE_MEM, 0);
- if (res == NULL)
- return -EINVAL;
-
ddr_operation_base = devm_ioremap_resource(&pdev->dev, res);
if (IS_ERR(ddr_operation_base))
return PTR_ERR(ddr_operation_base);
@@ -70,7 +67,7 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev)
return cpuidle_register(&kirkwood_idle_driver, NULL);
}
-int kirkwood_cpuidle_remove(struct platform_device *pdev)
+static int kirkwood_cpuidle_remove(struct platform_device *pdev)
{
cpuidle_unregister(&kirkwood_idle_driver);
return 0;
diff --git a/drivers/cpuidle/cpuidle-ux500.c b/drivers/cpuidle/cpuidle-ux500.c
new file mode 100644
index 0000000..e056465
--- /dev/null
+++ b/drivers/cpuidle/cpuidle-ux500.c
@@ -0,0 +1,131 @@
+/*
+ * Copyright (c) 2012 Linaro : Daniel Lezcano <daniel.lezcano@linaro.org> (IBM)
+ *
+ * Based on the work of Rickard Andersson <rickard.andersson@stericsson.com>
+ * and Jonas Aaberg <jonas.aberg@stericsson.com>.
+ *
+ * This program is free software; you can redistribute it and/or modify
+ * it under the terms of the GNU General Public License version 2 as
+ * published by the Free Software Foundation.
+ */
+
+#include <linux/module.h>
+#include <linux/cpuidle.h>
+#include <linux/spinlock.h>
+#include <linux/atomic.h>
+#include <linux/smp.h>
+#include <linux/mfd/dbx500-prcmu.h>
+#include <linux/platform_data/arm-ux500-pm.h>
+#include <linux/platform_device.h>
+
+#include <asm/cpuidle.h>
+#include <asm/proc-fns.h>
+
+static atomic_t master = ATOMIC_INIT(0);
+static DEFINE_SPINLOCK(master_lock);
+
+static inline int ux500_enter_idle(struct cpuidle_device *dev,
+ struct cpuidle_driver *drv, int index)
+{
+ int this_cpu = smp_processor_id();
+ bool recouple = false;
+
+ if (atomic_inc_return(&master) == num_online_cpus()) {
+
+ /* With this lock, we prevent the other cpu to exit and enter
+ * this function again and become the master */
+ if (!spin_trylock(&master_lock))
+ goto wfi;
+
+ /* decouple the gic from the A9 cores */
+ if (prcmu_gic_decouple()) {
+ spin_unlock(&master_lock);
+ goto out;
+ }
+
+ /* If an error occur, we will have to recouple the gic
+ * manually */
+ recouple = true;
+
+ /* At this state, as the gic is decoupled, if the other
+ * cpu is in WFI, we have the guarantee it won't be wake
+ * up, so we can safely go to retention */
+ if (!prcmu_is_cpu_in_wfi(this_cpu ? 0 : 1))
+ goto out;
+
+ /* The prcmu will be in charge of watching the interrupts
+ * and wake up the cpus */
+ if (prcmu_copy_gic_settings())
+ goto out;
+
+ /* Check in the meantime an interrupt did
+ * not occur on the gic ... */
+ if (prcmu_gic_pending_irq())
+ goto out;
+
+ /* ... and the prcmu */
+ if (prcmu_pending_irq())
+ goto out;
+
+ /* Go to the retention state, the prcmu will wait for the
+ * cpu to go WFI and this is what happens after exiting this
+ * 'master' critical section */
+ if (prcmu_set_power_state(PRCMU_AP_IDLE, true, true))
+ goto out;
+
+ /* When we switch to retention, the prcmu is in charge
+ * of recoupling the gic automatically */
+ recouple = false;
+
+ spin_unlock(&master_lock);
+ }
+wfi:
+ cpu_do_idle();
+out:
+ atomic_dec(&master);
+
+ if (recouple) {
+ prcmu_gic_recouple();
+ spin_unlock(&master_lock);
+ }
+
+ return index;
+}
+
+static struct cpuidle_driver ux500_idle_driver = {
+ .name = "ux500_idle",
+ .owner = THIS_MODULE,
+ .states = {
+ ARM_CPUIDLE_WFI_STATE,
+ {
+ .enter = ux500_enter_idle,
+ .exit_latency = 70,
+ .target_residency = 260,
+ .flags = CPUIDLE_FLAG_TIME_VALID |
+ CPUIDLE_FLAG_TIMER_STOP,
+ .name = "ApIdle",
+ .desc = "ARM Retention",
+ },
+ },
+ .safe_state_index = 0,
+ .state_count = 2,
+};
+
+static int __init dbx500_cpuidle_probe(struct platform_device *pdev)
+{
+ /* Configure wake up reasons */
+ prcmu_enable_wakeups(PRCMU_WAKEUP(ARM) | PRCMU_WAKEUP(RTC) |
+ PRCMU_WAKEUP(ABB));
+
+ return cpuidle_register(&ux500_idle_driver, NULL);
+}
+
+static struct platform_driver dbx500_cpuidle_plat_driver = {
+ .driver = {
+ .name = "cpuidle-dbx500",
+ .owner = THIS_MODULE,
+ },
+ .probe = dbx500_cpuidle_probe,
+};
+
+module_platform_driver(dbx500_cpuidle_plat_driver);
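
The new ux500 driver elects a single "master" CPU with spin_trylock(&master_lock) and lets any CPU that loses the race fall back to a plain WFI. A simplified userspace sketch of that election pattern, using a pthread mutex in place of the spinlock; the two threads and function names are assumptions for illustration only.

#include <pthread.h>
#include <stdio.h>

static pthread_mutex_t master_lock = PTHREAD_MUTEX_INITIALIZER;

/* Only one thread wins the trylock and does the "master" work; the others
 * fall through to the per-cpu path, like the goto wfi in ux500_enter_idle(). */
static void *enter_idle(void *arg)
{
        long cpu = (long)arg;

        if (pthread_mutex_trylock(&master_lock) == 0) {
                printf("cpu %ld is the master, programming the power controller\n", cpu);
                pthread_mutex_unlock(&master_lock);
        } else {
                printf("cpu %ld lost the race, plain WFI\n", cpu);
        }
        return NULL;
}

int main(void)
{
        pthread_t threads[2];

        for (long i = 0; i < 2; i++)
                pthread_create(&threads[i], NULL, enter_idle, (void *)i);
        for (int i = 0; i < 2; i++)
                pthread_join(threads[i], NULL);
        return 0;
}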
diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c
index fdc432f..d75040d 100644
--- a/drivers/cpuidle/cpuidle.c
+++ b/drivers/cpuidle/cpuidle.c
@@ -42,8 +42,6 @@ void disable_cpuidle(void)
off = 1;
}
-static int __cpuidle_register_device(struct cpuidle_device *dev);
-
/**
* cpuidle_play_dead - cpu off-lining
*
@@ -278,7 +276,7 @@ static void poll_idle_init(struct cpuidle_driver *drv) {}
*/
int cpuidle_enable_device(struct cpuidle_device *dev)
{
- int ret, i;
+ int ret;
struct cpuidle_driver *drv;
if (!dev)
@@ -292,15 +290,12 @@ int cpuidle_enable_device(struct cpuidle_device *dev)
if (!drv || !cpuidle_curr_governor)
return -EIO;
+ if (!dev->registered)
+ return -EINVAL;
+
if (!dev->state_count)
dev->state_count = drv->state_count;
- if (dev->registered == 0) {
- ret = __cpuidle_register_device(dev);
- if (ret)
- return ret;
- }
-
poll_idle_init(drv);
ret = cpuidle_add_device_sysfs(dev);
@@ -311,12 +306,6 @@ int cpuidle_enable_device(struct cpuidle_device *dev)
(ret = cpuidle_curr_governor->enable(drv, dev)))
goto fail_sysfs;
- for (i = 0; i < dev->state_count; i++) {
- dev->states_usage[i].usage = 0;
- dev->states_usage[i].time = 0;
- }
- dev->last_residency = 0;
-
smp_wmb();
dev->enabled = 1;
@@ -360,6 +349,23 @@ void cpuidle_disable_device(struct cpuidle_device *dev)
EXPORT_SYMBOL_GPL(cpuidle_disable_device);
+static void __cpuidle_unregister_device(struct cpuidle_device *dev)
+{
+ struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
+
+ list_del(&dev->device_list);
+ per_cpu(cpuidle_devices, dev->cpu) = NULL;
+ module_put(drv->owner);
+}
+
+static int __cpuidle_device_init(struct cpuidle_device *dev)
+{
+ memset(dev->states_usage, 0, sizeof(dev->states_usage));
+ dev->last_residency = 0;
+
+ return 0;
+}
+
/**
* __cpuidle_register_device - internal register function called before register
* and enable routines
@@ -377,24 +383,15 @@ static int __cpuidle_register_device(struct cpuidle_device *dev)
per_cpu(cpuidle_devices, dev->cpu) = dev;
list_add(&dev->device_list, &cpuidle_detected_devices);
- ret = cpuidle_add_sysfs(dev);
- if (ret)
- goto err_sysfs;
ret = cpuidle_coupled_register_device(dev);
- if (ret)
- goto err_coupled;
+ if (ret) {
+ __cpuidle_unregister_device(dev);
+ return ret;
+ }
dev->registered = 1;
return 0;
-
-err_coupled:
- cpuidle_remove_sysfs(dev);
-err_sysfs:
- list_del(&dev->device_list);
- per_cpu(cpuidle_devices, dev->cpu) = NULL;
- module_put(drv->owner);
- return ret;
}
/**
@@ -403,25 +400,44 @@ err_sysfs:
*/
int cpuidle_register_device(struct cpuidle_device *dev)
{
- int ret;
+ int ret = -EBUSY;
if (!dev)
return -EINVAL;
mutex_lock(&cpuidle_lock);
- if ((ret = __cpuidle_register_device(dev))) {
- mutex_unlock(&cpuidle_lock);
- return ret;
- }
+ if (dev->registered)
+ goto out_unlock;
+
+ ret = __cpuidle_device_init(dev);
+ if (ret)
+ goto out_unlock;
+
+ ret = __cpuidle_register_device(dev);
+ if (ret)
+ goto out_unlock;
+
+ ret = cpuidle_add_sysfs(dev);
+ if (ret)
+ goto out_unregister;
+
+ ret = cpuidle_enable_device(dev);
+ if (ret)
+ goto out_sysfs;
- cpuidle_enable_device(dev);
cpuidle_install_idle_handler();
+out_unlock:
mutex_unlock(&cpuidle_lock);
- return 0;
+ return ret;
+out_sysfs:
+ cpuidle_remove_sysfs(dev);
+out_unregister:
+ __cpuidle_unregister_device(dev);
+ goto out_unlock;
}
EXPORT_SYMBOL_GPL(cpuidle_register_device);
@@ -432,8 +448,6 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device);
*/
void cpuidle_unregister_device(struct cpuidle_device *dev)
{
- struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
-
if (dev->registered == 0)
return;
@@ -442,14 +456,12 @@ void cpuidle_unregister_device(struct cpuidle_device *dev)
cpuidle_disable_device(dev);
cpuidle_remove_sysfs(dev);
- list_del(&dev->device_list);
- per_cpu(cpuidle_devices, dev->cpu) = NULL;
+
+ __cpuidle_unregister_device(dev);
cpuidle_coupled_unregister_device(dev);
cpuidle_resume_and_unlock();
-
- module_put(drv->owner);
}
EXPORT_SYMBOL_GPL(cpuidle_unregister_device);
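
The reworked cpuidle_register_device() above now performs device init, registration, sysfs setup and enabling in one place, and unwinds them in reverse order on failure. A small self-contained sketch of that goto-based unwinding shape follows; the stub functions and the forced failure in the last step are illustrative assumptions, not the kernel's code.

#include <stdio.h>
#include <errno.h>

/* Stand-ins for __cpuidle_device_init(), __cpuidle_register_device(),
 * cpuidle_add_sysfs() and cpuidle_enable_device(). */
static int device_init(void)       { return 0; }
static int device_register(void)   { return 0; }
static int add_sysfs(void)         { return 0; }
static int enable_device(void)     { return -EIO; }  /* pretend the last step fails */
static void remove_sysfs(void)     { puts("remove sysfs"); }
static void device_unregister(void){ puts("unregister device"); }

static int register_device(void)
{
        int ret;

        ret = device_init();
        if (ret)
                goto out;
        ret = device_register();
        if (ret)
                goto out;
        ret = add_sysfs();
        if (ret)
                goto out_unregister;
        ret = enable_device();
        if (ret)
                goto out_sysfs;
out:
        return ret;

out_sysfs:
        remove_sysfs();
out_unregister:
        device_unregister();
        goto out;
}

int main(void)
{
        printf("register_device() = %d\n", register_device());
        return 0;
}

Only the steps that already succeeded are torn down, which is exactly the property the out_unlock/out_sysfs/out_unregister labels in the patch provide.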
diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c
index 9b78405..9f08e8c 100644
--- a/drivers/cpuidle/governors/ladder.c
+++ b/drivers/cpuidle/governors/ladder.c
@@ -192,14 +192,4 @@ static int __init init_ladder(void)
return cpuidle_register_governor(&ladder_governor);
}
-/**
- * exit_ladder - exits the governor
- */
-static void __exit exit_ladder(void)
-{
- cpuidle_unregister_governor(&ladder_governor);
-}
-
-MODULE_LICENSE("GPL");
-module_init(init_ladder);
-module_exit(exit_ladder);
+postcore_initcall(init_ladder);
diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c
index fe343a0..cf7f2f0 100644
--- a/drivers/cpuidle/governors/menu.c
+++ b/drivers/cpuidle/governors/menu.c
@@ -21,6 +21,15 @@
#include <linux/math64.h>
#include <linux/module.h>
+/*
+ * Please note when changing the tuning values:
+ * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of
+ * a scaling operation multiplication may overflow on 32 bit platforms.
+ * In that case, #define RESOLUTION as ULL to get 64 bit result:
+ * #define RESOLUTION 1024ULL
+ *
+ * The default values do not overflow.
+ */
#define BUCKETS 12
#define INTERVALS 8
#define RESOLUTION 1024
@@ -28,13 +37,6 @@
#define MAX_INTERESTING 50000
#define STDDEV_THRESH 400
-/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */
-#define MAX_DEVIATION 60
-
-static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer);
-static DEFINE_PER_CPU(int, hrtimer_status);
-/* menu hrtimer mode */
-enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL};
/*
* Concepts and ideas behind the menu governor
@@ -116,23 +118,16 @@ enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL};
*
*/
-/*
- * The C-state residency is so long that is is worthwhile to exit
- * from the shallow C-state and re-enter into a deeper C-state.
- */
-static unsigned int perfect_cstate_ms __read_mostly = 30;
-module_param(perfect_cstate_ms, uint, 0000);
-
struct menu_device {
int last_state_idx;
int needs_update;
unsigned int expected_us;
- u64 predicted_us;
+ unsigned int predicted_us;
unsigned int exit_us;
unsigned int bucket;
- u64 correction_factor[BUCKETS];
- u32 intervals[INTERVALS];
+ unsigned int correction_factor[BUCKETS];
+ unsigned int intervals[INTERVALS];
int interval_ptr;
};
@@ -205,59 +200,28 @@ static u64 div_round64(u64 dividend, u32 divisor)
return div_u64(dividend + (divisor / 2), divisor);
}
-/* Cancel the hrtimer if it is not triggered yet */
-void menu_hrtimer_cancel(void)
-{
- int cpu = smp_processor_id();
- struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
-
- /* The timer is still not time out*/
- if (per_cpu(hrtimer_status, cpu)) {
- hrtimer_cancel(hrtmr);
- per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
- }
-}
-EXPORT_SYMBOL_GPL(menu_hrtimer_cancel);
-
-/* Call back for hrtimer is triggered */
-static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer)
-{
- int cpu = smp_processor_id();
- struct menu_device *data = &per_cpu(menu_devices, cpu);
-
- /* In general case, the expected residency is much larger than
- * deepest C-state target residency, but prediction logic still
- * predicts a small predicted residency, so the prediction
- * history is totally broken if the timer is triggered.
- * So reset the correction factor.
- */
- if (per_cpu(hrtimer_status, cpu) == MENU_HRTIMER_GENERAL)
- data->correction_factor[data->bucket] = RESOLUTION * DECAY;
-
- per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP;
-
- return HRTIMER_NORESTART;
-}
-
/*
* Try detecting repeating patterns by keeping track of the last 8
* intervals, and checking if the standard deviation of that set
* of points is below a threshold. If it is... then use the
* average of these 8 points as the estimated value.
*/
-static u32 get_typical_interval(struct menu_device *data)
+static void get_typical_interval(struct menu_device *data)
{
- int i = 0, divisor = 0;
- uint64_t max = 0, avg = 0, stddev = 0;
- int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */
- unsigned int ret = 0;
+ int i, divisor;
+ unsigned int max, thresh;
+ uint64_t avg, stddev;
+
+ thresh = UINT_MAX; /* Discard outliers above this value */
again:
- /* first calculate average and standard deviation of the past */
- max = avg = divisor = stddev = 0;
+ /* First calculate the average of past intervals */
+ max = 0;
+ avg = 0;
+ divisor = 0;
for (i = 0; i < INTERVALS; i++) {
- int64_t value = data->intervals[i];
+ unsigned int value = data->intervals[i];
if (value <= thresh) {
avg += value;
divisor++;
@@ -267,15 +231,38 @@ again:
}
do_div(avg, divisor);
+ /* Then try to determine standard deviation */
+ stddev = 0;
for (i = 0; i < INTERVALS; i++) {
- int64_t value = data->intervals[i];
+ unsigned int value = data->intervals[i];
if (value <= thresh) {
int64_t diff = value - avg;
stddev += diff * diff;
}
}
do_div(stddev, divisor);
- stddev = int_sqrt(stddev);
+ /*
+ * The typical interval is obtained when standard deviation is small
+ * or standard deviation is small compared to the average interval.
+ *
+ * int_sqrt() formal parameter type is unsigned long. When the
+ * greatest difference to an outlier exceeds ~65 ms * sqrt(divisor)
+ * the resulting squared standard deviation exceeds the input domain
+ * of int_sqrt on platforms where unsigned long is 32 bits in size.
+ * In such case reject the candidate average.
+ *
+ * Use this result only if there is no timer to wake us up sooner.
+ */
+ if (likely(stddev <= ULONG_MAX)) {
+ stddev = int_sqrt(stddev);
+ if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
+ || stddev <= 20) {
+ if (data->expected_us > avg)
+ data->predicted_us = avg;
+ return;
+ }
+ }
+
/*
* If we have outliers to the upside in our distribution, discard
* those by setting the threshold to exclude these outliers, then
@@ -284,23 +271,12 @@ again:
*
* This can deal with workloads that have long pauses interspersed
* with sporadic activity with a bunch of short pauses.
- *
- * The typical interval is obtained when standard deviation is small
- * or standard deviation is small compared to the average interval.
*/
- if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
- || stddev <= 20) {
- data->predicted_us = avg;
- ret = 1;
- return ret;
-
- } else if ((divisor * 4) > INTERVALS * 3) {
- /* Exclude the max interval */
- thresh = max - 1;
- goto again;
- }
+ if ((divisor * 4) <= INTERVALS * 3)
+ return;
- return ret;
+ thresh = max - 1;
+ goto again;
}
/**
@@ -315,9 +291,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
int i;
int multiplier;
struct timespec t;
- int repeat = 0, low_predicted = 0;
- int cpu = smp_processor_id();
- struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu);
if (data->needs_update) {
menu_update(drv, dev);
@@ -348,11 +321,16 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
if (data->correction_factor[data->bucket] == 0)
data->correction_factor[data->bucket] = RESOLUTION * DECAY;
- /* Make sure to round up for half microseconds */
- data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket],
+ /*
+ * Force the result of multiplication to be 64 bits even if both
+ * operands are 32 bits.
+ * Make sure to round up for half microseconds.
+ */
+ data->predicted_us = div_round64((uint64_t)data->expected_us *
+ data->correction_factor[data->bucket],
RESOLUTION * DECAY);
- repeat = get_typical_interval(data);
+ get_typical_interval(data);
/*
* We want to default to C1 (hlt), not to busy polling
@@ -373,10 +351,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
if (s->disabled || su->disable)
continue;
- if (s->target_residency > data->predicted_us) {
- low_predicted = 1;
+ if (s->target_residency > data->predicted_us)
continue;
- }
if (s->exit_latency > latency_req)
continue;
if (s->exit_latency * multiplier > data->predicted_us)
@@ -386,44 +362,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev)
data->exit_us = s->exit_latency;
}
- /* not deepest C-state chosen for low predicted residency */
- if (low_predicted) {
- unsigned int timer_us = 0;
- unsigned int perfect_us = 0;
-
- /*
- * Set a timer to detect whether this sleep is much
- * longer than repeat mode predicted. If the timer
- * triggers, the code will evaluate whether to put
- * the CPU into a deeper C-state.
- * The timer is cancelled on CPU wakeup.
- */
- timer_us = 2 * (data->predicted_us + MAX_DEVIATION);
-
- perfect_us = perfect_cstate_ms * 1000;
-
- if (repeat && (4 * timer_us < data->expected_us)) {
- RCU_NONIDLE(hrtimer_start(hrtmr,
- ns_to_ktime(1000 * timer_us),
- HRTIMER_MODE_REL_PINNED));
- /* In repeat case, menu hrtimer is started */
- per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT;
- } else if (perfect_us < data->expected_us) {
- /*
- * The next timer is long. This could be because
- * we did not make a useful prediction.
- * In that case, it makes sense to re-enter
- * into a deeper C-state after some time.
- */
- RCU_NONIDLE(hrtimer_start(hrtmr,
- ns_to_ktime(1000 * timer_us),
- HRTIMER_MODE_REL_PINNED));
- /* In general case, menu hrtimer is started */
- per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL;
- }
-
- }
-
return data->last_state_idx;
}
@@ -455,7 +393,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
unsigned int last_idle_us = cpuidle_get_last_residency(dev);
struct cpuidle_state *target = &drv->states[last_idx];
unsigned int measured_us;
- u64 new_factor;
+ unsigned int new_factor;
/*
* Ugh, this idle state doesn't support residency measurements, so we
@@ -476,10 +414,9 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
measured_us -= data->exit_us;
- /* update our correction ratio */
-
- new_factor = data->correction_factor[data->bucket]
- * (DECAY - 1) / DECAY;
+ /* Update our correction ratio */
+ new_factor = data->correction_factor[data->bucket];
+ new_factor -= new_factor / DECAY;
if (data->expected_us > 0 && measured_us < MAX_INTERESTING)
new_factor += RESOLUTION * measured_us / data->expected_us;
@@ -492,9 +429,11 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev)
/*
* We don't want 0 as factor; we always want at least
- * a tiny bit of estimated time.
+ * a tiny bit of estimated time. Fortunately, due to rounding,
+ * new_factor will stay nonzero regardless of measured_us values
+ * and the compiler can eliminate this test as long as DECAY > 1.
*/
- if (new_factor == 0)
+ if (DECAY == 1 && unlikely(new_factor == 0))
new_factor = 1;
data->correction_factor[data->bucket] = new_factor;
@@ -514,9 +453,6 @@ static int menu_enable_device(struct cpuidle_driver *drv,
struct cpuidle_device *dev)
{
struct menu_device *data = &per_cpu(menu_devices, dev->cpu);
- struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu);
- hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL);
- t->function = menu_hrtimer_notify;
memset(data, 0, sizeof(struct menu_device));
@@ -540,14 +476,4 @@ static int __init init_menu(void)
return cpuidle_register_governor(&menu_governor);
}
-/**
- * exit_menu - exits the governor
- */
-static void __exit exit_menu(void)
-{
- cpuidle_unregister_governor(&menu_governor);
-}
-
-MODULE_LICENSE("GPL");
-module_init(init_menu);
-module_exit(exit_menu);
+postcore_initcall(init_menu);
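
The menu governor hunks above shrink the correction factor to 32-bit storage, widen the prediction multiply to 64 bits to avoid overflow, and replace the old multiply/divide decay with new_factor -= new_factor / DECAY. A standalone sketch of that fixed-point arithmetic, assuming DECAY is 8 as in the mainline governor (its definition is not visible in this diff) and using made-up sample inputs:

#include <stdio.h>
#include <stdint.h>

#define RESOLUTION      1024
#define DECAY           8       /* assumed; value not visible in this diff */
#define MAX_INTERESTING 50000

/* Round-to-nearest division, mirroring div_round64() in the governor. */
static uint64_t div_round64(uint64_t dividend, uint32_t divisor)
{
        return (dividend + divisor / 2) / divisor;
}

int main(void)
{
        unsigned int correction_factor = RESOLUTION * DECAY;  /* neutral: predicted == expected */
        unsigned int expected_us = 5000;                      /* made-up sample values */
        unsigned int measured_us = 1200;

        /* Widen to 64 bits before multiplying so a large expected_us cannot
         * overflow a 32-bit intermediate, as the patched menu_select() does. */
        unsigned int predicted_us = div_round64((uint64_t)expected_us * correction_factor,
                                                RESOLUTION * DECAY);
        printf("predicted_us = %u\n", predicted_us);

        /* Decay the old factor and fold in the observed measured/expected
         * ratio, as the patched menu_update() does.  For DECAY > 1 the
         * rounding keeps new_factor nonzero, which is why the zero check
         * was relaxed in the patch. */
        unsigned int new_factor = correction_factor;
        new_factor -= new_factor / DECAY;
        if (expected_us > 0 && measured_us < MAX_INTERESTING)
                new_factor += RESOLUTION * measured_us / expected_us;
        printf("new correction_factor = %u (neutral is %d)\n",
               new_factor, RESOLUTION * DECAY);
        return 0;
}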
diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c
index 428754a..8739cc0 100644
--- a/drivers/cpuidle/sysfs.c
+++ b/drivers/cpuidle/sysfs.c
@@ -11,8 +11,10 @@
#include <linux/sysfs.h>
#include <linux/slab.h>
#include <linux/cpu.h>
+#include <linux/completion.h>
#include <linux/capability.h>
#include <linux/device.h>
+#include <linux/kobject.h>
#include "cpuidle.h"
@@ -33,7 +35,8 @@ static ssize_t show_available_governors(struct device *dev,
mutex_lock(&cpuidle_lock);
list_for_each_entry(tmp, &cpuidle_governors, governor_list) {
- if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2))
+ if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) -
+ CPUIDLE_NAME_LEN - 2))
goto out;
i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name);
}
@@ -166,13 +169,28 @@ struct cpuidle_attr {
#define define_one_rw(_name, show, store) \
static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store)
-#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj)
#define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr)
-static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf)
+
+struct cpuidle_device_kobj {
+ struct cpuidle_device *dev;
+ struct completion kobj_unregister;
+ struct kobject kobj;
+};
+
+static inline struct cpuidle_device *to_cpuidle_device(struct kobject *kobj)
+{
+ struct cpuidle_device_kobj *kdev =
+ container_of(kobj, struct cpuidle_device_kobj, kobj);
+
+ return kdev->dev;
+}
+
+static ssize_t cpuidle_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
{
int ret = -EIO;
- struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
- struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
+ struct cpuidle_device *dev = to_cpuidle_device(kobj);
+ struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr);
if (cattr->show) {
mutex_lock(&cpuidle_lock);
@@ -182,12 +200,12 @@ static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char
return ret;
}
-static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr,
- const char * buf, size_t count)
+static ssize_t cpuidle_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t count)
{
int ret = -EIO;
- struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
- struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr);
+ struct cpuidle_device *dev = to_cpuidle_device(kobj);
+ struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr);
if (cattr->store) {
mutex_lock(&cpuidle_lock);
@@ -204,9 +222,10 @@ static const struct sysfs_ops cpuidle_sysfs_ops = {
static void cpuidle_sysfs_release(struct kobject *kobj)
{
- struct cpuidle_device *dev = kobj_to_cpuidledev(kobj);
+ struct cpuidle_device_kobj *kdev =
+ container_of(kobj, struct cpuidle_device_kobj, kobj);
- complete(&dev->kobj_unregister);
+ complete(&kdev->kobj_unregister);
}
static struct kobj_type ktype_cpuidle = {
@@ -237,8 +256,8 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \
#define define_store_state_ull_function(_name) \
static ssize_t store_state_##_name(struct cpuidle_state *state, \
- struct cpuidle_state_usage *state_usage, \
- const char *buf, size_t size) \
+ struct cpuidle_state_usage *state_usage, \
+ const char *buf, size_t size) \
{ \
unsigned long long value; \
int err; \
@@ -256,14 +275,16 @@ static ssize_t store_state_##_name(struct cpuidle_state *state, \
#define define_show_state_ull_function(_name) \
static ssize_t show_state_##_name(struct cpuidle_state *state, \
- struct cpuidle_state_usage *state_usage, char *buf) \
+ struct cpuidle_state_usage *state_usage, \
+ char *buf) \
{ \
return sprintf(buf, "%llu\n", state_usage->_name);\
}
#define define_show_state_str_function(_name) \
static ssize_t show_state_##_name(struct cpuidle_state *state, \
- struct cpuidle_state_usage *state_usage, char *buf) \
+ struct cpuidle_state_usage *state_usage, \
+ char *buf) \
{ \
if (state->_name[0] == '\0')\
return sprintf(buf, "<null>\n");\
@@ -309,8 +330,9 @@ struct cpuidle_state_kobj {
#define kobj_to_state(k) (kobj_to_state_obj(k)->state)
#define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage)
#define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr)
-static ssize_t cpuidle_state_show(struct kobject * kobj,
- struct attribute * attr ,char * buf)
+
+static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr,
+ char * buf)
{
int ret = -EIO;
struct cpuidle_state *state = kobj_to_state(kobj);
@@ -323,8 +345,8 @@ static ssize_t cpuidle_state_show(struct kobject * kobj,
return ret;
}
-static ssize_t cpuidle_state_store(struct kobject *kobj,
- struct attribute *attr, const char *buf, size_t size)
+static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr,
+ const char *buf, size_t size)
{
int ret = -EIO;
struct cpuidle_state *state = kobj_to_state(kobj);
@@ -371,6 +393,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
{
int i, ret = -ENOMEM;
struct cpuidle_state_kobj *kobj;
+ struct cpuidle_device_kobj *kdev = device->kobj_dev;
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device);
/* state statistics */
@@ -383,7 +406,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device)
init_completion(&kobj->kobj_unregister);
ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle,
- &device->kobj, "state%d", i);
+ &kdev->kobj, "state%d", i);
if (ret) {
kfree(kobj);
goto error_state;
@@ -449,8 +472,8 @@ static void cpuidle_driver_sysfs_release(struct kobject *kobj)
complete(&driver_kobj->kobj_unregister);
}
-static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute * attr,
- char * buf)
+static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute *attr,
+ char *buf)
{
int ret = -EIO;
struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj);
@@ -500,6 +523,7 @@ static struct kobj_type ktype_driver_cpuidle = {
static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
{
struct cpuidle_driver_kobj *kdrv;
+ struct cpuidle_device_kobj *kdev = dev->kobj_dev;
struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev);
int ret;
@@ -511,7 +535,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev)
init_completion(&kdrv->kobj_unregister);
ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle,
- &dev->kobj, "driver");
+ &kdev->kobj, "driver");
if (ret) {
kfree(kdrv);
return ret;
@@ -580,16 +604,28 @@ void cpuidle_remove_device_sysfs(struct cpuidle_device *device)
*/
int cpuidle_add_sysfs(struct cpuidle_device *dev)
{
+ struct cpuidle_device_kobj *kdev;
struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu);
int error;
- init_completion(&dev->kobj_unregister);
+ kdev = kzalloc(sizeof(*kdev), GFP_KERNEL);
+ if (!kdev)
+ return -ENOMEM;
+ kdev->dev = dev;
+ dev->kobj_dev = kdev;
- error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj,
- "cpuidle");
- if (!error)
- kobject_uevent(&dev->kobj, KOBJ_ADD);
- return error;
+ init_completion(&kdev->kobj_unregister);
+
+ error = kobject_init_and_add(&kdev->kobj, &ktype_cpuidle, &cpu_dev->kobj,
+ "cpuidle");
+ if (error) {
+ kfree(kdev);
+ return error;
+ }
+
+ kobject_uevent(&kdev->kobj, KOBJ_ADD);
+
+ return 0;
}
/**
@@ -598,6 +634,9 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev)
*/
void cpuidle_remove_sysfs(struct cpuidle_device *dev)
{
- kobject_put(&dev->kobj);
- wait_for_completion(&dev->kobj_unregister);
+ struct cpuidle_device_kobj *kdev = dev->kobj_dev;
+
+ kobject_put(&kdev->kobj);
+ wait_for_completion(&kdev->kobj_unregister);
+ kfree(kdev);
}
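
The sysfs rework above moves the kobject out of struct cpuidle_device into a separate cpuidle_device_kobj wrapper and recovers the wrapper with container_of(), as to_cpuidle_device() shows. A standalone sketch of that embedded-kobject lookup, using a simplified container_of() (the kernel's version adds type checking) and dummy stand-in structures:

#include <stddef.h>
#include <stdio.h>

#define container_of(ptr, type, member) \
        ((type *)((char *)(ptr) - offsetof(type, member)))

struct kobject { int refcount; };       /* stand-in for the kernel structure */

struct cpuidle_device_kobj {
        void *dev;                      /* would point at the cpuidle_device */
        struct kobject kobj;            /* embedded, registered with sysfs */
};

int main(void)
{
        struct cpuidle_device_kobj kdev = { .dev = &kdev };
        struct kobject *kobj = &kdev.kobj;      /* all sysfs callbacks receive this */

        /* Walk back from the embedded member to its wrapper, like
         * to_cpuidle_device() and cpuidle_sysfs_release() now do. */
        struct cpuidle_device_kobj *back =
                container_of(kobj, struct cpuidle_device_kobj, kobj);

        printf("wrapper recovered: %s\n", back == &kdev ? "yes" : "no");
        return 0;
}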