Diffstat (limited to 'drivers/cpuidle')
-rw-r--r--   drivers/cpuidle/Kconfig              |  20
-rw-r--r--   drivers/cpuidle/Kconfig.arm          |  29
-rw-r--r--   drivers/cpuidle/Makefile             |   9
-rw-r--r--   drivers/cpuidle/coupled.c            | 129
-rw-r--r--   drivers/cpuidle/cpuidle-calxeda.c    |   2
-rw-r--r--   drivers/cpuidle/cpuidle-kirkwood.c   |   5
-rw-r--r--   drivers/cpuidle/cpuidle-ux500.c      | 131
-rw-r--r--   drivers/cpuidle/cpuidle.c            |  94
-rw-r--r--   drivers/cpuidle/governors/ladder.c   |  12
-rw-r--r--   drivers/cpuidle/governors/menu.c     | 214
-rw-r--r--   drivers/cpuidle/sysfs.c              | 101
11 files changed, 467 insertions, 279 deletions
diff --git a/drivers/cpuidle/Kconfig b/drivers/cpuidle/Kconfig index 0e2cd5c..b3fb81d 100644 --- a/drivers/cpuidle/Kconfig +++ b/drivers/cpuidle/Kconfig @@ -1,5 +1,6 @@ +menu "CPU Idle" -menuconfig CPU_IDLE +config CPU_IDLE bool "CPU idle PM support" default y if ACPI || PPC_PSERIES select CPU_IDLE_GOV_LADDER if (!NO_HZ && !NO_HZ_IDLE) @@ -29,20 +30,13 @@ config CPU_IDLE_GOV_MENU bool "Menu governor (for tickless system)" default y -config CPU_IDLE_CALXEDA - bool "CPU Idle Driver for Calxeda processors" - depends on ARCH_HIGHBANK - select ARM_CPU_SUSPEND - help - Select this to enable cpuidle on Calxeda processors. - -config CPU_IDLE_ZYNQ - bool "CPU Idle Driver for Xilinx Zynq processors" - depends on ARCH_ZYNQ - help - Select this to enable cpuidle on Xilinx Zynq processors. +menu "ARM CPU Idle Drivers" +depends on ARM +source "drivers/cpuidle/Kconfig.arm" +endmenu endif config ARCH_NEEDS_CPU_IDLE_COUPLED def_bool n +endmenu diff --git a/drivers/cpuidle/Kconfig.arm b/drivers/cpuidle/Kconfig.arm new file mode 100644 index 0000000..b3302193 --- /dev/null +++ b/drivers/cpuidle/Kconfig.arm @@ -0,0 +1,29 @@ +# +# ARM CPU Idle drivers +# + +config ARM_HIGHBANK_CPUIDLE + bool "CPU Idle Driver for Calxeda processors" + depends on ARCH_HIGHBANK + select ARM_CPU_SUSPEND + help + Select this to enable cpuidle on Calxeda processors. + +config ARM_KIRKWOOD_CPUIDLE + bool "CPU Idle Driver for Marvell Kirkwood SoCs" + depends on ARCH_KIRKWOOD + help + This adds the CPU Idle driver for Marvell Kirkwood SoCs. + +config ARM_ZYNQ_CPUIDLE + bool "CPU Idle Driver for Xilinx Zynq processors" + depends on ARCH_ZYNQ + help + Select this to enable cpuidle on Xilinx Zynq processors. + +config ARM_U8500_CPUIDLE + bool "Cpu Idle Driver for the ST-E u8500 processors" + depends on ARCH_U8500 + help + Select this to enable cpuidle for ST-E u8500 processors + diff --git a/drivers/cpuidle/Makefile b/drivers/cpuidle/Makefile index 8767a7b..0b9d200 100644 --- a/drivers/cpuidle/Makefile +++ b/drivers/cpuidle/Makefile @@ -5,6 +5,9 @@ obj-y += cpuidle.o driver.o governor.o sysfs.o governors/ obj-$(CONFIG_ARCH_NEEDS_CPU_IDLE_COUPLED) += coupled.o -obj-$(CONFIG_CPU_IDLE_CALXEDA) += cpuidle-calxeda.o -obj-$(CONFIG_ARCH_KIRKWOOD) += cpuidle-kirkwood.o -obj-$(CONFIG_CPU_IDLE_ZYNQ) += cpuidle-zynq.o +################################################################################## +# ARM SoC drivers +obj-$(CONFIG_ARM_HIGHBANK_CPUIDLE) += cpuidle-calxeda.o +obj-$(CONFIG_ARM_KIRKWOOD_CPUIDLE) += cpuidle-kirkwood.o +obj-$(CONFIG_ARM_ZYNQ_CPUIDLE) += cpuidle-zynq.o +obj-$(CONFIG_ARM_U8500_CPUIDLE) += cpuidle-ux500.o diff --git a/drivers/cpuidle/coupled.c b/drivers/cpuidle/coupled.c index 2a297f8..f8a8636 100644 --- a/drivers/cpuidle/coupled.c +++ b/drivers/cpuidle/coupled.c @@ -106,6 +106,7 @@ struct cpuidle_coupled { cpumask_t coupled_cpus; int requested_state[NR_CPUS]; atomic_t ready_waiting_counts; + atomic_t abort_barrier; int online_count; int refcnt; int prevent; @@ -122,12 +123,19 @@ static DEFINE_MUTEX(cpuidle_coupled_lock); static DEFINE_PER_CPU(struct call_single_data, cpuidle_coupled_poke_cb); /* - * The cpuidle_coupled_poked_mask mask is used to avoid calling + * The cpuidle_coupled_poke_pending mask is used to avoid calling * __smp_call_function_single with the per cpu call_single_data struct already * in use. 
This prevents a deadlock where two cpus are waiting for each others * call_single_data struct to be available */ -static cpumask_t cpuidle_coupled_poked_mask; +static cpumask_t cpuidle_coupled_poke_pending; + +/* + * The cpuidle_coupled_poked mask is used to ensure that each cpu has been poked + * once to minimize entering the ready loop with a poke pending, which would + * require aborting and retrying. + */ +static cpumask_t cpuidle_coupled_poked; /** * cpuidle_coupled_parallel_barrier - synchronize all online coupled cpus @@ -291,10 +299,11 @@ static inline int cpuidle_coupled_get_state(struct cpuidle_device *dev, return state; } -static void cpuidle_coupled_poked(void *info) +static void cpuidle_coupled_handle_poke(void *info) { int cpu = (unsigned long)info; - cpumask_clear_cpu(cpu, &cpuidle_coupled_poked_mask); + cpumask_set_cpu(cpu, &cpuidle_coupled_poked); + cpumask_clear_cpu(cpu, &cpuidle_coupled_poke_pending); } /** @@ -313,7 +322,7 @@ static void cpuidle_coupled_poke(int cpu) { struct call_single_data *csd = &per_cpu(cpuidle_coupled_poke_cb, cpu); - if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poked_mask)) + if (!cpumask_test_and_set_cpu(cpu, &cpuidle_coupled_poke_pending)) __smp_call_function_single(cpu, csd, 0); } @@ -340,30 +349,19 @@ static void cpuidle_coupled_poke_others(int this_cpu, * @coupled: the struct coupled that contains the current cpu * @next_state: the index in drv->states of the requested state for this cpu * - * Updates the requested idle state for the specified cpuidle device, - * poking all coupled cpus out of idle if necessary to let them see the new - * state. + * Updates the requested idle state for the specified cpuidle device. + * Returns the number of waiting cpus. */ -static void cpuidle_coupled_set_waiting(int cpu, +static int cpuidle_coupled_set_waiting(int cpu, struct cpuidle_coupled *coupled, int next_state) { - int w; - coupled->requested_state[cpu] = next_state; /* - * If this is the last cpu to enter the waiting state, poke - * all the other cpus out of their waiting state so they can - * enter a deeper state. This can race with one of the cpus - * exiting the waiting state due to an interrupt and - * decrementing waiting_count, see comment below. - * * The atomic_inc_return provides a write barrier to order the write * to requested_state with the later write that increments ready_count. */ - w = atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; - if (w == coupled->online_count) - cpuidle_coupled_poke_others(cpu, coupled); + return atomic_inc_return(&coupled->ready_waiting_counts) & WAITING_MASK; } /** @@ -410,19 +408,33 @@ static void cpuidle_coupled_set_done(int cpu, struct cpuidle_coupled *coupled) * been processed and the poke bit has been cleared. * * Other interrupts may also be processed while interrupts are enabled, so - * need_resched() must be tested after turning interrupts off again to make sure + * need_resched() must be tested after this function returns to make sure * the interrupt didn't schedule work that should take the cpu out of idle. * - * Returns 0 if need_resched was false, -EINTR if need_resched was true. + * Returns 0 if no poke was pending, 1 if a poke was cleared. */ static int cpuidle_coupled_clear_pokes(int cpu) { + if (!cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) + return 0; + local_irq_enable(); - while (cpumask_test_cpu(cpu, &cpuidle_coupled_poked_mask)) + while (cpumask_test_cpu(cpu, &cpuidle_coupled_poke_pending)) cpu_relax(); local_irq_disable(); - return need_resched() ? 
-EINTR : 0; + return 1; +} + +static bool cpuidle_coupled_any_pokes_pending(struct cpuidle_coupled *coupled) +{ + cpumask_t cpus; + int ret; + + cpumask_and(&cpus, cpu_online_mask, &coupled->coupled_cpus); + ret = cpumask_and(&cpus, &cpuidle_coupled_poke_pending, &cpus); + + return ret; } /** @@ -449,31 +461,56 @@ int cpuidle_enter_state_coupled(struct cpuidle_device *dev, { int entered_state = -1; struct cpuidle_coupled *coupled = dev->coupled; + int w; if (!coupled) return -EINVAL; while (coupled->prevent) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { local_irq_enable(); return entered_state; } entered_state = cpuidle_enter_state(dev, drv, dev->safe_state_index); + local_irq_disable(); } /* Read barrier ensures online_count is read after prevent is cleared */ smp_rmb(); - cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); +reset: + cpumask_clear_cpu(dev->cpu, &cpuidle_coupled_poked); + + w = cpuidle_coupled_set_waiting(dev->cpu, coupled, next_state); + /* + * If this is the last cpu to enter the waiting state, poke + * all the other cpus out of their waiting state so they can + * enter a deeper state. This can race with one of the cpus + * exiting the waiting state due to an interrupt and + * decrementing waiting_count, see comment below. + */ + if (w == coupled->online_count) { + cpumask_set_cpu(dev->cpu, &cpuidle_coupled_poked); + cpuidle_coupled_poke_others(dev->cpu, coupled); + } retry: /* * Wait for all coupled cpus to be idle, using the deepest state - * allowed for a single cpu. + * allowed for a single cpu. If this was not the poking cpu, wait + * for at least one poke before leaving to avoid a race where + * two cpus could arrive at the waiting loop at the same time, + * but the first of the two to arrive could skip the loop without + * processing the pokes from the last to arrive. */ - while (!cpuidle_coupled_cpus_waiting(coupled)) { - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + while (!cpuidle_coupled_cpus_waiting(coupled) || + !cpumask_test_cpu(dev->cpu, &cpuidle_coupled_poked)) { + if (cpuidle_coupled_clear_pokes(dev->cpu)) + continue; + + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } @@ -485,14 +522,22 @@ retry: entered_state = cpuidle_enter_state(dev, drv, dev->safe_state_index); + local_irq_disable(); } - if (cpuidle_coupled_clear_pokes(dev->cpu)) { + cpuidle_coupled_clear_pokes(dev->cpu); + if (need_resched()) { cpuidle_coupled_set_not_waiting(dev->cpu, coupled); goto out; } /* + * Make sure final poke status for this cpu is visible before setting + * cpu as ready. + */ + smp_wmb(); + + /* * All coupled cpus are probably idle. There is a small chance that * one of the other cpus just became active. Increment the ready count, * and spin until all coupled cpus have incremented the counter. Once a @@ -511,6 +556,28 @@ retry: cpu_relax(); } + /* + * Make sure read of all cpus ready is done before reading pending pokes + */ + smp_rmb(); + + /* + * There is a small chance that a cpu left and reentered idle after this + * cpu saw that all cpus were waiting. The cpu that reentered idle will + * have sent this cpu a poke, which will still be pending after the + * ready loop. The pending interrupt may be lost by the interrupt + * controller when entering the deep idle state. 
It's not possible to + * clear a pending interrupt without turning interrupts on and handling + * it, and it's too late to turn on interrupts here, so reset the + * coupled idle state of all cpus and retry. + */ + if (cpuidle_coupled_any_pokes_pending(coupled)) { + cpuidle_coupled_set_done(dev->cpu, coupled); + /* Wait for all cpus to see the pending pokes */ + cpuidle_coupled_parallel_barrier(dev, &coupled->abort_barrier); + goto reset; + } + /* all cpus have acked the coupled state */ next_state = cpuidle_coupled_get_state(dev, coupled); @@ -596,7 +663,7 @@ have_coupled: coupled->refcnt++; csd = &per_cpu(cpuidle_coupled_poke_cb, dev->cpu); - csd->func = cpuidle_coupled_poked; + csd->func = cpuidle_coupled_handle_poke; csd->info = (void *)(unsigned long)dev->cpu; return 0; diff --git a/drivers/cpuidle/cpuidle-calxeda.c b/drivers/cpuidle/cpuidle-calxeda.c index 0e6e408..3460584 100644 --- a/drivers/cpuidle/cpuidle-calxeda.c +++ b/drivers/cpuidle/cpuidle-calxeda.c @@ -35,7 +35,7 @@ #include <asm/cp15.h> extern void highbank_set_cpu_jump(int cpu, void *jump_addr); -extern void *scu_base_addr; +extern void __iomem *scu_base_addr; static noinline void calxeda_idle_restore(void) { diff --git a/drivers/cpuidle/cpuidle-kirkwood.c b/drivers/cpuidle/cpuidle-kirkwood.c index 521b0a7..41ba843 100644 --- a/drivers/cpuidle/cpuidle-kirkwood.c +++ b/drivers/cpuidle/cpuidle-kirkwood.c @@ -60,9 +60,6 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev) struct resource *res; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (res == NULL) - return -EINVAL; - ddr_operation_base = devm_ioremap_resource(&pdev->dev, res); if (IS_ERR(ddr_operation_base)) return PTR_ERR(ddr_operation_base); @@ -70,7 +67,7 @@ static int kirkwood_cpuidle_probe(struct platform_device *pdev) return cpuidle_register(&kirkwood_idle_driver, NULL); } -int kirkwood_cpuidle_remove(struct platform_device *pdev) +static int kirkwood_cpuidle_remove(struct platform_device *pdev) { cpuidle_unregister(&kirkwood_idle_driver); return 0; diff --git a/drivers/cpuidle/cpuidle-ux500.c b/drivers/cpuidle/cpuidle-ux500.c new file mode 100644 index 0000000..e056465 --- /dev/null +++ b/drivers/cpuidle/cpuidle-ux500.c @@ -0,0 +1,131 @@ +/* + * Copyright (c) 2012 Linaro : Daniel Lezcano <daniel.lezcano@linaro.org> (IBM) + * + * Based on the work of Rickard Andersson <rickard.andersson@stericsson.com> + * and Jonas Aaberg <jonas.aberg@stericsson.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/module.h> +#include <linux/cpuidle.h> +#include <linux/spinlock.h> +#include <linux/atomic.h> +#include <linux/smp.h> +#include <linux/mfd/dbx500-prcmu.h> +#include <linux/platform_data/arm-ux500-pm.h> +#include <linux/platform_device.h> + +#include <asm/cpuidle.h> +#include <asm/proc-fns.h> + +static atomic_t master = ATOMIC_INIT(0); +static DEFINE_SPINLOCK(master_lock); + +static inline int ux500_enter_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) +{ + int this_cpu = smp_processor_id(); + bool recouple = false; + + if (atomic_inc_return(&master) == num_online_cpus()) { + + /* With this lock, we prevent the other cpu to exit and enter + * this function again and become the master */ + if (!spin_trylock(&master_lock)) + goto wfi; + + /* decouple the gic from the A9 cores */ + if (prcmu_gic_decouple()) { + spin_unlock(&master_lock); + goto out; + } + + /* If an error occur, we will have to recouple the gic + * manually */ + recouple = true; + + /* At this state, as the gic is decoupled, if the other + * cpu is in WFI, we have the guarantee it won't be wake + * up, so we can safely go to retention */ + if (!prcmu_is_cpu_in_wfi(this_cpu ? 0 : 1)) + goto out; + + /* The prcmu will be in charge of watching the interrupts + * and wake up the cpus */ + if (prcmu_copy_gic_settings()) + goto out; + + /* Check in the meantime an interrupt did + * not occur on the gic ... */ + if (prcmu_gic_pending_irq()) + goto out; + + /* ... and the prcmu */ + if (prcmu_pending_irq()) + goto out; + + /* Go to the retention state, the prcmu will wait for the + * cpu to go WFI and this is what happens after exiting this + * 'master' critical section */ + if (prcmu_set_power_state(PRCMU_AP_IDLE, true, true)) + goto out; + + /* When we switch to retention, the prcmu is in charge + * of recoupling the gic automatically */ + recouple = false; + + spin_unlock(&master_lock); + } +wfi: + cpu_do_idle(); +out: + atomic_dec(&master); + + if (recouple) { + prcmu_gic_recouple(); + spin_unlock(&master_lock); + } + + return index; +} + +static struct cpuidle_driver ux500_idle_driver = { + .name = "ux500_idle", + .owner = THIS_MODULE, + .states = { + ARM_CPUIDLE_WFI_STATE, + { + .enter = ux500_enter_idle, + .exit_latency = 70, + .target_residency = 260, + .flags = CPUIDLE_FLAG_TIME_VALID | + CPUIDLE_FLAG_TIMER_STOP, + .name = "ApIdle", + .desc = "ARM Retention", + }, + }, + .safe_state_index = 0, + .state_count = 2, +}; + +static int __init dbx500_cpuidle_probe(struct platform_device *pdev) +{ + /* Configure wake up reasons */ + prcmu_enable_wakeups(PRCMU_WAKEUP(ARM) | PRCMU_WAKEUP(RTC) | + PRCMU_WAKEUP(ABB)); + + return cpuidle_register(&ux500_idle_driver, NULL); +} + +static struct platform_driver dbx500_cpuidle_plat_driver = { + .driver = { + .name = "cpuidle-dbx500", + .owner = THIS_MODULE, + }, + .probe = dbx500_cpuidle_probe, +}; + +module_platform_driver(dbx500_cpuidle_plat_driver); diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index fdc432f..d75040d 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -42,8 +42,6 @@ void disable_cpuidle(void) off = 1; } -static int __cpuidle_register_device(struct cpuidle_device *dev); - /** * cpuidle_play_dead - cpu off-lining * @@ -278,7 +276,7 @@ static void poll_idle_init(struct cpuidle_driver *drv) {} */ int cpuidle_enable_device(struct cpuidle_device *dev) { - int ret, i; + int ret; struct cpuidle_driver *drv; if (!dev) @@ -292,15 +290,12 @@ int cpuidle_enable_device(struct cpuidle_device 
*dev) if (!drv || !cpuidle_curr_governor) return -EIO; + if (!dev->registered) + return -EINVAL; + if (!dev->state_count) dev->state_count = drv->state_count; - if (dev->registered == 0) { - ret = __cpuidle_register_device(dev); - if (ret) - return ret; - } - poll_idle_init(drv); ret = cpuidle_add_device_sysfs(dev); @@ -311,12 +306,6 @@ int cpuidle_enable_device(struct cpuidle_device *dev) (ret = cpuidle_curr_governor->enable(drv, dev))) goto fail_sysfs; - for (i = 0; i < dev->state_count; i++) { - dev->states_usage[i].usage = 0; - dev->states_usage[i].time = 0; - } - dev->last_residency = 0; - smp_wmb(); dev->enabled = 1; @@ -360,6 +349,23 @@ void cpuidle_disable_device(struct cpuidle_device *dev) EXPORT_SYMBOL_GPL(cpuidle_disable_device); +static void __cpuidle_unregister_device(struct cpuidle_device *dev) +{ + struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); + + list_del(&dev->device_list); + per_cpu(cpuidle_devices, dev->cpu) = NULL; + module_put(drv->owner); +} + +static int __cpuidle_device_init(struct cpuidle_device *dev) +{ + memset(dev->states_usage, 0, sizeof(dev->states_usage)); + dev->last_residency = 0; + + return 0; +} + /** * __cpuidle_register_device - internal register function called before register * and enable routines @@ -377,24 +383,15 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); - ret = cpuidle_add_sysfs(dev); - if (ret) - goto err_sysfs; ret = cpuidle_coupled_register_device(dev); - if (ret) - goto err_coupled; + if (ret) { + __cpuidle_unregister_device(dev); + return ret; + } dev->registered = 1; return 0; - -err_coupled: - cpuidle_remove_sysfs(dev); -err_sysfs: - list_del(&dev->device_list); - per_cpu(cpuidle_devices, dev->cpu) = NULL; - module_put(drv->owner); - return ret; } /** @@ -403,25 +400,44 @@ err_sysfs: */ int cpuidle_register_device(struct cpuidle_device *dev) { - int ret; + int ret = -EBUSY; if (!dev) return -EINVAL; mutex_lock(&cpuidle_lock); - if ((ret = __cpuidle_register_device(dev))) { - mutex_unlock(&cpuidle_lock); - return ret; - } + if (dev->registered) + goto out_unlock; + + ret = __cpuidle_device_init(dev); + if (ret) + goto out_unlock; + + ret = __cpuidle_register_device(dev); + if (ret) + goto out_unlock; + + ret = cpuidle_add_sysfs(dev); + if (ret) + goto out_unregister; + + ret = cpuidle_enable_device(dev); + if (ret) + goto out_sysfs; - cpuidle_enable_device(dev); cpuidle_install_idle_handler(); +out_unlock: mutex_unlock(&cpuidle_lock); - return 0; + return ret; +out_sysfs: + cpuidle_remove_sysfs(dev); +out_unregister: + __cpuidle_unregister_device(dev); + goto out_unlock; } EXPORT_SYMBOL_GPL(cpuidle_register_device); @@ -432,8 +448,6 @@ EXPORT_SYMBOL_GPL(cpuidle_register_device); */ void cpuidle_unregister_device(struct cpuidle_device *dev) { - struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); - if (dev->registered == 0) return; @@ -442,14 +456,12 @@ void cpuidle_unregister_device(struct cpuidle_device *dev) cpuidle_disable_device(dev); cpuidle_remove_sysfs(dev); - list_del(&dev->device_list); - per_cpu(cpuidle_devices, dev->cpu) = NULL; + + __cpuidle_unregister_device(dev); cpuidle_coupled_unregister_device(dev); cpuidle_resume_and_unlock(); - - module_put(drv->owner); } EXPORT_SYMBOL_GPL(cpuidle_unregister_device); diff --git a/drivers/cpuidle/governors/ladder.c b/drivers/cpuidle/governors/ladder.c index 9b78405..9f08e8c 100644 --- a/drivers/cpuidle/governors/ladder.c +++ 
b/drivers/cpuidle/governors/ladder.c @@ -192,14 +192,4 @@ static int __init init_ladder(void) return cpuidle_register_governor(&ladder_governor); } -/** - * exit_ladder - exits the governor - */ -static void __exit exit_ladder(void) -{ - cpuidle_unregister_governor(&ladder_governor); -} - -MODULE_LICENSE("GPL"); -module_init(init_ladder); -module_exit(exit_ladder); +postcore_initcall(init_ladder); diff --git a/drivers/cpuidle/governors/menu.c b/drivers/cpuidle/governors/menu.c index fe343a0..cf7f2f0 100644 --- a/drivers/cpuidle/governors/menu.c +++ b/drivers/cpuidle/governors/menu.c @@ -21,6 +21,15 @@ #include <linux/math64.h> #include <linux/module.h> +/* + * Please note when changing the tuning values: + * If (MAX_INTERESTING-1) * RESOLUTION > UINT_MAX, the result of + * a scaling operation multiplication may overflow on 32 bit platforms. + * In that case, #define RESOLUTION as ULL to get 64 bit result: + * #define RESOLUTION 1024ULL + * + * The default values do not overflow. + */ #define BUCKETS 12 #define INTERVALS 8 #define RESOLUTION 1024 @@ -28,13 +37,6 @@ #define MAX_INTERESTING 50000 #define STDDEV_THRESH 400 -/* 60 * 60 > STDDEV_THRESH * INTERVALS = 400 * 8 */ -#define MAX_DEVIATION 60 - -static DEFINE_PER_CPU(struct hrtimer, menu_hrtimer); -static DEFINE_PER_CPU(int, hrtimer_status); -/* menu hrtimer mode */ -enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; /* * Concepts and ideas behind the menu governor @@ -116,23 +118,16 @@ enum {MENU_HRTIMER_STOP, MENU_HRTIMER_REPEAT, MENU_HRTIMER_GENERAL}; * */ -/* - * The C-state residency is so long that is is worthwhile to exit - * from the shallow C-state and re-enter into a deeper C-state. - */ -static unsigned int perfect_cstate_ms __read_mostly = 30; -module_param(perfect_cstate_ms, uint, 0000); - struct menu_device { int last_state_idx; int needs_update; unsigned int expected_us; - u64 predicted_us; + unsigned int predicted_us; unsigned int exit_us; unsigned int bucket; - u64 correction_factor[BUCKETS]; - u32 intervals[INTERVALS]; + unsigned int correction_factor[BUCKETS]; + unsigned int intervals[INTERVALS]; int interval_ptr; }; @@ -205,59 +200,28 @@ static u64 div_round64(u64 dividend, u32 divisor) return div_u64(dividend + (divisor / 2), divisor); } -/* Cancel the hrtimer if it is not triggered yet */ -void menu_hrtimer_cancel(void) -{ - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); - - /* The timer is still not time out*/ - if (per_cpu(hrtimer_status, cpu)) { - hrtimer_cancel(hrtmr); - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - } -} -EXPORT_SYMBOL_GPL(menu_hrtimer_cancel); - -/* Call back for hrtimer is triggered */ -static enum hrtimer_restart menu_hrtimer_notify(struct hrtimer *hrtimer) -{ - int cpu = smp_processor_id(); - struct menu_device *data = &per_cpu(menu_devices, cpu); - - /* In general case, the expected residency is much larger than - * deepest C-state target residency, but prediction logic still - * predicts a small predicted residency, so the prediction - * history is totally broken if the timer is triggered. - * So reset the correction factor. - */ - if (per_cpu(hrtimer_status, cpu) == MENU_HRTIMER_GENERAL) - data->correction_factor[data->bucket] = RESOLUTION * DECAY; - - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_STOP; - - return HRTIMER_NORESTART; -} - /* * Try detecting repeating patterns by keeping track of the last 8 * intervals, and checking if the standard deviation of that set * of points is below a threshold. If it is... 
then use the * average of these 8 points as the estimated value. */ -static u32 get_typical_interval(struct menu_device *data) +static void get_typical_interval(struct menu_device *data) { - int i = 0, divisor = 0; - uint64_t max = 0, avg = 0, stddev = 0; - int64_t thresh = LLONG_MAX; /* Discard outliers above this value. */ - unsigned int ret = 0; + int i, divisor; + unsigned int max, thresh; + uint64_t avg, stddev; + + thresh = UINT_MAX; /* Discard outliers above this value */ again: - /* first calculate average and standard deviation of the past */ - max = avg = divisor = stddev = 0; + /* First calculate the average of past intervals */ + max = 0; + avg = 0; + divisor = 0; for (i = 0; i < INTERVALS; i++) { - int64_t value = data->intervals[i]; + unsigned int value = data->intervals[i]; if (value <= thresh) { avg += value; divisor++; @@ -267,15 +231,38 @@ again: } do_div(avg, divisor); + /* Then try to determine standard deviation */ + stddev = 0; for (i = 0; i < INTERVALS; i++) { - int64_t value = data->intervals[i]; + unsigned int value = data->intervals[i]; if (value <= thresh) { int64_t diff = value - avg; stddev += diff * diff; } } do_div(stddev, divisor); - stddev = int_sqrt(stddev); + /* + * The typical interval is obtained when standard deviation is small + * or standard deviation is small compared to the average interval. + * + * int_sqrt() formal parameter type is unsigned long. When the + * greatest difference to an outlier exceeds ~65 ms * sqrt(divisor) + * the resulting squared standard deviation exceeds the input domain + * of int_sqrt on platforms where unsigned long is 32 bits in size. + * In such case reject the candidate average. + * + * Use this result only if there is no timer to wake us up sooner. + */ + if (likely(stddev <= ULONG_MAX)) { + stddev = int_sqrt(stddev); + if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) + || stddev <= 20) { + if (data->expected_us > avg) + data->predicted_us = avg; + return; + } + } + /* * If we have outliers to the upside in our distribution, discard * those by setting the threshold to exclude these outliers, then @@ -284,23 +271,12 @@ again: * * This can deal with workloads that have long pauses interspersed * with sporadic activity with a bunch of short pauses. - * - * The typical interval is obtained when standard deviation is small - * or standard deviation is small compared to the average interval. */ - if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3)) - || stddev <= 20) { - data->predicted_us = avg; - ret = 1; - return ret; - - } else if ((divisor * 4) > INTERVALS * 3) { - /* Exclude the max interval */ - thresh = max - 1; - goto again; - } + if ((divisor * 4) <= INTERVALS * 3) + return; - return ret; + thresh = max - 1; + goto again; } /** @@ -315,9 +291,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) int i; int multiplier; struct timespec t; - int repeat = 0, low_predicted = 0; - int cpu = smp_processor_id(); - struct hrtimer *hrtmr = &per_cpu(menu_hrtimer, cpu); if (data->needs_update) { menu_update(drv, dev); @@ -348,11 +321,16 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (data->correction_factor[data->bucket] == 0) data->correction_factor[data->bucket] = RESOLUTION * DECAY; - /* Make sure to round up for half microseconds */ - data->predicted_us = div_round64(data->expected_us * data->correction_factor[data->bucket], + /* + * Force the result of multiplication to be 64 bits even if both + * operands are 32 bits. 
+ * Make sure to round up for half microseconds. + */ + data->predicted_us = div_round64((uint64_t)data->expected_us * + data->correction_factor[data->bucket], RESOLUTION * DECAY); - repeat = get_typical_interval(data); + get_typical_interval(data); /* * We want to default to C1 (hlt), not to busy polling @@ -373,10 +351,8 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) if (s->disabled || su->disable) continue; - if (s->target_residency > data->predicted_us) { - low_predicted = 1; + if (s->target_residency > data->predicted_us) continue; - } if (s->exit_latency > latency_req) continue; if (s->exit_latency * multiplier > data->predicted_us) @@ -386,44 +362,6 @@ static int menu_select(struct cpuidle_driver *drv, struct cpuidle_device *dev) data->exit_us = s->exit_latency; } - /* not deepest C-state chosen for low predicted residency */ - if (low_predicted) { - unsigned int timer_us = 0; - unsigned int perfect_us = 0; - - /* - * Set a timer to detect whether this sleep is much - * longer than repeat mode predicted. If the timer - * triggers, the code will evaluate whether to put - * the CPU into a deeper C-state. - * The timer is cancelled on CPU wakeup. - */ - timer_us = 2 * (data->predicted_us + MAX_DEVIATION); - - perfect_us = perfect_cstate_ms * 1000; - - if (repeat && (4 * timer_us < data->expected_us)) { - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In repeat case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_REPEAT; - } else if (perfect_us < data->expected_us) { - /* - * The next timer is long. This could be because - * we did not make a useful prediction. - * In that case, it makes sense to re-enter - * into a deeper C-state after some time. - */ - RCU_NONIDLE(hrtimer_start(hrtmr, - ns_to_ktime(1000 * timer_us), - HRTIMER_MODE_REL_PINNED)); - /* In general case, menu hrtimer is started */ - per_cpu(hrtimer_status, cpu) = MENU_HRTIMER_GENERAL; - } - - } - return data->last_state_idx; } @@ -455,7 +393,7 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) unsigned int last_idle_us = cpuidle_get_last_residency(dev); struct cpuidle_state *target = &drv->states[last_idx]; unsigned int measured_us; - u64 new_factor; + unsigned int new_factor; /* * Ugh, this idle state doesn't support residency measurements, so we @@ -476,10 +414,9 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) measured_us -= data->exit_us; - /* update our correction ratio */ - - new_factor = data->correction_factor[data->bucket] - * (DECAY - 1) / DECAY; + /* Update our correction ratio */ + new_factor = data->correction_factor[data->bucket]; + new_factor -= new_factor / DECAY; if (data->expected_us > 0 && measured_us < MAX_INTERESTING) new_factor += RESOLUTION * measured_us / data->expected_us; @@ -492,9 +429,11 @@ static void menu_update(struct cpuidle_driver *drv, struct cpuidle_device *dev) /* * We don't want 0 as factor; we always want at least - * a tiny bit of estimated time. + * a tiny bit of estimated time. Fortunately, due to rounding, + * new_factor will stay nonzero regardless of measured_us values + * and the compiler can eliminate this test as long as DECAY > 1. 
*/ - if (new_factor == 0) + if (DECAY == 1 && unlikely(new_factor == 0)) new_factor = 1; data->correction_factor[data->bucket] = new_factor; @@ -514,9 +453,6 @@ static int menu_enable_device(struct cpuidle_driver *drv, struct cpuidle_device *dev) { struct menu_device *data = &per_cpu(menu_devices, dev->cpu); - struct hrtimer *t = &per_cpu(menu_hrtimer, dev->cpu); - hrtimer_init(t, CLOCK_MONOTONIC, HRTIMER_MODE_REL); - t->function = menu_hrtimer_notify; memset(data, 0, sizeof(struct menu_device)); @@ -540,14 +476,4 @@ static int __init init_menu(void) return cpuidle_register_governor(&menu_governor); } -/** - * exit_menu - exits the governor - */ -static void __exit exit_menu(void) -{ - cpuidle_unregister_governor(&menu_governor); -} - -MODULE_LICENSE("GPL"); -module_init(init_menu); -module_exit(exit_menu); +postcore_initcall(init_menu); diff --git a/drivers/cpuidle/sysfs.c b/drivers/cpuidle/sysfs.c index 428754a..8739cc0 100644 --- a/drivers/cpuidle/sysfs.c +++ b/drivers/cpuidle/sysfs.c @@ -11,8 +11,10 @@ #include <linux/sysfs.h> #include <linux/slab.h> #include <linux/cpu.h> +#include <linux/completion.h> #include <linux/capability.h> #include <linux/device.h> +#include <linux/kobject.h> #include "cpuidle.h" @@ -33,7 +35,8 @@ static ssize_t show_available_governors(struct device *dev, mutex_lock(&cpuidle_lock); list_for_each_entry(tmp, &cpuidle_governors, governor_list) { - if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - CPUIDLE_NAME_LEN - 2)) + if (i >= (ssize_t) ((PAGE_SIZE/sizeof(char)) - + CPUIDLE_NAME_LEN - 2)) goto out; i += scnprintf(&buf[i], CPUIDLE_NAME_LEN, "%s ", tmp->name); } @@ -166,13 +169,28 @@ struct cpuidle_attr { #define define_one_rw(_name, show, store) \ static struct cpuidle_attr attr_##_name = __ATTR(_name, 0644, show, store) -#define kobj_to_cpuidledev(k) container_of(k, struct cpuidle_device, kobj) #define attr_to_cpuidleattr(a) container_of(a, struct cpuidle_attr, attr) -static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char * buf) + +struct cpuidle_device_kobj { + struct cpuidle_device *dev; + struct completion kobj_unregister; + struct kobject kobj; +}; + +static inline struct cpuidle_device *to_cpuidle_device(struct kobject *kobj) +{ + struct cpuidle_device_kobj *kdev = + container_of(kobj, struct cpuidle_device_kobj, kobj); + + return kdev->dev; +} + +static ssize_t cpuidle_show(struct kobject *kobj, struct attribute *attr, + char *buf) { int ret = -EIO; - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); - struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + struct cpuidle_device *dev = to_cpuidle_device(kobj); + struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr); if (cattr->show) { mutex_lock(&cpuidle_lock); @@ -182,12 +200,12 @@ static ssize_t cpuidle_show(struct kobject * kobj, struct attribute * attr ,char return ret; } -static ssize_t cpuidle_store(struct kobject * kobj, struct attribute * attr, - const char * buf, size_t count) +static ssize_t cpuidle_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) { int ret = -EIO; - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); - struct cpuidle_attr * cattr = attr_to_cpuidleattr(attr); + struct cpuidle_device *dev = to_cpuidle_device(kobj); + struct cpuidle_attr *cattr = attr_to_cpuidleattr(attr); if (cattr->store) { mutex_lock(&cpuidle_lock); @@ -204,9 +222,10 @@ static const struct sysfs_ops cpuidle_sysfs_ops = { static void cpuidle_sysfs_release(struct kobject *kobj) { - struct cpuidle_device *dev = kobj_to_cpuidledev(kobj); + 
struct cpuidle_device_kobj *kdev = + container_of(kobj, struct cpuidle_device_kobj, kobj); - complete(&dev->kobj_unregister); + complete(&kdev->kobj_unregister); } static struct kobj_type ktype_cpuidle = { @@ -237,8 +256,8 @@ static ssize_t show_state_##_name(struct cpuidle_state *state, \ #define define_store_state_ull_function(_name) \ static ssize_t store_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, \ - const char *buf, size_t size) \ + struct cpuidle_state_usage *state_usage, \ + const char *buf, size_t size) \ { \ unsigned long long value; \ int err; \ @@ -256,14 +275,16 @@ static ssize_t store_state_##_name(struct cpuidle_state *state, \ #define define_show_state_ull_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, char *buf) \ + struct cpuidle_state_usage *state_usage, \ + char *buf) \ { \ return sprintf(buf, "%llu\n", state_usage->_name);\ } #define define_show_state_str_function(_name) \ static ssize_t show_state_##_name(struct cpuidle_state *state, \ - struct cpuidle_state_usage *state_usage, char *buf) \ + struct cpuidle_state_usage *state_usage, \ + char *buf) \ { \ if (state->_name[0] == '\0')\ return sprintf(buf, "<null>\n");\ @@ -309,8 +330,9 @@ struct cpuidle_state_kobj { #define kobj_to_state(k) (kobj_to_state_obj(k)->state) #define kobj_to_state_usage(k) (kobj_to_state_obj(k)->state_usage) #define attr_to_stateattr(a) container_of(a, struct cpuidle_state_attr, attr) -static ssize_t cpuidle_state_show(struct kobject * kobj, - struct attribute * attr ,char * buf) + +static ssize_t cpuidle_state_show(struct kobject *kobj, struct attribute *attr, + char * buf) { int ret = -EIO; struct cpuidle_state *state = kobj_to_state(kobj); @@ -323,8 +345,8 @@ static ssize_t cpuidle_state_show(struct kobject * kobj, return ret; } -static ssize_t cpuidle_state_store(struct kobject *kobj, - struct attribute *attr, const char *buf, size_t size) +static ssize_t cpuidle_state_store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t size) { int ret = -EIO; struct cpuidle_state *state = kobj_to_state(kobj); @@ -371,6 +393,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) { int i, ret = -ENOMEM; struct cpuidle_state_kobj *kobj; + struct cpuidle_device_kobj *kdev = device->kobj_dev; struct cpuidle_driver *drv = cpuidle_get_cpu_driver(device); /* state statistics */ @@ -383,7 +406,7 @@ static int cpuidle_add_state_sysfs(struct cpuidle_device *device) init_completion(&kobj->kobj_unregister); ret = kobject_init_and_add(&kobj->kobj, &ktype_state_cpuidle, - &device->kobj, "state%d", i); + &kdev->kobj, "state%d", i); if (ret) { kfree(kobj); goto error_state; @@ -449,8 +472,8 @@ static void cpuidle_driver_sysfs_release(struct kobject *kobj) complete(&driver_kobj->kobj_unregister); } -static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute * attr, - char * buf) +static ssize_t cpuidle_driver_show(struct kobject *kobj, struct attribute *attr, + char *buf) { int ret = -EIO; struct cpuidle_driver_kobj *driver_kobj = kobj_to_driver_kobj(kobj); @@ -500,6 +523,7 @@ static struct kobj_type ktype_driver_cpuidle = { static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) { struct cpuidle_driver_kobj *kdrv; + struct cpuidle_device_kobj *kdev = dev->kobj_dev; struct cpuidle_driver *drv = cpuidle_get_cpu_driver(dev); int ret; @@ -511,7 +535,7 @@ static int cpuidle_add_driver_sysfs(struct cpuidle_device *dev) 
init_completion(&kdrv->kobj_unregister); ret = kobject_init_and_add(&kdrv->kobj, &ktype_driver_cpuidle, - &dev->kobj, "driver"); + &kdev->kobj, "driver"); if (ret) { kfree(kdrv); return ret; @@ -580,16 +604,28 @@ void cpuidle_remove_device_sysfs(struct cpuidle_device *device) */ int cpuidle_add_sysfs(struct cpuidle_device *dev) { + struct cpuidle_device_kobj *kdev; struct device *cpu_dev = get_cpu_device((unsigned long)dev->cpu); int error; - init_completion(&dev->kobj_unregister); + kdev = kzalloc(sizeof(*kdev), GFP_KERNEL); + if (!kdev) + return -ENOMEM; + kdev->dev = dev; + dev->kobj_dev = kdev; - error = kobject_init_and_add(&dev->kobj, &ktype_cpuidle, &cpu_dev->kobj, - "cpuidle"); - if (!error) - kobject_uevent(&dev->kobj, KOBJ_ADD); - return error; + init_completion(&kdev->kobj_unregister); + + error = kobject_init_and_add(&kdev->kobj, &ktype_cpuidle, &cpu_dev->kobj, + "cpuidle"); + if (error) { + kfree(kdev); + return error; + } + + kobject_uevent(&kdev->kobj, KOBJ_ADD); + + return 0; } /** @@ -598,6 +634,9 @@ int cpuidle_add_sysfs(struct cpuidle_device *dev) */ void cpuidle_remove_sysfs(struct cpuidle_device *dev) { - kobject_put(&dev->kobj); - wait_for_completion(&dev->kobj_unregister); + struct cpuidle_device_kobj *kdev = dev->kobj_dev; + + kobject_put(&kdev->kobj); + wait_for_completion(&kdev->kobj_unregister); + kfree(kdev); } |
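
The coupled.c changes above hinge on ready_waiting_counts, one atomic value that packs how many coupled cpus are waiting and how many are ready, which is why cpuidle_coupled_set_waiting() can now simply return atomic_inc_return(...) & WAITING_MASK and leave the "last cpu to wait pokes the others" decision to the caller. Below is a rough user-space model of such a packed counter; the 16-bit field split and the helper names are assumptions made for illustration, not copied from coupled.c.

/* User-space model of a packed waiting/ready counter in the style of
 * coupled.c. The 16-bit split below is an assumed layout, illustration
 * only; it is not taken from the kernel source. */
#include <stdio.h>
#include <stdatomic.h>

#define WAITING_BITS	16
#define WAITING_MASK	((1u << WAITING_BITS) - 1)
#define READY_ONE	(1u << WAITING_BITS)	/* one "ready" increment */

static atomic_uint ready_waiting_counts;

/* A cpu announces it is waiting; returns the new waiting count. */
static unsigned int set_waiting(void)
{
	return (atomic_fetch_add(&ready_waiting_counts, 1) + 1) & WAITING_MASK;
}

/* A waiting cpu promotes itself to ready; returns the new ready count. */
static unsigned int set_ready(void)
{
	return (atomic_fetch_add(&ready_waiting_counts, READY_ONE) + READY_ONE)
		>> WAITING_BITS;
}

int main(void)
{
	int online = 4, cpu;

	for (cpu = 0; cpu < online; cpu++) {
		unsigned int w = set_waiting();

		/* Mirrors the patch: only the last cpu to start waiting
		 * pokes the others so they can pick a deeper state. */
		if (w == (unsigned int)online)
			printf("cpu%d is the last to wait, poke the others\n", cpu);
	}
	for (cpu = 0; cpu < online; cpu++)
		printf("ready count is now %u\n", set_ready());

	return 0;
}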
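
In menu.c, get_typical_interval() now detects repeating wakeup patterns by averaging the last eight observed intervals, taking their standard deviation, and, when the spread is too wide, discarding the largest outlier and retrying. The stand-alone sketch below follows the same thresholds; int_sqrt() is re-implemented as a plain bisection helper, and the kernel's additional check that the result is only used when no timer will wake the cpu sooner (data->expected_us > avg) is left out, so treat it as a sketch of the detection loop only.

/* Stand-alone sketch of the repeating-interval detection done by
 * get_typical_interval() in menu.c; same thresholds, user-space only. */
#include <stdio.h>
#include <stdint.h>
#include <limits.h>

#define INTERVALS 8

/* Plain bisection integer square root, standing in for the kernel's
 * int_sqrt(); the algorithm differs, the result does not. */
static unsigned long int_sqrt(unsigned long x)
{
	unsigned long r = 0, bit = 1UL << (sizeof(unsigned long) * 8 - 2);

	while (bit > x)
		bit >>= 2;
	while (bit) {
		if (x >= r + bit) {
			x -= r + bit;
			r = (r >> 1) + bit;
		} else {
			r >>= 1;
		}
		bit >>= 2;
	}
	return r;
}

/* Returns the typical interval in microseconds, or 0 when no stable
 * pattern is found (the "give up" paths of the patch). */
static unsigned int typical_interval(const unsigned int intervals[INTERVALS])
{
	unsigned int thresh = UINT_MAX;	/* discard outliers above this */

	for (;;) {
		uint64_t avg = 0, stddev = 0;
		unsigned int max = 0;
		int i, divisor = 0;

		/* First the average of the samples below the threshold */
		for (i = 0; i < INTERVALS; i++) {
			unsigned int value = intervals[i];
			if (value <= thresh) {
				avg += value;
				divisor++;
				if (value > max)
					max = value;
			}
		}
		avg /= divisor;

		/* Then their standard deviation */
		for (i = 0; i < INTERVALS; i++) {
			unsigned int value = intervals[i];
			if (value <= thresh) {
				int64_t diff = (int64_t)value - (int64_t)avg;
				stddev += (uint64_t)(diff * diff);
			}
		}
		stddev /= divisor;

		/* As in the patch: the squared deviation may exceed the
		 * input domain of a 32-bit int_sqrt(), reject it then. */
		if (stddev <= ULONG_MAX) {
			stddev = int_sqrt((unsigned long)stddev);
			if (((avg > stddev * 6) && (divisor * 4 >= INTERVALS * 3))
			    || stddev <= 20)
				return (unsigned int)avg;
		}

		/* Not enough samples left to keep discarding outliers */
		if (divisor * 4 <= INTERVALS * 3)
			return 0;

		thresh = max - 1;	/* drop the largest sample and retry */
	}
}

int main(void)
{
	unsigned int samples[INTERVALS] = {
		990, 1010, 1005, 995, 60000, 1000, 1002, 998 };

	printf("typical interval: %u us\n", typical_interval(samples));
	return 0;
}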
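
The other menu.c change worth illustrating is the correction factor handling: the running ratio is now kept in an unsigned int, decayed with new_factor -= new_factor / DECAY, and the prediction multiplies expected_us by the factor in 64 bits before dividing by RESOLUTION * DECAY. A minimal user-space sketch of that arithmetic follows; the DECAY value of 8 is an assumption taken from menu.c rather than from the hunks shown above, and only DECAY > 1 matters for the nonzero argument, as the patch comment notes.

/* Minimal user-space sketch of the menu governor's correction factor
 * arithmetic after this series; not kernel code. DECAY is assumed to
 * be 8 here; only DECAY > 1 matters for the argument in the comments. */
#include <stdio.h>
#include <stdint.h>

#define RESOLUTION	1024
#define DECAY		8
#define MAX_INTERESTING	50000

/* Round-to-nearest 64-bit division, as div_round64() does in menu.c. */
static uint64_t div_round64(uint64_t dividend, uint32_t divisor)
{
	return (dividend + divisor / 2) / divisor;
}

int main(void)
{
	unsigned int factor = RESOLUTION * DECAY;	/* "neutral" starting ratio */
	unsigned int expected_us = 5000;		/* next predicted sleep length */
	unsigned int measured_us = 1200;		/* what actually happened */

	/* Decay the running ratio. With integer division and DECAY > 1,
	 * factor - factor / DECAY never drops below 1 for factor >= 1,
	 * so the old "if (new_factor == 0)" fixup becomes dead code. */
	factor -= factor / DECAY;
	if (expected_us > 0 && measured_us < MAX_INTERESTING)
		factor += RESOLUTION * measured_us / expected_us;

	/* The prediction forces a 64-bit multiply, as the patch does,
	 * because expected_us * factor can exceed UINT_MAX on 32 bit. */
	uint64_t predicted_us = div_round64((uint64_t)expected_us * factor,
					    RESOLUTION * DECAY);

	printf("factor=%u predicted_us=%llu\n",
	       factor, (unsigned long long)predicted_us);
	return 0;
}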
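
Finally, the sysfs.c rework moves the kobject out of struct cpuidle_device into a separately allocated struct cpuidle_device_kobj and recovers the device through container_of() in the show/store callbacks. The sketch below shows that wrapper-plus-container_of pattern with user-space stand-ins for the kobject and sysfs machinery; the struct contents are reduced to what the patch adds.

/* Sketch of the wrapper-object pattern used by the sysfs.c rework: the
 * kobject lives in a separately allocated wrapper and the device is
 * recovered through container_of(). Plain user-space stand-ins only. */
#include <stdio.h>
#include <stdlib.h>
#include <stddef.h>

#define container_of(ptr, type, member) \
	((type *)((char *)(ptr) - offsetof(type, member)))

struct kobject { const char *name; };		/* stand-in for the kernel type */

struct cpuidle_device_kobj;

struct cpuidle_device {
	int cpu;
	struct cpuidle_device_kobj *kobj_dev;	/* back pointer to the wrapper */
};

struct cpuidle_device_kobj {
	struct cpuidle_device *dev;
	struct kobject kobj;
};

/* What a sysfs show/store callback does: from the kobject handed in,
 * walk back to the wrapper, then follow its dev pointer. */
static struct cpuidle_device *to_cpuidle_device(struct kobject *kobj)
{
	struct cpuidle_device_kobj *kdev =
		container_of(kobj, struct cpuidle_device_kobj, kobj);

	return kdev->dev;
}

int main(void)
{
	struct cpuidle_device dev = { .cpu = 1 };
	struct cpuidle_device_kobj *kdev = calloc(1, sizeof(*kdev));

	if (!kdev)
		return 1;

	kdev->dev = &dev;
	kdev->kobj.name = "cpuidle";
	dev.kobj_dev = kdev;

	printf("cpu%d reached via kobject '%s'\n",
	       to_cpuidle_device(&kdev->kobj)->cpu, kdev->kobj.name);
	free(kdev);
	return 0;
}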