From e978aa7d7d57d04eb5f88a7507c4fb98577def77 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Fri, 28 Oct 2011 16:20:09 +0530 Subject: cpuidle: Move dev->last_residency update to driver enter routine; remove dev->last_state Cpuidle governor only suggests the state to enter using the governor->select() interface, but allows the low level driver to override the recommended state. The actual entered state may be different because of software or hardware demotion. Software demotion is done by the back-end cpuidle driver and can be accounted correctly. Current cpuidle code uses last_state field to capture the actual state entered and based on that updates the statistics for the state entered. Ideally the driver enter routine should update the counters, and it should return the state actually entered rather than the time spent there. The generic cpuidle code should simply handle where the counters live in the sysfs namespace, not updating the counters. Reference: https://lkml.org/lkml/2011/3/25/52 Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Tested-by: Jean Pihet Reviewed-by: Kevin Hilman Acked-by: Arjan van de Ven Acked-by: Kevin Hilman Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 32 ++++++++++++++++---------------- 1 file changed, 16 insertions(+), 16 deletions(-) (limited to 'drivers/cpuidle/cpuidle.c') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index d4c5423..88bd121 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -62,7 +62,7 @@ int cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); struct cpuidle_state *target_state; - int next_state; + int next_state, entered_state; if (off) return -ENODEV; @@ -102,26 +102,27 @@ int cpuidle_idle_call(void) target_state = &dev->states[next_state]; - /* enter the state and update stats */ - dev->last_state = target_state; - trace_power_start(POWER_CSTATE, next_state, dev->cpu); trace_cpu_idle(next_state, dev->cpu); - dev->last_residency = target_state->enter(dev, target_state); + entered_state = target_state->enter(dev, next_state); trace_power_end(dev->cpu); trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); - if (dev->last_state) - target_state = dev->last_state; - - target_state->time += (unsigned long long)dev->last_residency; - target_state->usage++; + if (entered_state >= 0) { + /* Update cpuidle counters */ + /* This can be moved to within driver enter routine + * but that results in multiple copies of same code. + */ + dev->states[entered_state].time += + (unsigned long long)dev->last_residency; + dev->states[entered_state].usage++; + } /* give the governor an opportunity to reflect on the outcome */ if (cpuidle_curr_governor->reflect) - cpuidle_curr_governor->reflect(dev); + cpuidle_curr_governor->reflect(dev, entered_state); return 0; } @@ -172,11 +173,10 @@ void cpuidle_resume_and_unlock(void) EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock); #ifdef CONFIG_ARCH_HAS_CPU_RELAX -static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st) +static int poll_idle(struct cpuidle_device *dev, int index) { ktime_t t1, t2; s64 diff; - int ret; t1 = ktime_get(); local_irq_enable(); @@ -188,8 +188,9 @@ static int poll_idle(struct cpuidle_device *dev, struct cpuidle_state *st) if (diff > INT_MAX) diff = INT_MAX; - ret = (int) diff; - return ret; + dev->last_residency = (int) diff; + + return index; } static void poll_idle_init(struct cpuidle_device *dev) @@ -248,7 +249,6 @@ int cpuidle_enable_device(struct cpuidle_device *dev) dev->states[i].time = 0; } dev->last_residency = 0; - dev->last_state = NULL; smp_wmb(); -- cgit v1.1 From b25edc42bfb9602f0503474b2c94701d5536ce60 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Fri, 28 Oct 2011 16:20:24 +0530 Subject: cpuidle: Remove CPUIDLE_FLAG_IGNORE and dev->prepare() The cpuidle_device->prepare() mechanism causes updates to the cpuidle_state[].flags, setting and clearing CPUIDLE_FLAG_IGNORE to tell the governor not to chose a state on a per-cpu basis at run-time. State demotion is now handled by the driver and it returns the actual state entered. Hence, this mechanism is not required. Also this removes per-cpu flags from cpuidle_state enabling it to be made global. Reference: https://lkml.org/lkml/2011/3/25/52 Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Tested-by: Jean Pihet Acked-by: Arjan van de Ven Reviewed-by: Kevin Hilman Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'drivers/cpuidle/cpuidle.c') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 88bd121..f66bcf9 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -83,16 +83,6 @@ int cpuidle_idle_call(void) hrtimer_peek_ahead_timers(); #endif - /* - * Call the device's prepare function before calling the - * governor's select function. ->prepare gives the device's - * cpuidle driver a chance to update any dynamic information - * of its cpuidle states for the current idle period, e.g. - * state availability, latencies, residencies, etc. - */ - if (dev->prepare) - dev->prepare(dev); - /* ask the governor for the next state */ next_state = cpuidle_curr_governor->select(dev); if (need_resched()) { -- cgit v1.1 From 4202735e8ab6ecfb0381631a0d0b58fefe0bd4e2 Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Fri, 28 Oct 2011 16:20:33 +0530 Subject: cpuidle: Split cpuidle_state structure and move per-cpu statistics fields This is the first step towards global registration of cpuidle states. The statistics used primarily by the governor are per-cpu and have to be split from rest of the fields inside cpuidle_state, which would be made global i.e. single copy. The driver_data field is also per-cpu and moved. Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Tested-by: Jean Pihet Reviewed-by: Kevin Hilman Acked-by: Arjan van de Ven Acked-by: Kevin Hilman Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 11 ++++++----- 1 file changed, 6 insertions(+), 5 deletions(-) (limited to 'drivers/cpuidle/cpuidle.c') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index f66bcf9..7127e92 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -105,9 +105,9 @@ int cpuidle_idle_call(void) /* This can be moved to within driver enter routine * but that results in multiple copies of same code. */ - dev->states[entered_state].time += + dev->states_usage[entered_state].time += (unsigned long long)dev->last_residency; - dev->states[entered_state].usage++; + dev->states_usage[entered_state].usage++; } /* give the governor an opportunity to reflect on the outcome */ @@ -186,8 +186,9 @@ static int poll_idle(struct cpuidle_device *dev, int index) static void poll_idle_init(struct cpuidle_device *dev) { struct cpuidle_state *state = &dev->states[0]; + struct cpuidle_state_usage *state_usage = &dev->states_usage[0]; - cpuidle_set_statedata(state, NULL); + cpuidle_set_statedata(state_usage, NULL); snprintf(state->name, CPUIDLE_NAME_LEN, "POLL"); snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); @@ -235,8 +236,8 @@ int cpuidle_enable_device(struct cpuidle_device *dev) goto fail_sysfs; for (i = 0; i < dev->state_count; i++) { - dev->states[i].usage = 0; - dev->states[i].time = 0; + dev->states_usage[i].usage = 0; + dev->states_usage[i].time = 0; } dev->last_residency = 0; -- cgit v1.1 From 46bcfad7a819bd17ac4e831b04405152d59784ab Mon Sep 17 00:00:00 2001 From: Deepthi Dharwar Date: Fri, 28 Oct 2011 16:20:42 +0530 Subject: cpuidle: Single/Global registration of idle states This patch makes the cpuidle_states structure global (single copy) instead of per-cpu. The statistics needed on per-cpu basis by the governor are kept per-cpu. This simplifies the cpuidle subsystem as state registration is done by single cpu only. Having single copy of cpuidle_states saves memory. Rare case of asymmetric C-states can be handled within the cpuidle driver and architectures such as POWER do not have asymmetric C-states. Having single/global registration of all the idle states, dynamic C-state transitions on x86 are handled by the boot cpu. Here, the boot cpu would disable all the devices, re-populate the states and later enable all the devices, irrespective of the cpu that would receive the notification first. Reference: https://lkml.org/lkml/2011/4/25/83 Signed-off-by: Deepthi Dharwar Signed-off-by: Trinabh Gupta Tested-by: Jean Pihet Reviewed-by: Kevin Hilman Acked-by: Arjan van de Ven Acked-by: Kevin Hilman Signed-off-by: Len Brown --- drivers/cpuidle/cpuidle.c | 45 ++++++++++++--------------------------------- 1 file changed, 12 insertions(+), 33 deletions(-) (limited to 'drivers/cpuidle/cpuidle.c') diff --git a/drivers/cpuidle/cpuidle.c b/drivers/cpuidle/cpuidle.c index 7127e92..7a57b11 100644 --- a/drivers/cpuidle/cpuidle.c +++ b/drivers/cpuidle/cpuidle.c @@ -61,6 +61,7 @@ static int __cpuidle_register_device(struct cpuidle_device *dev); int cpuidle_idle_call(void) { struct cpuidle_device *dev = __this_cpu_read(cpuidle_devices); + struct cpuidle_driver *drv = cpuidle_get_driver(); struct cpuidle_state *target_state; int next_state, entered_state; @@ -84,18 +85,18 @@ int cpuidle_idle_call(void) #endif /* ask the governor for the next state */ - next_state = cpuidle_curr_governor->select(dev); + next_state = cpuidle_curr_governor->select(drv, dev); if (need_resched()) { local_irq_enable(); return 0; } - target_state = &dev->states[next_state]; + target_state = &drv->states[next_state]; trace_power_start(POWER_CSTATE, next_state, dev->cpu); trace_cpu_idle(next_state, dev->cpu); - entered_state = target_state->enter(dev, next_state); + entered_state = target_state->enter(dev, drv, next_state); trace_power_end(dev->cpu); trace_cpu_idle(PWR_EVENT_EXIT, dev->cpu); @@ -163,7 +164,8 @@ void cpuidle_resume_and_unlock(void) EXPORT_SYMBOL_GPL(cpuidle_resume_and_unlock); #ifdef CONFIG_ARCH_HAS_CPU_RELAX -static int poll_idle(struct cpuidle_device *dev, int index) +static int poll_idle(struct cpuidle_device *dev, + struct cpuidle_driver *drv, int index) { ktime_t t1, t2; s64 diff; @@ -183,12 +185,9 @@ static int poll_idle(struct cpuidle_device *dev, int index) return index; } -static void poll_idle_init(struct cpuidle_device *dev) +static void poll_idle_init(struct cpuidle_driver *drv) { - struct cpuidle_state *state = &dev->states[0]; - struct cpuidle_state_usage *state_usage = &dev->states_usage[0]; - - cpuidle_set_statedata(state_usage, NULL); + struct cpuidle_state *state = &drv->states[0]; snprintf(state->name, CPUIDLE_NAME_LEN, "POLL"); snprintf(state->desc, CPUIDLE_DESC_LEN, "CPUIDLE CORE POLL IDLE"); @@ -199,7 +198,7 @@ static void poll_idle_init(struct cpuidle_device *dev) state->enter = poll_idle; } #else -static void poll_idle_init(struct cpuidle_device *dev) {} +static void poll_idle_init(struct cpuidle_driver *drv) {} #endif /* CONFIG_ARCH_HAS_CPU_RELAX */ /** @@ -226,13 +225,13 @@ int cpuidle_enable_device(struct cpuidle_device *dev) return ret; } - poll_idle_init(dev); + poll_idle_init(cpuidle_get_driver()); if ((ret = cpuidle_add_state_sysfs(dev))) return ret; if (cpuidle_curr_governor->enable && - (ret = cpuidle_curr_governor->enable(dev))) + (ret = cpuidle_curr_governor->enable(cpuidle_get_driver(), dev))) goto fail_sysfs; for (i = 0; i < dev->state_count; i++) { @@ -273,7 +272,7 @@ void cpuidle_disable_device(struct cpuidle_device *dev) dev->enabled = 0; if (cpuidle_curr_governor->disable) - cpuidle_curr_governor->disable(dev); + cpuidle_curr_governor->disable(cpuidle_get_driver(), dev); cpuidle_remove_state_sysfs(dev); enabled_devices--; @@ -301,26 +300,6 @@ static int __cpuidle_register_device(struct cpuidle_device *dev) init_completion(&dev->kobj_unregister); - /* - * cpuidle driver should set the dev->power_specified bit - * before registering the device if the driver provides - * power_usage numbers. - * - * For those devices whose ->power_specified is not set, - * we fill in power_usage with decreasing values as the - * cpuidle code has an implicit assumption that state Cn - * uses less power than C(n-1). - * - * With CONFIG_ARCH_HAS_CPU_RELAX, C0 is already assigned - * an power value of -1. So we use -2, -3, etc, for other - * c-states. - */ - if (!dev->power_specified) { - int i; - for (i = CPUIDLE_DRIVER_STATE_START; i < dev->state_count; i++) - dev->states[i].power_usage = -1 - i; - } - per_cpu(cpuidle_devices, dev->cpu) = dev; list_add(&dev->device_list, &cpuidle_detected_devices); if ((ret = cpuidle_add_sysfs(sys_dev))) { -- cgit v1.1