diff options
author | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2015-08-07 03:25:16 +0200 |
---|---|---|
committer | Rafael J. Wysocki <rafael.j.wysocki@intel.com> | 2015-08-07 03:25:16 +0200 |
commit | 2dc36ecfaa0e668eabdc6b7df4592a08ef8776ee (patch) | |
tree | 91c8968d7bd3c0201c4be6018dab901c1d23369e | |
parent | 19445b25e350ebebaa304bb2135619f643302947 (diff) | |
parent | 144c8e172b5c388ddf41fa64e154f53384ec3448 (diff) | |
download | op-kernel-dev-2dc36ecfaa0e668eabdc6b7df4592a08ef8776ee.zip op-kernel-dev-2dc36ecfaa0e668eabdc6b7df4592a08ef8776ee.tar.gz |
Merge branch 'pm-cpufreq' into pm-opp
-rw-r--r-- | arch/powerpc/include/asm/opal-api.h | 12 | ||||
-rw-r--r-- | drivers/acpi/processor_perflib.c | 4 | ||||
-rw-r--r-- | drivers/cpufreq/acpi-cpufreq.c | 93 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 322 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 25 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.c | 196 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_governor.h | 40 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_ondemand.c | 67 | ||||
-rw-r--r-- | drivers/cpufreq/e_powersaver.c | 2 | ||||
-rw-r--r-- | drivers/cpufreq/ia64-acpi-cpufreq.c | 20 | ||||
-rw-r--r-- | drivers/cpufreq/integrator-cpufreq.c | 18 | ||||
-rw-r--r-- | drivers/cpufreq/intel_pstate.c | 15 | ||||
-rw-r--r-- | drivers/cpufreq/powernow-k7.c | 4 | ||||
-rw-r--r-- | drivers/cpufreq/powernow-k8.c | 5 | ||||
-rw-r--r-- | drivers/cpufreq/powernv-cpufreq.c | 198 | ||||
-rw-r--r-- | drivers/xen/xen-acpi-processor.c | 16 | ||||
-rw-r--r-- | include/acpi/processor.h | 5 |
17 files changed, 638 insertions, 404 deletions
diff --git a/arch/powerpc/include/asm/opal-api.h b/arch/powerpc/include/asm/opal-api.h index e9e4c52..64dc9f5 100644 --- a/arch/powerpc/include/asm/opal-api.h +++ b/arch/powerpc/include/asm/opal-api.h @@ -361,6 +361,7 @@ enum opal_msg_type { OPAL_MSG_HMI_EVT, OPAL_MSG_DPO, OPAL_MSG_PRD, + OPAL_MSG_OCC, OPAL_MSG_TYPE_MAX, }; @@ -700,6 +701,17 @@ struct opal_prd_msg_header { struct opal_prd_msg; +#define OCC_RESET 0 +#define OCC_LOAD 1 +#define OCC_THROTTLE 2 +#define OCC_MAX_THROTTLE_STATUS 5 + +struct opal_occ_msg { + __be64 type; + __be64 chip; + __be64 throttle_status; +}; + /* * SG entries * diff --git a/drivers/acpi/processor_perflib.c b/drivers/acpi/processor_perflib.c index cfc8aba..36b6da2 100644 --- a/drivers/acpi/processor_perflib.c +++ b/drivers/acpi/processor_perflib.c @@ -784,9 +784,7 @@ acpi_processor_register_performance(struct acpi_processor_performance EXPORT_SYMBOL(acpi_processor_register_performance); -void -acpi_processor_unregister_performance(struct acpi_processor_performance - *performance, unsigned int cpu) +void acpi_processor_unregister_performance(unsigned int cpu) { struct acpi_processor *pr; diff --git a/drivers/cpufreq/acpi-cpufreq.c b/drivers/cpufreq/acpi-cpufreq.c index 0136dfc..15b921a 100644 --- a/drivers/cpufreq/acpi-cpufreq.c +++ b/drivers/cpufreq/acpi-cpufreq.c @@ -65,18 +65,21 @@ enum { #define MSR_K7_HWCR_CPB_DIS (1ULL << 25) struct acpi_cpufreq_data { - struct acpi_processor_performance *acpi_data; struct cpufreq_frequency_table *freq_table; unsigned int resume; unsigned int cpu_feature; + unsigned int acpi_perf_cpu; cpumask_var_t freqdomain_cpus; }; -static DEFINE_PER_CPU(struct acpi_cpufreq_data *, acfreq_data); - /* acpi_perf_data is a pointer to percpu data. */ static struct acpi_processor_performance __percpu *acpi_perf_data; +static inline struct acpi_processor_performance *to_perf_data(struct acpi_cpufreq_data *data) +{ + return per_cpu_ptr(acpi_perf_data, data->acpi_perf_cpu); +} + static struct cpufreq_driver acpi_cpufreq_driver; static unsigned int acpi_pstate_strict; @@ -144,7 +147,7 @@ static int _store_boost(int val) static ssize_t show_freqdomain_cpus(struct cpufreq_policy *policy, char *buf) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; return cpufreq_show_cpus(data->freqdomain_cpus, buf); } @@ -202,7 +205,7 @@ static unsigned extract_io(u32 value, struct acpi_cpufreq_data *data) struct acpi_processor_performance *perf; int i; - perf = data->acpi_data; + perf = to_perf_data(data); for (i = 0; i < perf->state_count; i++) { if (value == perf->states[i].status) @@ -221,7 +224,7 @@ static unsigned extract_msr(u32 msr, struct acpi_cpufreq_data *data) else msr &= INTEL_MSR_RANGE; - perf = data->acpi_data; + perf = to_perf_data(data); cpufreq_for_each_entry(pos, data->freq_table) if (msr == perf->states[pos->driver_data].status) @@ -327,7 +330,8 @@ static void drv_write(struct drv_cmd *cmd) put_cpu(); } -static u32 get_cur_val(const struct cpumask *mask) +static u32 +get_cur_val(const struct cpumask *mask, struct acpi_cpufreq_data *data) { struct acpi_processor_performance *perf; struct drv_cmd cmd; @@ -335,7 +339,7 @@ static u32 get_cur_val(const struct cpumask *mask) if (unlikely(cpumask_empty(mask))) return 0; - switch (per_cpu(acfreq_data, cpumask_first(mask))->cpu_feature) { + switch (data->cpu_feature) { case SYSTEM_INTEL_MSR_CAPABLE: cmd.type = SYSTEM_INTEL_MSR_CAPABLE; cmd.addr.msr.reg = MSR_IA32_PERF_CTL; @@ -346,7 +350,7 @@ static u32 get_cur_val(const struct cpumask *mask) break; case SYSTEM_IO_CAPABLE: cmd.type = SYSTEM_IO_CAPABLE; - perf = per_cpu(acfreq_data, cpumask_first(mask))->acpi_data; + perf = to_perf_data(data); cmd.addr.io.port = perf->control_register.address; cmd.addr.io.bit_width = perf->control_register.bit_width; break; @@ -364,19 +368,24 @@ static u32 get_cur_val(const struct cpumask *mask) static unsigned int get_cur_freq_on_cpu(unsigned int cpu) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, cpu); + struct acpi_cpufreq_data *data; + struct cpufreq_policy *policy; unsigned int freq; unsigned int cached_freq; pr_debug("get_cur_freq_on_cpu (%d)\n", cpu); - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { + policy = cpufreq_cpu_get(cpu); + if (unlikely(!policy)) return 0; - } - cached_freq = data->freq_table[data->acpi_data->state].frequency; - freq = extract_freq(get_cur_val(cpumask_of(cpu)), data); + data = policy->driver_data; + cpufreq_cpu_put(policy); + if (unlikely(!data || !data->freq_table)) + return 0; + + cached_freq = data->freq_table[to_perf_data(data)->state].frequency; + freq = extract_freq(get_cur_val(cpumask_of(cpu), data), data); if (freq != cached_freq) { /* * The dreaded BIOS frequency change behind our back. @@ -397,7 +406,7 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, unsigned int i; for (i = 0; i < 100; i++) { - cur_freq = extract_freq(get_cur_val(mask), data); + cur_freq = extract_freq(get_cur_val(mask, data), data); if (cur_freq == freq) return 1; udelay(10); @@ -408,18 +417,17 @@ static unsigned int check_freqs(const struct cpumask *mask, unsigned int freq, static int acpi_cpufreq_target(struct cpufreq_policy *policy, unsigned int index) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; struct acpi_processor_performance *perf; struct drv_cmd cmd; unsigned int next_perf_state = 0; /* Index into perf table */ int result = 0; - if (unlikely(data == NULL || - data->acpi_data == NULL || data->freq_table == NULL)) { + if (unlikely(data == NULL || data->freq_table == NULL)) { return -ENODEV; } - perf = data->acpi_data; + perf = to_perf_data(data); next_perf_state = data->freq_table[index].driver_data; if (perf->state == next_perf_state) { if (unlikely(data->resume)) { @@ -482,8 +490,9 @@ out: static unsigned long acpi_cpufreq_guess_freq(struct acpi_cpufreq_data *data, unsigned int cpu) { - struct acpi_processor_performance *perf = data->acpi_data; + struct acpi_processor_performance *perf; + perf = to_perf_data(data); if (cpu_khz) { /* search the closest match to cpu_khz */ unsigned int i; @@ -672,17 +681,17 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) goto err_free; } - data->acpi_data = per_cpu_ptr(acpi_perf_data, cpu); - per_cpu(acfreq_data, cpu) = data; + perf = per_cpu_ptr(acpi_perf_data, cpu); + data->acpi_perf_cpu = cpu; + policy->driver_data = data; if (cpu_has(c, X86_FEATURE_CONSTANT_TSC)) acpi_cpufreq_driver.flags |= CPUFREQ_CONST_LOOPS; - result = acpi_processor_register_performance(data->acpi_data, cpu); + result = acpi_processor_register_performance(perf, cpu); if (result) goto err_free_mask; - perf = data->acpi_data; policy->shared_type = perf->shared_type; /* @@ -838,26 +847,25 @@ static int acpi_cpufreq_cpu_init(struct cpufreq_policy *policy) err_freqfree: kfree(data->freq_table); err_unreg: - acpi_processor_unregister_performance(perf, cpu); + acpi_processor_unregister_performance(cpu); err_free_mask: free_cpumask_var(data->freqdomain_cpus); err_free: kfree(data); - per_cpu(acfreq_data, cpu) = NULL; + policy->driver_data = NULL; return result; } static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; pr_debug("acpi_cpufreq_cpu_exit\n"); if (data) { - per_cpu(acfreq_data, policy->cpu) = NULL; - acpi_processor_unregister_performance(data->acpi_data, - policy->cpu); + policy->driver_data = NULL; + acpi_processor_unregister_performance(data->acpi_perf_cpu); free_cpumask_var(data->freqdomain_cpus); kfree(data->freq_table); kfree(data); @@ -868,7 +876,7 @@ static int acpi_cpufreq_cpu_exit(struct cpufreq_policy *policy) static int acpi_cpufreq_resume(struct cpufreq_policy *policy) { - struct acpi_cpufreq_data *data = per_cpu(acfreq_data, policy->cpu); + struct acpi_cpufreq_data *data = policy->driver_data; pr_debug("acpi_cpufreq_resume\n"); @@ -880,7 +888,9 @@ static int acpi_cpufreq_resume(struct cpufreq_policy *policy) static struct freq_attr *acpi_cpufreq_attr[] = { &cpufreq_freq_attr_scaling_available_freqs, &freqdomain_cpus, - NULL, /* this is a placeholder for cpb, do not remove */ +#ifdef CONFIG_X86_ACPI_CPUFREQ_CPB + &cpb, +#endif NULL, }; @@ -953,17 +963,16 @@ static int __init acpi_cpufreq_init(void) * only if configured. This is considered legacy code, which * will probably be removed at some point in the future. */ - if (check_amd_hwpstate_cpu(0)) { - struct freq_attr **iter; - - pr_debug("adding sysfs entry for cpb\n"); + if (!check_amd_hwpstate_cpu(0)) { + struct freq_attr **attr; - for (iter = acpi_cpufreq_attr; *iter != NULL; iter++) - ; + pr_debug("CPB unsupported, do not expose it\n"); - /* make sure there is a terminator behind it */ - if (iter[1] == NULL) - *iter = &cpb; + for (attr = acpi_cpufreq_attr; *attr; attr++) + if (*attr == &cpb) { + *attr = NULL; + break; + } } #endif acpi_cpufreq_boost_init(); diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c index 7a3c30c..76a2660 100644 --- a/drivers/cpufreq/cpufreq.c +++ b/drivers/cpufreq/cpufreq.c @@ -112,12 +112,6 @@ static inline bool has_target(void) return cpufreq_driver->target_index || cpufreq_driver->target; } -/* - * rwsem to guarantee that cpufreq driver module doesn't unload during critical - * sections - */ -static DECLARE_RWSEM(cpufreq_rwsem); - /* internal prototypes */ static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); @@ -277,10 +271,6 @@ EXPORT_SYMBOL_GPL(cpufreq_generic_get); * If corresponding call cpufreq_cpu_put() isn't made, the policy wouldn't be * freed as that depends on the kobj count. * - * It also takes a read-lock of 'cpufreq_rwsem' and doesn't put it back if a - * valid policy is found. This is done to make sure the driver doesn't get - * unregistered while the policy is being used. - * * Return: A valid policy on success, otherwise NULL on failure. */ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) @@ -291,9 +281,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) if (WARN_ON(cpu >= nr_cpu_ids)) return NULL; - if (!down_read_trylock(&cpufreq_rwsem)) - return NULL; - /* get the cpufreq driver */ read_lock_irqsave(&cpufreq_driver_lock, flags); @@ -306,9 +293,6 @@ struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) read_unlock_irqrestore(&cpufreq_driver_lock, flags); - if (!policy) - up_read(&cpufreq_rwsem); - return policy; } EXPORT_SYMBOL_GPL(cpufreq_cpu_get); @@ -320,13 +304,10 @@ EXPORT_SYMBOL_GPL(cpufreq_cpu_get); * * This decrements the kobject reference count incremented earlier by calling * cpufreq_cpu_get(). - * - * It also drops the read-lock of 'cpufreq_rwsem' taken at cpufreq_cpu_get(). */ void cpufreq_cpu_put(struct cpufreq_policy *policy) { kobject_put(&policy->kobj); - up_read(&cpufreq_rwsem); } EXPORT_SYMBOL_GPL(cpufreq_cpu_put); @@ -851,9 +832,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) struct freq_attr *fattr = to_attr(attr); ssize_t ret; - if (!down_read_trylock(&cpufreq_rwsem)) - return -EINVAL; - down_read(&policy->rwsem); if (fattr->show) @@ -862,7 +840,6 @@ static ssize_t show(struct kobject *kobj, struct attribute *attr, char *buf) ret = -EIO; up_read(&policy->rwsem); - up_read(&cpufreq_rwsem); return ret; } @@ -879,9 +856,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, if (!cpu_online(policy->cpu)) goto unlock; - if (!down_read_trylock(&cpufreq_rwsem)) - goto unlock; - down_write(&policy->rwsem); /* Updating inactive policies is invalid, so avoid doing that. */ @@ -897,8 +871,6 @@ static ssize_t store(struct kobject *kobj, struct attribute *attr, unlock_policy_rwsem: up_write(&policy->rwsem); - - up_read(&cpufreq_rwsem); unlock: put_online_cpus(); @@ -1027,8 +999,7 @@ static void cpufreq_remove_dev_symlink(struct cpufreq_policy *policy) } } -static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, - struct device *dev) +static int cpufreq_add_dev_interface(struct cpufreq_policy *policy) { struct freq_attr **drv_attr; int ret = 0; @@ -1060,11 +1031,10 @@ static int cpufreq_add_dev_interface(struct cpufreq_policy *policy, return cpufreq_add_dev_symlink(policy); } -static void cpufreq_init_policy(struct cpufreq_policy *policy) +static int cpufreq_init_policy(struct cpufreq_policy *policy) { struct cpufreq_governor *gov = NULL; struct cpufreq_policy new_policy; - int ret = 0; memcpy(&new_policy, policy, sizeof(*policy)); @@ -1083,16 +1053,10 @@ static void cpufreq_init_policy(struct cpufreq_policy *policy) cpufreq_parse_governor(gov->name, &new_policy.policy, NULL); /* set default policy */ - ret = cpufreq_set_policy(policy, &new_policy); - if (ret) { - pr_debug("setting policy failed\n"); - if (cpufreq_driver->exit) - cpufreq_driver->exit(policy); - } + return cpufreq_set_policy(policy, &new_policy); } -static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, - unsigned int cpu, struct device *dev) +static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, unsigned int cpu) { int ret = 0; @@ -1126,33 +1090,15 @@ static int cpufreq_add_policy_cpu(struct cpufreq_policy *policy, return 0; } -static struct cpufreq_policy *cpufreq_policy_restore(unsigned int cpu) -{ - struct cpufreq_policy *policy; - unsigned long flags; - - read_lock_irqsave(&cpufreq_driver_lock, flags); - policy = per_cpu(cpufreq_cpu_data, cpu); - read_unlock_irqrestore(&cpufreq_driver_lock, flags); - - if (likely(policy)) { - /* Policy should be inactive here */ - WARN_ON(!policy_is_inactive(policy)); - - down_write(&policy->rwsem); - policy->cpu = cpu; - policy->governor = NULL; - up_write(&policy->rwsem); - } - - return policy; -} - -static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) +static struct cpufreq_policy *cpufreq_policy_alloc(unsigned int cpu) { + struct device *dev = get_cpu_device(cpu); struct cpufreq_policy *policy; int ret; + if (WARN_ON(!dev)) + return NULL; + policy = kzalloc(sizeof(*policy), GFP_KERNEL); if (!policy) return NULL; @@ -1180,10 +1126,10 @@ static struct cpufreq_policy *cpufreq_policy_alloc(struct device *dev) init_completion(&policy->kobj_unregister); INIT_WORK(&policy->update, handle_update); - policy->cpu = dev->id; + policy->cpu = cpu; /* Set this once on allocation */ - policy->kobj_cpu = dev->id; + policy->kobj_cpu = cpu; return policy; @@ -1245,59 +1191,34 @@ static void cpufreq_policy_free(struct cpufreq_policy *policy, bool notify) kfree(policy); } -/** - * cpufreq_add_dev - add a CPU device - * - * Adds the cpufreq interface for a CPU device. - * - * The Oracle says: try running cpufreq registration/unregistration concurrently - * with with cpu hotplugging and all hell will break loose. Tried to clean this - * mess up, but more thorough testing is needed. - Mathieu - */ -static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) +static int cpufreq_online(unsigned int cpu) { - unsigned int j, cpu = dev->id; - int ret = -ENOMEM; struct cpufreq_policy *policy; + bool new_policy; unsigned long flags; - bool recover_policy = !sif; - - pr_debug("adding CPU %u\n", cpu); - - if (cpu_is_offline(cpu)) { - /* - * Only possible if we are here from the subsys_interface add - * callback. A hotplug notifier will follow and we will handle - * it as CPU online then. For now, just create the sysfs link, - * unless there is no policy or the link is already present. - */ - policy = per_cpu(cpufreq_cpu_data, cpu); - return policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus) - ? add_cpu_dev_symlink(policy, cpu) : 0; - } + unsigned int j; + int ret; - if (!down_read_trylock(&cpufreq_rwsem)) - return 0; + pr_debug("%s: bringing CPU%u online\n", __func__, cpu); /* Check if this CPU already has a policy to manage it */ policy = per_cpu(cpufreq_cpu_data, cpu); - if (policy && !policy_is_inactive(policy)) { + if (policy) { WARN_ON(!cpumask_test_cpu(cpu, policy->related_cpus)); - ret = cpufreq_add_policy_cpu(policy, cpu, dev); - up_read(&cpufreq_rwsem); - return ret; - } + if (!policy_is_inactive(policy)) + return cpufreq_add_policy_cpu(policy, cpu); - /* - * Restore the saved policy when doing light-weight init and fall back - * to the full init if that fails. - */ - policy = recover_policy ? cpufreq_policy_restore(cpu) : NULL; - if (!policy) { - recover_policy = false; - policy = cpufreq_policy_alloc(dev); + /* This is the only online CPU for the policy. Start over. */ + new_policy = false; + down_write(&policy->rwsem); + policy->cpu = cpu; + policy->governor = NULL; + up_write(&policy->rwsem); + } else { + new_policy = true; + policy = cpufreq_policy_alloc(cpu); if (!policy) - goto nomem_out; + return -ENOMEM; } cpumask_copy(policy->cpus, cpumask_of(cpu)); @@ -1308,17 +1229,17 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) ret = cpufreq_driver->init(policy); if (ret) { pr_debug("initialization failed\n"); - goto err_set_policy_cpu; + goto out_free_policy; } down_write(&policy->rwsem); - /* related cpus should atleast have policy->cpus */ - cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); - - /* Remember which CPUs have been present at the policy creation time. */ - if (!recover_policy) + if (new_policy) { + /* related_cpus should at least include policy->cpus. */ + cpumask_or(policy->related_cpus, policy->related_cpus, policy->cpus); + /* Remember CPUs present at the policy creation time. */ cpumask_and(policy->real_cpus, policy->cpus, cpu_present_mask); + } /* * affected cpus must always be the one, which are online. We aren't @@ -1326,7 +1247,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) */ cpumask_and(policy->cpus, policy->cpus, cpu_online_mask); - if (!recover_policy) { + if (new_policy) { policy->user_policy.min = policy->min; policy->user_policy.max = policy->max; @@ -1340,7 +1261,7 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) policy->cur = cpufreq_driver->get(policy->cpu); if (!policy->cur) { pr_err("%s: ->get() failed\n", __func__); - goto err_get_freq; + goto out_exit_policy; } } @@ -1387,10 +1308,10 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_START, policy); - if (!recover_policy) { - ret = cpufreq_add_dev_interface(policy, dev); + if (new_policy) { + ret = cpufreq_add_dev_interface(policy); if (ret) - goto err_out_unregister; + goto out_exit_policy; blocking_notifier_call_chain(&cpufreq_policy_notifier_list, CPUFREQ_CREATE_POLICY, policy); @@ -1399,9 +1320,16 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) write_unlock_irqrestore(&cpufreq_driver_lock, flags); } - cpufreq_init_policy(policy); + ret = cpufreq_init_policy(policy); + if (ret) { + pr_err("%s: Failed to initialize policy for cpu: %d (%d)\n", + __func__, cpu, ret); + /* cpufreq_policy_free() will notify based on this */ + new_policy = false; + goto out_exit_policy; + } - if (!recover_policy) { + if (new_policy) { policy->user_policy.policy = policy->policy; policy->user_policy.governor = policy->governor; } @@ -1409,8 +1337,6 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) kobject_uevent(&policy->kobj, KOBJ_ADD); - up_read(&cpufreq_rwsem); - /* Callback for handling stuff after policy is ready */ if (cpufreq_driver->ready) cpufreq_driver->ready(policy); @@ -1419,24 +1345,47 @@ static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) return 0; -err_out_unregister: -err_get_freq: +out_exit_policy: up_write(&policy->rwsem); if (cpufreq_driver->exit) cpufreq_driver->exit(policy); -err_set_policy_cpu: - cpufreq_policy_free(policy, recover_policy); -nomem_out: - up_read(&cpufreq_rwsem); +out_free_policy: + cpufreq_policy_free(policy, !new_policy); + return ret; +} + +/** + * cpufreq_add_dev - the cpufreq interface for a CPU device. + * @dev: CPU device. + * @sif: Subsystem interface structure pointer (not used) + */ +static int cpufreq_add_dev(struct device *dev, struct subsys_interface *sif) +{ + unsigned cpu = dev->id; + int ret; + + dev_dbg(dev, "%s: adding CPU%u\n", __func__, cpu); + + if (cpu_online(cpu)) { + ret = cpufreq_online(cpu); + } else { + /* + * A hotplug notifier will follow and we will handle it as CPU + * online then. For now, just create the sysfs link, unless + * there is no policy or the link is already present. + */ + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); + + ret = policy && !cpumask_test_and_set_cpu(cpu, policy->real_cpus) + ? add_cpu_dev_symlink(policy, cpu) : 0; + } return ret; } -static int __cpufreq_remove_dev_prepare(struct device *dev) +static void cpufreq_offline_prepare(unsigned int cpu) { - unsigned int cpu = dev->id; - int ret = 0; struct cpufreq_policy *policy; pr_debug("%s: unregistering CPU %u\n", __func__, cpu); @@ -1444,11 +1393,11 @@ static int __cpufreq_remove_dev_prepare(struct device *dev) policy = cpufreq_cpu_get_raw(cpu); if (!policy) { pr_debug("%s: No cpu_data found\n", __func__); - return -EINVAL; + return; } if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + int ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); if (ret) pr_err("%s: Failed to stop governor\n", __func__); } @@ -1469,7 +1418,7 @@ static int __cpufreq_remove_dev_prepare(struct device *dev) /* Start governor again for active policy */ if (!policy_is_inactive(policy)) { if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + int ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); if (!ret) ret = __cpufreq_governor(policy, CPUFREQ_GOV_LIMITS); @@ -1479,28 +1428,24 @@ static int __cpufreq_remove_dev_prepare(struct device *dev) } else if (cpufreq_driver->stop_cpu) { cpufreq_driver->stop_cpu(policy); } - - return ret; } -static int __cpufreq_remove_dev_finish(struct device *dev) +static void cpufreq_offline_finish(unsigned int cpu) { - unsigned int cpu = dev->id; - int ret; struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); if (!policy) { pr_debug("%s: No cpu_data found\n", __func__); - return -EINVAL; + return; } /* Only proceed for inactive policies */ if (!policy_is_inactive(policy)) - return 0; + return; /* If cpu is last user of policy, free policy */ if (has_target()) { - ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + int ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); if (ret) pr_err("%s: Failed to exit governor\n", __func__); } @@ -1512,8 +1457,6 @@ static int __cpufreq_remove_dev_finish(struct device *dev) */ if (cpufreq_driver->exit) cpufreq_driver->exit(policy); - - return 0; } /** @@ -1530,8 +1473,8 @@ static int cpufreq_remove_dev(struct device *dev, struct subsys_interface *sif) return 0; if (cpu_online(cpu)) { - __cpufreq_remove_dev_prepare(dev); - __cpufreq_remove_dev_finish(dev); + cpufreq_offline_prepare(cpu); + cpufreq_offline_finish(cpu); } cpumask_clear_cpu(cpu, policy->real_cpus); @@ -2247,7 +2190,11 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, memcpy(&new_policy->cpuinfo, &policy->cpuinfo, sizeof(policy->cpuinfo)); - if (new_policy->min > policy->max || new_policy->max < policy->min) + /* + * This check works well when we store new min/max freq attributes, + * because new_policy is a copy of policy with one field updated. + */ + if (new_policy->min > new_policy->max) return -EINVAL; /* verify the cpu speed can be set within this limit */ @@ -2296,16 +2243,31 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, old_gov = policy->governor; /* end old governor */ if (old_gov) { - __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_STOP); + if (ret) { + /* This can happen due to race with other operations */ + pr_debug("%s: Failed to Stop Governor: %s (%d)\n", + __func__, old_gov->name, ret); + return ret; + } + up_write(&policy->rwsem); - __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); + ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_EXIT); down_write(&policy->rwsem); + + if (ret) { + pr_err("%s: Failed to Exit Governor: %s (%d)\n", + __func__, old_gov->name, ret); + return ret; + } } /* start new governor */ policy->governor = new_policy->governor; - if (!__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) { - if (!__cpufreq_governor(policy, CPUFREQ_GOV_START)) + ret = __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); + if (!ret) { + ret = __cpufreq_governor(policy, CPUFREQ_GOV_START); + if (!ret) goto out; up_write(&policy->rwsem); @@ -2317,11 +2279,13 @@ static int cpufreq_set_policy(struct cpufreq_policy *policy, pr_debug("starting governor %s failed\n", policy->governor->name); if (old_gov) { policy->governor = old_gov; - __cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT); - __cpufreq_governor(policy, CPUFREQ_GOV_START); + if (__cpufreq_governor(policy, CPUFREQ_GOV_POLICY_INIT)) + policy->governor = NULL; + else + __cpufreq_governor(policy, CPUFREQ_GOV_START); } - return -EINVAL; + return ret; out: pr_debug("governor: change or update limits\n"); @@ -2387,27 +2351,23 @@ static int cpufreq_cpu_callback(struct notifier_block *nfb, unsigned long action, void *hcpu) { unsigned int cpu = (unsigned long)hcpu; - struct device *dev; - dev = get_cpu_device(cpu); - if (dev) { - switch (action & ~CPU_TASKS_FROZEN) { - case CPU_ONLINE: - cpufreq_add_dev(dev, NULL); - break; + switch (action & ~CPU_TASKS_FROZEN) { + case CPU_ONLINE: + cpufreq_online(cpu); + break; - case CPU_DOWN_PREPARE: - __cpufreq_remove_dev_prepare(dev); - break; + case CPU_DOWN_PREPARE: + cpufreq_offline_prepare(cpu); + break; - case CPU_POST_DEAD: - __cpufreq_remove_dev_finish(dev); - break; + case CPU_POST_DEAD: + cpufreq_offline_finish(cpu); + break; - case CPU_DOWN_FAILED: - cpufreq_add_dev(dev, NULL); - break; - } + case CPU_DOWN_FAILED: + cpufreq_online(cpu); + break; } return NOTIFY_OK; } @@ -2515,10 +2475,14 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) pr_debug("trying to register driver %s\n", driver_data->name); + /* Protect against concurrent CPU online/offline. */ + get_online_cpus(); + write_lock_irqsave(&cpufreq_driver_lock, flags); if (cpufreq_driver) { write_unlock_irqrestore(&cpufreq_driver_lock, flags); - return -EEXIST; + ret = -EEXIST; + goto out; } cpufreq_driver = driver_data; write_unlock_irqrestore(&cpufreq_driver_lock, flags); @@ -2557,7 +2521,10 @@ int cpufreq_register_driver(struct cpufreq_driver *driver_data) register_hotcpu_notifier(&cpufreq_cpu_notifier); pr_debug("driver %s up and running\n", driver_data->name); - return 0; +out: + put_online_cpus(); + return ret; + err_if_unreg: subsys_interface_unregister(&cpufreq_interface); err_boost_unreg: @@ -2567,7 +2534,7 @@ err_null_driver: write_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - return ret; + goto out; } EXPORT_SYMBOL_GPL(cpufreq_register_driver); @@ -2588,19 +2555,20 @@ int cpufreq_unregister_driver(struct cpufreq_driver *driver) pr_debug("unregistering driver %s\n", driver->name); + /* Protect against concurrent cpu hotplug */ + get_online_cpus(); subsys_interface_unregister(&cpufreq_interface); if (cpufreq_boost_supported()) cpufreq_sysfs_remove_file(&boost.attr); unregister_hotcpu_notifier(&cpufreq_cpu_notifier); - down_write(&cpufreq_rwsem); write_lock_irqsave(&cpufreq_driver_lock, flags); cpufreq_driver = NULL; write_unlock_irqrestore(&cpufreq_driver_lock, flags); - up_write(&cpufreq_rwsem); + put_online_cpus(); return 0; } diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c index c86a10c..84a1506 100644 --- a/drivers/cpufreq/cpufreq_conservative.c +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -47,7 +47,7 @@ static inline unsigned int get_freq_target(struct cs_dbs_tuners *cs_tuners, static void cs_check_cpu(int cpu, unsigned int load) { struct cs_cpu_dbs_info_s *dbs_info = &per_cpu(cs_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; struct dbs_data *dbs_data = policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; @@ -102,26 +102,15 @@ static void cs_check_cpu(int cpu, unsigned int load) } } -static void cs_dbs_timer(struct work_struct *work) +static unsigned int cs_dbs_timer(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, bool modify_all) { - struct cs_cpu_dbs_info_s *dbs_info = container_of(work, - struct cs_cpu_dbs_info_s, cdbs.work.work); - unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; - struct cs_cpu_dbs_info_s *core_dbs_info = &per_cpu(cs_cpu_dbs_info, - cpu); - struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - int delay = delay_for_sampling_rate(cs_tuners->sampling_rate); - bool modify_all = true; - mutex_lock(&core_dbs_info->cdbs.timer_mutex); - if (!need_load_eval(&core_dbs_info->cdbs, cs_tuners->sampling_rate)) - modify_all = false; - else - dbs_check_cpu(dbs_data, cpu); + if (modify_all) + dbs_check_cpu(dbs_data, cdbs->shared->policy->cpu); - gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all); - mutex_unlock(&core_dbs_info->cdbs.timer_mutex); + return delay_for_sampling_rate(cs_tuners->sampling_rate); } static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, @@ -135,7 +124,7 @@ static int dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, if (!dbs_info->enable) return 0; - policy = dbs_info->cdbs.cur_policy; + policy = dbs_info->cdbs.shared->policy; /* * we only care if our internally tracked freq moves outside the 'valid' diff --git a/drivers/cpufreq/cpufreq_governor.c b/drivers/cpufreq/cpufreq_governor.c index 57a39f8..939197f 100644 --- a/drivers/cpufreq/cpufreq_governor.c +++ b/drivers/cpufreq/cpufreq_governor.c @@ -32,10 +32,10 @@ static struct attribute_group *get_sysfs_attr(struct dbs_data *dbs_data) void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) { - struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); struct od_dbs_tuners *od_tuners = dbs_data->tuners; struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; - struct cpufreq_policy *policy; + struct cpufreq_policy *policy = cdbs->shared->policy; unsigned int sampling_rate; unsigned int max_load = 0; unsigned int ignore_nice; @@ -60,11 +60,9 @@ void dbs_check_cpu(struct dbs_data *dbs_data, int cpu) ignore_nice = cs_tuners->ignore_nice_load; } - policy = cdbs->cur_policy; - /* Get Absolute Load */ for_each_cpu(j, policy->cpus) { - struct cpu_dbs_common_info *j_cdbs; + struct cpu_dbs_info *j_cdbs; u64 cur_wall_time, cur_idle_time; unsigned int idle_time, wall_time; unsigned int load; @@ -163,9 +161,9 @@ EXPORT_SYMBOL_GPL(dbs_check_cpu); static inline void __gov_queue_work(int cpu, struct dbs_data *dbs_data, unsigned int delay) { - struct cpu_dbs_common_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = dbs_data->cdata->get_cpu_cdbs(cpu); - mod_delayed_work_on(cpu, system_wq, &cdbs->work, delay); + mod_delayed_work_on(cpu, system_wq, &cdbs->dwork, delay); } void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, @@ -199,33 +197,63 @@ EXPORT_SYMBOL_GPL(gov_queue_work); static inline void gov_cancel_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy) { - struct cpu_dbs_common_info *cdbs; + struct cpu_dbs_info *cdbs; int i; for_each_cpu(i, policy->cpus) { cdbs = dbs_data->cdata->get_cpu_cdbs(i); - cancel_delayed_work_sync(&cdbs->work); + cancel_delayed_work_sync(&cdbs->dwork); } } /* Will return if we need to evaluate cpu load again or not */ -bool need_load_eval(struct cpu_dbs_common_info *cdbs, - unsigned int sampling_rate) +static bool need_load_eval(struct cpu_common_dbs_info *shared, + unsigned int sampling_rate) { - if (policy_is_shared(cdbs->cur_policy)) { + if (policy_is_shared(shared->policy)) { ktime_t time_now = ktime_get(); - s64 delta_us = ktime_us_delta(time_now, cdbs->time_stamp); + s64 delta_us = ktime_us_delta(time_now, shared->time_stamp); /* Do nothing if we recently have sampled */ if (delta_us < (s64)(sampling_rate / 2)) return false; else - cdbs->time_stamp = time_now; + shared->time_stamp = time_now; } return true; } -EXPORT_SYMBOL_GPL(need_load_eval); + +static void dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info *cdbs = container_of(work, struct cpu_dbs_info, + dwork.work); + struct cpu_common_dbs_info *shared = cdbs->shared; + struct cpufreq_policy *policy = shared->policy; + struct dbs_data *dbs_data = policy->governor_data; + unsigned int sampling_rate, delay; + bool modify_all = true; + + mutex_lock(&shared->timer_mutex); + + if (dbs_data->cdata->governor == GOV_CONSERVATIVE) { + struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; + + sampling_rate = cs_tuners->sampling_rate; + } else { + struct od_dbs_tuners *od_tuners = dbs_data->tuners; + + sampling_rate = od_tuners->sampling_rate; + } + + if (!need_load_eval(cdbs->shared, sampling_rate)) + modify_all = false; + + delay = dbs_data->cdata->gov_dbs_timer(cdbs, dbs_data, modify_all); + gov_queue_work(dbs_data, policy, delay, modify_all); + + mutex_unlock(&shared->timer_mutex); +} static void set_sampling_rate(struct dbs_data *dbs_data, unsigned int sampling_rate) @@ -239,6 +267,37 @@ static void set_sampling_rate(struct dbs_data *dbs_data, } } +static int alloc_common_dbs_info(struct cpufreq_policy *policy, + struct common_dbs_data *cdata) +{ + struct cpu_common_dbs_info *shared; + int j; + + /* Allocate memory for the common information for policy->cpus */ + shared = kzalloc(sizeof(*shared), GFP_KERNEL); + if (!shared) + return -ENOMEM; + + /* Set shared for all CPUs, online+offline */ + for_each_cpu(j, policy->related_cpus) + cdata->get_cpu_cdbs(j)->shared = shared; + + return 0; +} + +static void free_common_dbs_info(struct cpufreq_policy *policy, + struct common_dbs_data *cdata) +{ + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); + struct cpu_common_dbs_info *shared = cdbs->shared; + int j; + + for_each_cpu(j, policy->cpus) + cdata->get_cpu_cdbs(j)->shared = NULL; + + kfree(shared); +} + static int cpufreq_governor_init(struct cpufreq_policy *policy, struct dbs_data *dbs_data, struct common_dbs_data *cdata) @@ -246,9 +305,18 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, unsigned int latency; int ret; + /* State should be equivalent to EXIT */ + if (policy->governor_data) + return -EBUSY; + if (dbs_data) { if (WARN_ON(have_governor_per_policy())) return -EINVAL; + + ret = alloc_common_dbs_info(policy, cdata); + if (ret) + return ret; + dbs_data->usage_count++; policy->governor_data = dbs_data; return 0; @@ -258,12 +326,16 @@ static int cpufreq_governor_init(struct cpufreq_policy *policy, if (!dbs_data) return -ENOMEM; + ret = alloc_common_dbs_info(policy, cdata); + if (ret) + goto free_dbs_data; + dbs_data->cdata = cdata; dbs_data->usage_count = 1; ret = cdata->init(dbs_data, !policy->governor->initialized); if (ret) - goto free_dbs_data; + goto free_common_dbs_info; /* policy latency is in ns. Convert it to us first */ latency = policy->cpuinfo.transition_latency / 1000; @@ -300,15 +372,22 @@ put_kobj: } cdata_exit: cdata->exit(dbs_data, !policy->governor->initialized); +free_common_dbs_info: + free_common_dbs_info(policy, cdata); free_dbs_data: kfree(dbs_data); return ret; } -static void cpufreq_governor_exit(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_exit(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) { struct common_dbs_data *cdata = dbs_data->cdata; + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(policy->cpu); + + /* State should be equivalent to INIT */ + if (!cdbs->shared || cdbs->shared->policy) + return -EBUSY; policy->governor_data = NULL; if (!--dbs_data->usage_count) { @@ -323,6 +402,9 @@ static void cpufreq_governor_exit(struct cpufreq_policy *policy, cdata->exit(dbs_data, policy->governor->initialized == 1); kfree(dbs_data); } + + free_common_dbs_info(policy, cdata); + return 0; } static int cpufreq_governor_start(struct cpufreq_policy *policy, @@ -330,12 +412,17 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int sampling_rate, ignore_nice, j, cpu = policy->cpu; - struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_common_dbs_info *shared = cdbs->shared; int io_busy = 0; if (!policy->cur) return -EINVAL; + /* State should be equivalent to INIT */ + if (!shared || shared->policy) + return -EBUSY; + if (cdata->governor == GOV_CONSERVATIVE) { struct cs_dbs_tuners *cs_tuners = dbs_data->tuners; @@ -349,12 +436,14 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, io_busy = od_tuners->io_is_busy; } + shared->policy = policy; + shared->time_stamp = ktime_get(); + mutex_init(&shared->timer_mutex); + for_each_cpu(j, policy->cpus) { - struct cpu_dbs_common_info *j_cdbs = cdata->get_cpu_cdbs(j); + struct cpu_dbs_info *j_cdbs = cdata->get_cpu_cdbs(j); unsigned int prev_load; - j_cdbs->cpu = j; - j_cdbs->cur_policy = policy; j_cdbs->prev_cpu_idle = get_cpu_idle_time(j, &j_cdbs->prev_cpu_wall, io_busy); @@ -366,8 +455,7 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, if (ignore_nice) j_cdbs->prev_cpu_nice = kcpustat_cpu(j).cpustat[CPUTIME_NICE]; - mutex_init(&j_cdbs->timer_mutex); - INIT_DEFERRABLE_WORK(&j_cdbs->work, cdata->gov_dbs_timer); + INIT_DEFERRABLE_WORK(&j_cdbs->dwork, dbs_timer); } if (cdata->governor == GOV_CONSERVATIVE) { @@ -386,20 +474,24 @@ static int cpufreq_governor_start(struct cpufreq_policy *policy, od_ops->powersave_bias_init_cpu(cpu); } - /* Initiate timer time stamp */ - cpu_cdbs->time_stamp = ktime_get(); - gov_queue_work(dbs_data, policy, delay_for_sampling_rate(sampling_rate), true); return 0; } -static void cpufreq_governor_stop(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_stop(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; - struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_common_dbs_info *shared = cdbs->shared; + + /* State should be equivalent to START */ + if (!shared || !shared->policy) + return -EBUSY; + + gov_cancel_work(dbs_data, policy); if (cdata->governor == GOV_CONSERVATIVE) { struct cs_cpu_dbs_info_s *cs_dbs_info = @@ -408,38 +500,40 @@ static void cpufreq_governor_stop(struct cpufreq_policy *policy, cs_dbs_info->enable = 0; } - gov_cancel_work(dbs_data, policy); - - mutex_destroy(&cpu_cdbs->timer_mutex); - cpu_cdbs->cur_policy = NULL; + shared->policy = NULL; + mutex_destroy(&shared->timer_mutex); + return 0; } -static void cpufreq_governor_limits(struct cpufreq_policy *policy, - struct dbs_data *dbs_data) +static int cpufreq_governor_limits(struct cpufreq_policy *policy, + struct dbs_data *dbs_data) { struct common_dbs_data *cdata = dbs_data->cdata; unsigned int cpu = policy->cpu; - struct cpu_dbs_common_info *cpu_cdbs = cdata->get_cpu_cdbs(cpu); + struct cpu_dbs_info *cdbs = cdata->get_cpu_cdbs(cpu); - if (!cpu_cdbs->cur_policy) - return; + /* State should be equivalent to START */ + if (!cdbs->shared || !cdbs->shared->policy) + return -EBUSY; - mutex_lock(&cpu_cdbs->timer_mutex); - if (policy->max < cpu_cdbs->cur_policy->cur) - __cpufreq_driver_target(cpu_cdbs->cur_policy, policy->max, + mutex_lock(&cdbs->shared->timer_mutex); + if (policy->max < cdbs->shared->policy->cur) + __cpufreq_driver_target(cdbs->shared->policy, policy->max, CPUFREQ_RELATION_H); - else if (policy->min > cpu_cdbs->cur_policy->cur) - __cpufreq_driver_target(cpu_cdbs->cur_policy, policy->min, + else if (policy->min > cdbs->shared->policy->cur) + __cpufreq_driver_target(cdbs->shared->policy, policy->min, CPUFREQ_RELATION_L); dbs_check_cpu(dbs_data, cpu); - mutex_unlock(&cpu_cdbs->timer_mutex); + mutex_unlock(&cdbs->shared->timer_mutex); + + return 0; } int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event) { struct dbs_data *dbs_data; - int ret = 0; + int ret; /* Lock governor to block concurrent initialization of governor */ mutex_lock(&cdata->mutex); @@ -449,7 +543,7 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, else dbs_data = cdata->gdbs_data; - if (WARN_ON(!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT))) { + if (!dbs_data && (event != CPUFREQ_GOV_POLICY_INIT)) { ret = -EINVAL; goto unlock; } @@ -459,17 +553,19 @@ int cpufreq_governor_dbs(struct cpufreq_policy *policy, ret = cpufreq_governor_init(policy, dbs_data, cdata); break; case CPUFREQ_GOV_POLICY_EXIT: - cpufreq_governor_exit(policy, dbs_data); + ret = cpufreq_governor_exit(policy, dbs_data); break; case CPUFREQ_GOV_START: ret = cpufreq_governor_start(policy, dbs_data); break; case CPUFREQ_GOV_STOP: - cpufreq_governor_stop(policy, dbs_data); + ret = cpufreq_governor_stop(policy, dbs_data); break; case CPUFREQ_GOV_LIMITS: - cpufreq_governor_limits(policy, dbs_data); + ret = cpufreq_governor_limits(policy, dbs_data); break; + default: + ret = -EINVAL; } unlock: diff --git a/drivers/cpufreq/cpufreq_governor.h b/drivers/cpufreq/cpufreq_governor.h index 34736f5..50f1717 100644 --- a/drivers/cpufreq/cpufreq_governor.h +++ b/drivers/cpufreq/cpufreq_governor.h @@ -109,7 +109,7 @@ store_one(_gov, file_name) /* create helper routines */ #define define_get_cpu_dbs_routines(_dbs_info) \ -static struct cpu_dbs_common_info *get_cpu_cdbs(int cpu) \ +static struct cpu_dbs_info *get_cpu_cdbs(int cpu) \ { \ return &per_cpu(_dbs_info, cpu).cdbs; \ } \ @@ -128,9 +128,20 @@ static void *get_cpu_dbs_info_s(int cpu) \ * cs_*: Conservative governor */ +/* Common to all CPUs of a policy */ +struct cpu_common_dbs_info { + struct cpufreq_policy *policy; + /* + * percpu mutex that serializes governor limit change with dbs_timer + * invocation. We do not want dbs_timer to run when user is changing + * the governor or limits. + */ + struct mutex timer_mutex; + ktime_t time_stamp; +}; + /* Per cpu structures */ -struct cpu_dbs_common_info { - int cpu; +struct cpu_dbs_info { u64 prev_cpu_idle; u64 prev_cpu_wall; u64 prev_cpu_nice; @@ -141,19 +152,12 @@ struct cpu_dbs_common_info { * wake-up from idle. */ unsigned int prev_load; - struct cpufreq_policy *cur_policy; - struct delayed_work work; - /* - * percpu mutex that serializes governor limit change with gov_dbs_timer - * invocation. We do not want gov_dbs_timer to run when user is changing - * the governor or limits. - */ - struct mutex timer_mutex; - ktime_t time_stamp; + struct delayed_work dwork; + struct cpu_common_dbs_info *shared; }; struct od_cpu_dbs_info_s { - struct cpu_dbs_common_info cdbs; + struct cpu_dbs_info cdbs; struct cpufreq_frequency_table *freq_table; unsigned int freq_lo; unsigned int freq_lo_jiffies; @@ -163,7 +167,7 @@ struct od_cpu_dbs_info_s { }; struct cs_cpu_dbs_info_s { - struct cpu_dbs_common_info cdbs; + struct cpu_dbs_info cdbs; unsigned int down_skip; unsigned int requested_freq; unsigned int enable:1; @@ -204,9 +208,11 @@ struct common_dbs_data { */ struct dbs_data *gdbs_data; - struct cpu_dbs_common_info *(*get_cpu_cdbs)(int cpu); + struct cpu_dbs_info *(*get_cpu_cdbs)(int cpu); void *(*get_cpu_dbs_info_s)(int cpu); - void (*gov_dbs_timer)(struct work_struct *work); + unsigned int (*gov_dbs_timer)(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, + bool modify_all); void (*gov_check_cpu)(int cpu, unsigned int load); int (*init)(struct dbs_data *dbs_data, bool notify); void (*exit)(struct dbs_data *dbs_data, bool notify); @@ -265,8 +271,6 @@ static ssize_t show_sampling_rate_min_gov_pol \ extern struct mutex cpufreq_governor_lock; void dbs_check_cpu(struct dbs_data *dbs_data, int cpu); -bool need_load_eval(struct cpu_dbs_common_info *cdbs, - unsigned int sampling_rate); int cpufreq_governor_dbs(struct cpufreq_policy *policy, struct common_dbs_data *cdata, unsigned int event); void gov_queue_work(struct dbs_data *dbs_data, struct cpufreq_policy *policy, diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c index 3c1e10f..1fa9088 100644 --- a/drivers/cpufreq/cpufreq_ondemand.c +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -155,7 +155,7 @@ static void dbs_freq_increase(struct cpufreq_policy *policy, unsigned int freq) static void od_check_cpu(int cpu, unsigned int load) { struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct cpufreq_policy *policy = dbs_info->cdbs.cur_policy; + struct cpufreq_policy *policy = dbs_info->cdbs.shared->policy; struct dbs_data *dbs_data = policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; @@ -191,46 +191,40 @@ static void od_check_cpu(int cpu, unsigned int load) } } -static void od_dbs_timer(struct work_struct *work) +static unsigned int od_dbs_timer(struct cpu_dbs_info *cdbs, + struct dbs_data *dbs_data, bool modify_all) { - struct od_cpu_dbs_info_s *dbs_info = - container_of(work, struct od_cpu_dbs_info_s, cdbs.work.work); - unsigned int cpu = dbs_info->cdbs.cur_policy->cpu; - struct od_cpu_dbs_info_s *core_dbs_info = &per_cpu(od_cpu_dbs_info, + struct cpufreq_policy *policy = cdbs->shared->policy; + unsigned int cpu = policy->cpu; + struct od_cpu_dbs_info_s *dbs_info = &per_cpu(od_cpu_dbs_info, cpu); - struct dbs_data *dbs_data = dbs_info->cdbs.cur_policy->governor_data; struct od_dbs_tuners *od_tuners = dbs_data->tuners; - int delay = 0, sample_type = core_dbs_info->sample_type; - bool modify_all = true; + int delay = 0, sample_type = dbs_info->sample_type; - mutex_lock(&core_dbs_info->cdbs.timer_mutex); - if (!need_load_eval(&core_dbs_info->cdbs, od_tuners->sampling_rate)) { - modify_all = false; + if (!modify_all) goto max_delay; - } /* Common NORMAL_SAMPLE setup */ - core_dbs_info->sample_type = OD_NORMAL_SAMPLE; + dbs_info->sample_type = OD_NORMAL_SAMPLE; if (sample_type == OD_SUB_SAMPLE) { - delay = core_dbs_info->freq_lo_jiffies; - __cpufreq_driver_target(core_dbs_info->cdbs.cur_policy, - core_dbs_info->freq_lo, CPUFREQ_RELATION_H); + delay = dbs_info->freq_lo_jiffies; + __cpufreq_driver_target(policy, dbs_info->freq_lo, + CPUFREQ_RELATION_H); } else { dbs_check_cpu(dbs_data, cpu); - if (core_dbs_info->freq_lo) { + if (dbs_info->freq_lo) { /* Setup timer for SUB_SAMPLE */ - core_dbs_info->sample_type = OD_SUB_SAMPLE; - delay = core_dbs_info->freq_hi_jiffies; + dbs_info->sample_type = OD_SUB_SAMPLE; + delay = dbs_info->freq_hi_jiffies; } } max_delay: if (!delay) delay = delay_for_sampling_rate(od_tuners->sampling_rate - * core_dbs_info->rate_mult); + * dbs_info->rate_mult); - gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, delay, modify_all); - mutex_unlock(&core_dbs_info->cdbs.timer_mutex); + return delay; } /************************** sysfs interface ************************/ @@ -273,27 +267,27 @@ static void update_sampling_rate(struct dbs_data *dbs_data, dbs_info = &per_cpu(od_cpu_dbs_info, cpu); cpufreq_cpu_put(policy); - mutex_lock(&dbs_info->cdbs.timer_mutex); + mutex_lock(&dbs_info->cdbs.shared->timer_mutex); - if (!delayed_work_pending(&dbs_info->cdbs.work)) { - mutex_unlock(&dbs_info->cdbs.timer_mutex); + if (!delayed_work_pending(&dbs_info->cdbs.dwork)) { + mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); continue; } next_sampling = jiffies + usecs_to_jiffies(new_rate); - appointed_at = dbs_info->cdbs.work.timer.expires; + appointed_at = dbs_info->cdbs.dwork.timer.expires; if (time_before(next_sampling, appointed_at)) { - mutex_unlock(&dbs_info->cdbs.timer_mutex); - cancel_delayed_work_sync(&dbs_info->cdbs.work); - mutex_lock(&dbs_info->cdbs.timer_mutex); + mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); + cancel_delayed_work_sync(&dbs_info->cdbs.dwork); + mutex_lock(&dbs_info->cdbs.shared->timer_mutex); - gov_queue_work(dbs_data, dbs_info->cdbs.cur_policy, - usecs_to_jiffies(new_rate), true); + gov_queue_work(dbs_data, policy, + usecs_to_jiffies(new_rate), true); } - mutex_unlock(&dbs_info->cdbs.timer_mutex); + mutex_unlock(&dbs_info->cdbs.shared->timer_mutex); } } @@ -556,13 +550,16 @@ static void od_set_powersave_bias(unsigned int powersave_bias) get_online_cpus(); for_each_online_cpu(cpu) { + struct cpu_common_dbs_info *shared; + if (cpumask_test_cpu(cpu, &done)) continue; - policy = per_cpu(od_cpu_dbs_info, cpu).cdbs.cur_policy; - if (!policy) + shared = per_cpu(od_cpu_dbs_info, cpu).cdbs.shared; + if (!shared) continue; + policy = shared->policy; cpumask_or(&done, &done, policy->cpus); if (policy->governor != &cpufreq_gov_ondemand) diff --git a/drivers/cpufreq/e_powersaver.c b/drivers/cpufreq/e_powersaver.c index a0d2a42..4085244c 100644 --- a/drivers/cpufreq/e_powersaver.c +++ b/drivers/cpufreq/e_powersaver.c @@ -78,7 +78,7 @@ static int eps_acpi_init(void) static int eps_acpi_exit(struct cpufreq_policy *policy) { if (eps_acpi_cpu_perf) { - acpi_processor_unregister_performance(eps_acpi_cpu_perf, 0); + acpi_processor_unregister_performance(0); free_cpumask_var(eps_acpi_cpu_perf->shared_cpu_map); kfree(eps_acpi_cpu_perf); eps_acpi_cpu_perf = NULL; diff --git a/drivers/cpufreq/ia64-acpi-cpufreq.c b/drivers/cpufreq/ia64-acpi-cpufreq.c index c30aaa6..0202429 100644 --- a/drivers/cpufreq/ia64-acpi-cpufreq.c +++ b/drivers/cpufreq/ia64-acpi-cpufreq.c @@ -29,7 +29,6 @@ MODULE_LICENSE("GPL"); struct cpufreq_acpi_io { struct acpi_processor_performance acpi_data; - struct cpufreq_frequency_table *freq_table; unsigned int resume; }; @@ -221,6 +220,7 @@ acpi_cpufreq_cpu_init ( unsigned int cpu = policy->cpu; struct cpufreq_acpi_io *data; unsigned int result = 0; + struct cpufreq_frequency_table *freq_table; pr_debug("acpi_cpufreq_cpu_init\n"); @@ -254,10 +254,10 @@ acpi_cpufreq_cpu_init ( } /* alloc freq_table */ - data->freq_table = kzalloc(sizeof(*data->freq_table) * + freq_table = kzalloc(sizeof(*freq_table) * (data->acpi_data.state_count + 1), GFP_KERNEL); - if (!data->freq_table) { + if (!freq_table) { result = -ENOMEM; goto err_unreg; } @@ -276,14 +276,14 @@ acpi_cpufreq_cpu_init ( for (i = 0; i <= data->acpi_data.state_count; i++) { if (i < data->acpi_data.state_count) { - data->freq_table[i].frequency = + freq_table[i].frequency = data->acpi_data.states[i].core_frequency * 1000; } else { - data->freq_table[i].frequency = CPUFREQ_TABLE_END; + freq_table[i].frequency = CPUFREQ_TABLE_END; } } - result = cpufreq_table_validate_and_show(policy, data->freq_table); + result = cpufreq_table_validate_and_show(policy, freq_table); if (result) { goto err_freqfree; } @@ -311,9 +311,9 @@ acpi_cpufreq_cpu_init ( return (result); err_freqfree: - kfree(data->freq_table); + kfree(freq_table); err_unreg: - acpi_processor_unregister_performance(&data->acpi_data, cpu); + acpi_processor_unregister_performance(cpu); err_free: kfree(data); acpi_io_data[cpu] = NULL; @@ -332,8 +332,8 @@ acpi_cpufreq_cpu_exit ( if (data) { acpi_io_data[policy->cpu] = NULL; - acpi_processor_unregister_performance(&data->acpi_data, - policy->cpu); + acpi_processor_unregister_performance(policy->cpu); + kfree(policy->freq_table); kfree(data); } diff --git a/drivers/cpufreq/integrator-cpufreq.c b/drivers/cpufreq/integrator-cpufreq.c index 129e266..2faa421 100644 --- a/drivers/cpufreq/integrator-cpufreq.c +++ b/drivers/cpufreq/integrator-cpufreq.c @@ -98,11 +98,10 @@ static int integrator_set_target(struct cpufreq_policy *policy, /* get current setting */ cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET); - if (machine_is_integrator()) { + if (machine_is_integrator()) vco.s = (cm_osc >> 8) & 7; - } else if (machine_is_cintegrator()) { + else if (machine_is_cintegrator()) vco.s = 1; - } vco.v = cm_osc & 255; vco.r = 22; freqs.old = icst_hz(&cclk_params, vco) / 1000; @@ -163,11 +162,10 @@ static unsigned int integrator_get(unsigned int cpu) /* detect memory etc. */ cm_osc = __raw_readl(cm_base + INTEGRATOR_HDR_OSC_OFFSET); - if (machine_is_integrator()) { + if (machine_is_integrator()) vco.s = (cm_osc >> 8) & 7; - } else { + else vco.s = 1; - } vco.v = cm_osc & 255; vco.r = 22; @@ -203,7 +201,7 @@ static int __init integrator_cpufreq_probe(struct platform_device *pdev) struct resource *res; res = platform_get_resource(pdev, IORESOURCE_MEM, 0); - if (!res) + if (!res) return -ENODEV; cm_base = devm_ioremap(&pdev->dev, res->start, resource_size(res)); @@ -234,6 +232,6 @@ static struct platform_driver integrator_cpufreq_driver = { module_platform_driver_probe(integrator_cpufreq_driver, integrator_cpufreq_probe); -MODULE_AUTHOR ("Russell M. King"); -MODULE_DESCRIPTION ("cpufreq driver for ARM Integrator CPUs"); -MODULE_LICENSE ("GPL"); +MODULE_AUTHOR("Russell M. King"); +MODULE_DESCRIPTION("cpufreq driver for ARM Integrator CPUs"); +MODULE_LICENSE("GPL"); diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c index fcb929e..b9354b6 100644 --- a/drivers/cpufreq/intel_pstate.c +++ b/drivers/cpufreq/intel_pstate.c @@ -484,12 +484,11 @@ static void __init intel_pstate_sysfs_expose_params(void) } /************************** sysfs end ************************/ -static void intel_pstate_hwp_enable(void) +static void intel_pstate_hwp_enable(struct cpudata *cpudata) { - hwp_active++; pr_info("intel_pstate: HWP enabled\n"); - wrmsrl( MSR_PM_ENABLE, 0x1); + wrmsrl_on_cpu(cpudata->cpu, MSR_PM_ENABLE, 0x1); } static int byt_get_min_pstate(void) @@ -522,7 +521,7 @@ static void byt_set_pstate(struct cpudata *cpudata, int pstate) int32_t vid_fp; u32 vid; - val = pstate << 8; + val = (u64)pstate << 8; if (limits.no_turbo && !limits.turbo_disabled) val |= (u64)1 << 32; @@ -611,7 +610,7 @@ static void core_set_pstate(struct cpudata *cpudata, int pstate) { u64 val; - val = pstate << 8; + val = (u64)pstate << 8; if (limits.no_turbo && !limits.turbo_disabled) val |= (u64)1 << 32; @@ -933,6 +932,10 @@ static int intel_pstate_init_cpu(unsigned int cpunum) cpu = all_cpu_data[cpunum]; cpu->cpu = cpunum; + + if (hwp_active) + intel_pstate_hwp_enable(cpu); + intel_pstate_get_cpu_pstates(cpu); init_timer_deferrable(&cpu->timer); @@ -1246,7 +1249,7 @@ static int __init intel_pstate_init(void) return -ENOMEM; if (static_cpu_has_safe(X86_FEATURE_HWP) && !no_hwp) - intel_pstate_hwp_enable(); + hwp_active++; if (!hwp_active && hwp_only) goto out; diff --git a/drivers/cpufreq/powernow-k7.c b/drivers/cpufreq/powernow-k7.c index 37c5742..c1ae199 100644 --- a/drivers/cpufreq/powernow-k7.c +++ b/drivers/cpufreq/powernow-k7.c @@ -421,7 +421,7 @@ static int powernow_acpi_init(void) return 0; err2: - acpi_processor_unregister_performance(acpi_processor_perf, 0); + acpi_processor_unregister_performance(0); err1: free_cpumask_var(acpi_processor_perf->shared_cpu_map); err05: @@ -661,7 +661,7 @@ static int powernow_cpu_exit(struct cpufreq_policy *policy) { #ifdef CONFIG_X86_POWERNOW_K7_ACPI if (acpi_processor_perf) { - acpi_processor_unregister_performance(acpi_processor_perf, 0); + acpi_processor_unregister_performance(0); free_cpumask_var(acpi_processor_perf->shared_cpu_map); kfree(acpi_processor_perf); } diff --git a/drivers/cpufreq/powernow-k8.c b/drivers/cpufreq/powernow-k8.c index 5c035d0..0b5bf13 100644 --- a/drivers/cpufreq/powernow-k8.c +++ b/drivers/cpufreq/powernow-k8.c @@ -795,7 +795,7 @@ err_out_mem: kfree(powernow_table); err_out: - acpi_processor_unregister_performance(&data->acpi_data, data->cpu); + acpi_processor_unregister_performance(data->cpu); /* data->acpi_data.state_count informs us at ->exit() * whether ACPI was used */ @@ -863,8 +863,7 @@ static int fill_powernow_table_fidvid(struct powernow_k8_data *data, static void powernow_k8_cpu_exit_acpi(struct powernow_k8_data *data) { if (data->acpi_data.state_count) - acpi_processor_unregister_performance(&data->acpi_data, - data->cpu); + acpi_processor_unregister_performance(data->cpu); free_cpumask_var(data->acpi_data.shared_cpu_map); } diff --git a/drivers/cpufreq/powernv-cpufreq.c b/drivers/cpufreq/powernv-cpufreq.c index ebef0d8..546e056 100644 --- a/drivers/cpufreq/powernv-cpufreq.c +++ b/drivers/cpufreq/powernv-cpufreq.c @@ -27,20 +27,31 @@ #include <linux/smp.h> #include <linux/of.h> #include <linux/reboot.h> +#include <linux/slab.h> #include <asm/cputhreads.h> #include <asm/firmware.h> #include <asm/reg.h> #include <asm/smp.h> /* Required for cpu_sibling_mask() in UP configs */ +#include <asm/opal.h> #define POWERNV_MAX_PSTATES 256 #define PMSR_PSAFE_ENABLE (1UL << 30) #define PMSR_SPR_EM_DISABLE (1UL << 31) #define PMSR_MAX(x) ((x >> 32) & 0xFF) -#define PMSR_LP(x) ((x >> 48) & 0xFF) static struct cpufreq_frequency_table powernv_freqs[POWERNV_MAX_PSTATES+1]; -static bool rebooting, throttled; +static bool rebooting, throttled, occ_reset; + +static struct chip { + unsigned int id; + bool throttled; + cpumask_t mask; + struct work_struct throttle; + bool restore; +} *chips; + +static int nr_chips; /* * Note: The set of pstates consists of contiguous integers, the @@ -298,28 +309,35 @@ static inline unsigned int get_nominal_index(void) return powernv_pstate_info.max - powernv_pstate_info.nominal; } -static void powernv_cpufreq_throttle_check(unsigned int cpu) +static void powernv_cpufreq_throttle_check(void *data) { + unsigned int cpu = smp_processor_id(); unsigned long pmsr; - int pmsr_pmax, pmsr_lp; + int pmsr_pmax, i; pmsr = get_pmspr(SPRN_PMSR); + for (i = 0; i < nr_chips; i++) + if (chips[i].id == cpu_to_chip_id(cpu)) + break; + /* Check for Pmax Capping */ pmsr_pmax = (s8)PMSR_MAX(pmsr); if (pmsr_pmax != powernv_pstate_info.max) { - throttled = true; - pr_info("CPU %d Pmax is reduced to %d\n", cpu, pmsr_pmax); - pr_info("Max allowed Pstate is capped\n"); + if (chips[i].throttled) + goto next; + chips[i].throttled = true; + pr_info("CPU %d on Chip %u has Pmax reduced to %d\n", cpu, + chips[i].id, pmsr_pmax); + } else if (chips[i].throttled) { + chips[i].throttled = false; + pr_info("CPU %d on Chip %u has Pmax restored to %d\n", cpu, + chips[i].id, pmsr_pmax); } - /* - * Check for Psafe by reading LocalPstate - * or check if Psafe_mode_active is set in PMSR. - */ - pmsr_lp = (s8)PMSR_LP(pmsr); - if ((pmsr_lp < powernv_pstate_info.min) || - (pmsr & PMSR_PSAFE_ENABLE)) { + /* Check if Psafe_mode_active is set in PMSR. */ +next: + if (pmsr & PMSR_PSAFE_ENABLE) { throttled = true; pr_info("Pstate set to safe frequency\n"); } @@ -350,7 +368,7 @@ static int powernv_cpufreq_target_index(struct cpufreq_policy *policy, return 0; if (!throttled) - powernv_cpufreq_throttle_check(smp_processor_id()); + powernv_cpufreq_throttle_check(NULL); freq_data.pstate_id = powernv_freqs[new_index].driver_data; @@ -395,6 +413,118 @@ static struct notifier_block powernv_cpufreq_reboot_nb = { .notifier_call = powernv_cpufreq_reboot_notifier, }; +void powernv_cpufreq_work_fn(struct work_struct *work) +{ + struct chip *chip = container_of(work, struct chip, throttle); + unsigned int cpu; + cpumask_var_t mask; + + smp_call_function_any(&chip->mask, + powernv_cpufreq_throttle_check, NULL, 0); + + if (!chip->restore) + return; + + chip->restore = false; + cpumask_copy(mask, &chip->mask); + for_each_cpu_and(cpu, mask, cpu_online_mask) { + int index, tcpu; + struct cpufreq_policy policy; + + cpufreq_get_policy(&policy, cpu); + cpufreq_frequency_table_target(&policy, policy.freq_table, + policy.cur, + CPUFREQ_RELATION_C, &index); + powernv_cpufreq_target_index(&policy, index); + for_each_cpu(tcpu, policy.cpus) + cpumask_clear_cpu(tcpu, mask); + } +} + +static char throttle_reason[][30] = { + "No throttling", + "Power Cap", + "Processor Over Temperature", + "Power Supply Failure", + "Over Current", + "OCC Reset" + }; + +static int powernv_cpufreq_occ_msg(struct notifier_block *nb, + unsigned long msg_type, void *_msg) +{ + struct opal_msg *msg = _msg; + struct opal_occ_msg omsg; + int i; + + if (msg_type != OPAL_MSG_OCC) + return 0; + + omsg.type = be64_to_cpu(msg->params[0]); + + switch (omsg.type) { + case OCC_RESET: + occ_reset = true; + /* + * powernv_cpufreq_throttle_check() is called in + * target() callback which can detect the throttle state + * for governors like ondemand. + * But static governors will not call target() often thus + * report throttling here. + */ + if (!throttled) { + throttled = true; + pr_crit("CPU Frequency is throttled\n"); + } + pr_info("OCC: Reset\n"); + break; + case OCC_LOAD: + pr_info("OCC: Loaded\n"); + break; + case OCC_THROTTLE: + omsg.chip = be64_to_cpu(msg->params[1]); + omsg.throttle_status = be64_to_cpu(msg->params[2]); + + if (occ_reset) { + occ_reset = false; + throttled = false; + pr_info("OCC: Active\n"); + + for (i = 0; i < nr_chips; i++) { + chips[i].restore = true; + schedule_work(&chips[i].throttle); + } + + return 0; + } + + if (omsg.throttle_status && + omsg.throttle_status <= OCC_MAX_THROTTLE_STATUS) + pr_info("OCC: Chip %u Pmax reduced due to %s\n", + (unsigned int)omsg.chip, + throttle_reason[omsg.throttle_status]); + else if (!omsg.throttle_status) + pr_info("OCC: Chip %u %s\n", (unsigned int)omsg.chip, + throttle_reason[omsg.throttle_status]); + else + return 0; + + for (i = 0; i < nr_chips; i++) + if (chips[i].id == omsg.chip) { + if (!omsg.throttle_status) + chips[i].restore = true; + schedule_work(&chips[i].throttle); + } + } + return 0; +} + +static struct notifier_block powernv_cpufreq_opal_nb = { + .notifier_call = powernv_cpufreq_occ_msg, + .next = NULL, + .priority = 0, +}; + static void powernv_cpufreq_stop_cpu(struct cpufreq_policy *policy) { struct powernv_smp_call_data freq_data; @@ -414,6 +544,36 @@ static struct cpufreq_driver powernv_cpufreq_driver = { .attr = powernv_cpu_freq_attr, }; +static int init_chip_info(void) +{ + unsigned int chip[256]; + unsigned int cpu, i; + unsigned int prev_chip_id = UINT_MAX; + + for_each_possible_cpu(cpu) { + unsigned int id = cpu_to_chip_id(cpu); + + if (prev_chip_id != id) { + prev_chip_id = id; + chip[nr_chips++] = id; + } + } + + chips = kmalloc_array(nr_chips, sizeof(struct chip), GFP_KERNEL); + if (!chips) + return -ENOMEM; + + for (i = 0; i < nr_chips; i++) { + chips[i].id = chip[i]; + chips[i].throttled = false; + cpumask_copy(&chips[i].mask, cpumask_of_node(chip[i])); + INIT_WORK(&chips[i].throttle, powernv_cpufreq_work_fn); + chips[i].restore = false; + } + + return 0; +} + static int __init powernv_cpufreq_init(void) { int rc = 0; @@ -429,7 +589,13 @@ static int __init powernv_cpufreq_init(void) return rc; } + /* Populate chip info */ + rc = init_chip_info(); + if (rc) + return rc; + register_reboot_notifier(&powernv_cpufreq_reboot_nb); + opal_message_notifier_register(OPAL_MSG_OCC, &powernv_cpufreq_opal_nb); return cpufreq_register_driver(&powernv_cpufreq_driver); } module_init(powernv_cpufreq_init); @@ -437,6 +603,8 @@ module_init(powernv_cpufreq_init); static void __exit powernv_cpufreq_exit(void) { unregister_reboot_notifier(&powernv_cpufreq_reboot_nb); + opal_message_notifier_unregister(OPAL_MSG_OCC, + &powernv_cpufreq_opal_nb); cpufreq_unregister_driver(&powernv_cpufreq_driver); } module_exit(powernv_cpufreq_exit); diff --git a/drivers/xen/xen-acpi-processor.c b/drivers/xen/xen-acpi-processor.c index 59fc190..70fa438 100644 --- a/drivers/xen/xen-acpi-processor.c +++ b/drivers/xen/xen-acpi-processor.c @@ -560,11 +560,9 @@ static int __init xen_acpi_processor_init(void) return 0; err_unregister: - for_each_possible_cpu(i) { - struct acpi_processor_performance *perf; - perf = per_cpu_ptr(acpi_perf_data, i); - acpi_processor_unregister_performance(perf, i); - } + for_each_possible_cpu(i) + acpi_processor_unregister_performance(i); + err_out: /* Freeing a NULL pointer is OK: alloc_percpu zeroes. */ free_acpi_perf_data(); @@ -579,11 +577,9 @@ static void __exit xen_acpi_processor_exit(void) kfree(acpi_ids_done); kfree(acpi_id_present); kfree(acpi_id_cst_present); - for_each_possible_cpu(i) { - struct acpi_processor_performance *perf; - perf = per_cpu_ptr(acpi_perf_data, i); - acpi_processor_unregister_performance(perf, i); - } + for_each_possible_cpu(i) + acpi_processor_unregister_performance(i); + free_acpi_perf_data(); } diff --git a/include/acpi/processor.h b/include/acpi/processor.h index 4188a4d..aad1f2a 100644 --- a/include/acpi/processor.h +++ b/include/acpi/processor.h @@ -228,10 +228,7 @@ extern int acpi_processor_preregister_performance(struct extern int acpi_processor_register_performance(struct acpi_processor_performance *performance, unsigned int cpu); -extern void acpi_processor_unregister_performance(struct - acpi_processor_performance - *performance, - unsigned int cpu); +extern void acpi_processor_unregister_performance(unsigned int cpu); /* note: this locks both the calling module and the processor module if a _PPC object exists, rmmod is disallowed then */ |