diff options
author | Timothy Pearson <tpearson@raptorengineering.com> | 2017-08-23 14:45:25 -0500 |
---|---|---|
committer | Timothy Pearson <tpearson@raptorengineering.com> | 2017-08-23 14:45:25 -0500 |
commit | fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204 (patch) | |
tree | 22962a4387943edc841c72a4e636a068c66d58fd /drivers/cpufreq | |
download | ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.zip ast2050-linux-kernel-fcbb27b0ec6dcbc5a5108cb8fb19eae64593d204.tar.gz |
Initial import of modified Linux 2.6.28 tree
Original upstream URL:
git://git.kernel.org/pub/scm/linux/kernel/git/stable/linux-stable.git | branch linux-2.6.28.y
Diffstat (limited to 'drivers/cpufreq')
-rw-r--r-- | drivers/cpufreq/Kconfig | 193 | ||||
-rw-r--r-- | drivers/cpufreq/Makefile | 15 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq.c | 1917 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_conservative.c | 614 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_ondemand.c | 685 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_performance.c | 66 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_powersave.c | 68 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_stats.c | 388 | ||||
-rw-r--r-- | drivers/cpufreq/cpufreq_userspace.c | 222 | ||||
-rw-r--r-- | drivers/cpufreq/freq_table.c | 239 |
10 files changed, 4407 insertions, 0 deletions
diff --git a/drivers/cpufreq/Kconfig b/drivers/cpufreq/Kconfig new file mode 100644 index 0000000..5f076ae --- /dev/null +++ b/drivers/cpufreq/Kconfig @@ -0,0 +1,193 @@ +config CPU_FREQ + bool "CPU Frequency scaling" + help + CPU Frequency scaling allows you to change the clock speed of + CPUs on the fly. This is a nice method to save power, because + the lower the CPU clock speed, the less power the CPU consumes. + + Note that this driver doesn't automatically change the CPU + clock speed, you need to either enable a dynamic cpufreq governor + (see below) after boot, or use a userspace tool. + + For details, take a look at <file:Documentation/cpu-freq>. + + If in doubt, say N. + +if CPU_FREQ + +config CPU_FREQ_TABLE + tristate + +config CPU_FREQ_DEBUG + bool "Enable CPUfreq debugging" + help + Say Y here to enable CPUfreq subsystem (including drivers) + debugging. You will need to activate it via the kernel + command line by passing + cpufreq.debug=<value> + + To get <value>, add + 1 to activate CPUfreq core debugging, + 2 to activate CPUfreq drivers debugging, and + 4 to activate CPUfreq governor debugging + +config CPU_FREQ_STAT + tristate "CPU frequency translation statistics" + select CPU_FREQ_TABLE + default y + help + This driver exports CPU frequency statistics information through sysfs + file system. + + To compile this driver as a module, choose M here: the + module will be called cpufreq_stats. + + If in doubt, say N. + +config CPU_FREQ_STAT_DETAILS + bool "CPU frequency translation statistics details" + depends on CPU_FREQ_STAT + help + This will show detail CPU frequency translation table in sysfs file + system. + + If in doubt, say N. + +choice + prompt "Default CPUFreq governor" + default CPU_FREQ_DEFAULT_GOV_USERSPACE if CPU_FREQ_SA1100 || CPU_FREQ_SA1110 + default CPU_FREQ_DEFAULT_GOV_PERFORMANCE + help + This option sets which CPUFreq governor shall be loaded at + startup. If in doubt, select 'performance'. 
+ +config CPU_FREQ_DEFAULT_GOV_PERFORMANCE + bool "performance" + select CPU_FREQ_GOV_PERFORMANCE + help + Use the CPUFreq governor 'performance' as default. This sets + the frequency statically to the highest frequency supported by + the CPU. + +config CPU_FREQ_DEFAULT_GOV_POWERSAVE + bool "powersave" + depends on EMBEDDED + select CPU_FREQ_GOV_POWERSAVE + help + Use the CPUFreq governor 'powersave' as default. This sets + the frequency statically to the lowest frequency supported by + the CPU. + +config CPU_FREQ_DEFAULT_GOV_USERSPACE + bool "userspace" + select CPU_FREQ_GOV_USERSPACE + help + Use the CPUFreq governor 'userspace' as default. This allows + you to set the CPU frequency manually or when an userspace + program shall be able to set the CPU dynamically without having + to enable the userspace governor manually. + +config CPU_FREQ_DEFAULT_GOV_ONDEMAND + bool "ondemand" + select CPU_FREQ_GOV_ONDEMAND + select CPU_FREQ_GOV_PERFORMANCE + help + Use the CPUFreq governor 'ondemand' as default. This allows + you to get a full dynamic frequency capable system by simply + loading your cpufreq low-level hardware driver. + Be aware that not all cpufreq drivers support the ondemand + governor. If unsure have a look at the help section of the + driver. Fallback governor will be the performance governor. + +config CPU_FREQ_DEFAULT_GOV_CONSERVATIVE + bool "conservative" + select CPU_FREQ_GOV_CONSERVATIVE + select CPU_FREQ_GOV_PERFORMANCE + help + Use the CPUFreq governor 'conservative' as default. This allows + you to get a full dynamic frequency capable system by simply + loading your cpufreq low-level hardware driver. + Be aware that not all cpufreq drivers support the conservative + governor. If unsure have a look at the help section of the + driver. Fallback governor will be the performance governor. 
+endchoice + +config CPU_FREQ_GOV_PERFORMANCE + tristate "'performance' governor" + help + This cpufreq governor sets the frequency statically to the + highest available CPU frequency. + + To compile this driver as a module, choose M here: the + module will be called cpufreq_performance. + + If in doubt, say Y. + +config CPU_FREQ_GOV_POWERSAVE + tristate "'powersave' governor" + help + This cpufreq governor sets the frequency statically to the + lowest available CPU frequency. + + To compile this driver as a module, choose M here: the + module will be called cpufreq_powersave. + + If in doubt, say Y. + +config CPU_FREQ_GOV_USERSPACE + tristate "'userspace' governor for userspace frequency scaling" + help + Enable this cpufreq governor when you either want to set the + CPU frequency manually or when an userspace program shall + be able to set the CPU dynamically, like on LART + <http://www.lartmaker.nl/>. + + To compile this driver as a module, choose M here: the + module will be called cpufreq_userspace. + + For details, take a look at <file:Documentation/cpu-freq/>. + + If in doubt, say Y. + +config CPU_FREQ_GOV_ONDEMAND + tristate "'ondemand' cpufreq policy governor" + select CPU_FREQ_TABLE + help + 'ondemand' - This driver adds a dynamic cpufreq policy governor. + The governor does a periodic polling and + changes frequency based on the CPU utilization. + The support for this governor depends on CPU capability to + do fast frequency switching (i.e, very low latency frequency + transitions). + + To compile this driver as a module, choose M here: the + module will be called cpufreq_ondemand. + + For details, take a look at linux/Documentation/cpu-freq. + + If in doubt, say N. 
+ +config CPU_FREQ_GOV_CONSERVATIVE + tristate "'conservative' cpufreq governor" + depends on CPU_FREQ + help + 'conservative' - this driver is rather similar to the 'ondemand' + governor both in its source code and its purpose, the difference is + its optimisation for better suitability in a battery powered + environment. The frequency is gracefully increased and decreased + rather than jumping to 100% when speed is required. + + If you have a desktop machine then you should really be considering + the 'ondemand' governor instead, however if you are using a laptop, + PDA or even an AMD64 based computer (due to the unacceptable + step-by-step latency issues between the minimum and maximum frequency + transitions in the CPU) you will probably want to use this governor. + + To compile this driver as a module, choose M here: the + module will be called cpufreq_conservative. + + For details, take a look at linux/Documentation/cpu-freq. + + If in doubt, say N. + +endif # CPU_FREQ diff --git a/drivers/cpufreq/Makefile b/drivers/cpufreq/Makefile new file mode 100644 index 0000000..71fc3b4 --- /dev/null +++ b/drivers/cpufreq/Makefile @@ -0,0 +1,15 @@ +# CPUfreq core +obj-$(CONFIG_CPU_FREQ) += cpufreq.o +# CPUfreq stats +obj-$(CONFIG_CPU_FREQ_STAT) += cpufreq_stats.o + +# CPUfreq governors +obj-$(CONFIG_CPU_FREQ_GOV_PERFORMANCE) += cpufreq_performance.o +obj-$(CONFIG_CPU_FREQ_GOV_POWERSAVE) += cpufreq_powersave.o +obj-$(CONFIG_CPU_FREQ_GOV_USERSPACE) += cpufreq_userspace.o +obj-$(CONFIG_CPU_FREQ_GOV_ONDEMAND) += cpufreq_ondemand.o +obj-$(CONFIG_CPU_FREQ_GOV_CONSERVATIVE) += cpufreq_conservative.o + +# CPUfreq cross-arch helpers +obj-$(CONFIG_CPU_FREQ_TABLE) += freq_table.o + diff --git a/drivers/cpufreq/cpufreq.c b/drivers/cpufreq/cpufreq.c new file mode 100644 index 0000000..31d6f53 --- /dev/null +++ b/drivers/cpufreq/cpufreq.c @@ -0,0 +1,1917 @@ +/* + * linux/drivers/cpufreq/cpufreq.c + * + * Copyright (C) 2001 Russell King + * (C) 2002 - 2003 Dominik Brodowski 
<linux@brodo.de> + * + * Oct 2005 - Ashok Raj <ashok.raj@intel.com> + * Added handling for CPU hotplug + * Feb 2006 - Jacob Shin <jacob.shin@amd.com> + * Fix handling for CPU hotplug -- affected CPUs + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/notifier.h> +#include <linux/cpufreq.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/spinlock.h> +#include <linux/device.h> +#include <linux/slab.h> +#include <linux/cpu.h> +#include <linux/completion.h> +#include <linux/mutex.h> + +#define dprintk(msg...) cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, \ + "cpufreq-core", msg) + +/** + * The "cpufreq driver" - the arch- or hardware-dependent low + * level driver of CPUFreq support, and its spinlock. This lock + * also protects the cpufreq_cpu_data array. + */ +static struct cpufreq_driver *cpufreq_driver; +static DEFINE_PER_CPU(struct cpufreq_policy *, cpufreq_cpu_data); +#ifdef CONFIG_HOTPLUG_CPU +/* This one keeps track of the previously set governor of a removed CPU */ +static DEFINE_PER_CPU(struct cpufreq_governor *, cpufreq_cpu_governor); +#endif +static DEFINE_SPINLOCK(cpufreq_driver_lock); + +/* + * cpu_policy_rwsem is a per CPU reader-writer semaphore designed to cure + * all cpufreq/hotplug/workqueue/etc related lock issues. + * + * The rules for this semaphore: + * - Any routine that wants to read from the policy structure will + * do a down_read on this semaphore. + * - Any routine that will write to the policy structure and/or may take away + * the policy altogether (eg. CPU hotplug), will hold this lock in write + * mode before doing so. 
+ * + * Additional rules: + * - All holders of the lock should check to make sure that the CPU they + * are concerned with are online after they get the lock. + * - Governor routines that can be called in cpufreq hotplug path should not + * take this sem as top level hotplug notifier handler takes this. + */ +static DEFINE_PER_CPU(int, policy_cpu); +static DEFINE_PER_CPU(struct rw_semaphore, cpu_policy_rwsem); + +#define lock_policy_rwsem(mode, cpu) \ +int lock_policy_rwsem_##mode \ +(int cpu) \ +{ \ + int policy_cpu = per_cpu(policy_cpu, cpu); \ + BUG_ON(policy_cpu == -1); \ + down_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ + if (unlikely(!cpu_online(cpu))) { \ + up_##mode(&per_cpu(cpu_policy_rwsem, policy_cpu)); \ + return -1; \ + } \ + \ + return 0; \ +} + +lock_policy_rwsem(read, cpu); +EXPORT_SYMBOL_GPL(lock_policy_rwsem_read); + +lock_policy_rwsem(write, cpu); +EXPORT_SYMBOL_GPL(lock_policy_rwsem_write); + +void unlock_policy_rwsem_read(int cpu) +{ + int policy_cpu = per_cpu(policy_cpu, cpu); + BUG_ON(policy_cpu == -1); + up_read(&per_cpu(cpu_policy_rwsem, policy_cpu)); +} +EXPORT_SYMBOL_GPL(unlock_policy_rwsem_read); + +void unlock_policy_rwsem_write(int cpu) +{ + int policy_cpu = per_cpu(policy_cpu, cpu); + BUG_ON(policy_cpu == -1); + up_write(&per_cpu(cpu_policy_rwsem, policy_cpu)); +} +EXPORT_SYMBOL_GPL(unlock_policy_rwsem_write); + + +/* internal prototypes */ +static int __cpufreq_governor(struct cpufreq_policy *policy, unsigned int event); +static unsigned int __cpufreq_get(unsigned int cpu); +static void handle_update(struct work_struct *work); + +/** + * Two notifier lists: the "policy" list is involved in the + * validation process for a new CPU frequency policy; the + * "transition" list for kernel code that needs to handle + * changes to devices when the CPU clock speed changes. + * The mutex locks both lists. 
+ */ +static BLOCKING_NOTIFIER_HEAD(cpufreq_policy_notifier_list); +static struct srcu_notifier_head cpufreq_transition_notifier_list; + +static bool init_cpufreq_transition_notifier_list_called; +static int __init init_cpufreq_transition_notifier_list(void) +{ + srcu_init_notifier_head(&cpufreq_transition_notifier_list); + init_cpufreq_transition_notifier_list_called = true; + return 0; +} +pure_initcall(init_cpufreq_transition_notifier_list); + +static LIST_HEAD(cpufreq_governor_list); +static DEFINE_MUTEX (cpufreq_governor_mutex); + +struct cpufreq_policy *cpufreq_cpu_get(unsigned int cpu) +{ + struct cpufreq_policy *data; + unsigned long flags; + + if (cpu >= nr_cpu_ids) + goto err_out; + + /* get the cpufreq driver */ + spin_lock_irqsave(&cpufreq_driver_lock, flags); + + if (!cpufreq_driver) + goto err_out_unlock; + + if (!try_module_get(cpufreq_driver->owner)) + goto err_out_unlock; + + + /* get the CPU */ + data = per_cpu(cpufreq_cpu_data, cpu); + + if (!data) + goto err_out_put_module; + + if (!kobject_get(&data->kobj)) + goto err_out_put_module; + + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + return data; + +err_out_put_module: + module_put(cpufreq_driver->owner); +err_out_unlock: + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); +err_out: + return NULL; +} +EXPORT_SYMBOL_GPL(cpufreq_cpu_get); + + +void cpufreq_cpu_put(struct cpufreq_policy *data) +{ + kobject_put(&data->kobj); + module_put(cpufreq_driver->owner); +} +EXPORT_SYMBOL_GPL(cpufreq_cpu_put); + + +/********************************************************************* + * UNIFIED DEBUG HELPERS * + *********************************************************************/ +#ifdef CONFIG_CPU_FREQ_DEBUG + +/* what part(s) of the CPUfreq subsystem are debugged? */ +static unsigned int debug; + +/* is the debug output ratelimit'ed using printk_ratelimit? User can + * set or modify this value. + */ +static unsigned int debug_ratelimit = 1; + +/* is the printk_ratelimit'ing enabled? 
It's enabled after a successful + * loading of a cpufreq driver, temporarily disabled when a new policy + * is set, and disabled upon cpufreq driver removal + */ +static unsigned int disable_ratelimit = 1; +static DEFINE_SPINLOCK(disable_ratelimit_lock); + +static void cpufreq_debug_enable_ratelimit(void) +{ + unsigned long flags; + + spin_lock_irqsave(&disable_ratelimit_lock, flags); + if (disable_ratelimit) + disable_ratelimit--; + spin_unlock_irqrestore(&disable_ratelimit_lock, flags); +} + +static void cpufreq_debug_disable_ratelimit(void) +{ + unsigned long flags; + + spin_lock_irqsave(&disable_ratelimit_lock, flags); + disable_ratelimit++; + spin_unlock_irqrestore(&disable_ratelimit_lock, flags); +} + +void cpufreq_debug_printk(unsigned int type, const char *prefix, + const char *fmt, ...) +{ + char s[256]; + va_list args; + unsigned int len; + unsigned long flags; + + WARN_ON(!prefix); + if (type & debug) { + spin_lock_irqsave(&disable_ratelimit_lock, flags); + if (!disable_ratelimit && debug_ratelimit + && !printk_ratelimit()) { + spin_unlock_irqrestore(&disable_ratelimit_lock, flags); + return; + } + spin_unlock_irqrestore(&disable_ratelimit_lock, flags); + + len = snprintf(s, 256, KERN_DEBUG "%s: ", prefix); + + va_start(args, fmt); + len += vsnprintf(&s[len], (256 - len), fmt, args); + va_end(args); + + printk(s); + + WARN_ON(len < 5); + } +} +EXPORT_SYMBOL(cpufreq_debug_printk); + + +module_param(debug, uint, 0644); +MODULE_PARM_DESC(debug, "CPUfreq debugging: add 1 to debug core," + " 2 to debug drivers, and 4 to debug governors."); + +module_param(debug_ratelimit, uint, 0644); +MODULE_PARM_DESC(debug_ratelimit, "CPUfreq debugging:" + " set to 0 to disable ratelimiting."); + +#else /* !CONFIG_CPU_FREQ_DEBUG */ + +static inline void cpufreq_debug_enable_ratelimit(void) { return; } +static inline void cpufreq_debug_disable_ratelimit(void) { return; } + +#endif /* CONFIG_CPU_FREQ_DEBUG */ + + 
+/********************************************************************* + * EXTERNALLY AFFECTING FREQUENCY CHANGES * + *********************************************************************/ + +/** + * adjust_jiffies - adjust the system "loops_per_jiffy" + * + * This function alters the system "loops_per_jiffy" for the clock + * speed change. Note that loops_per_jiffy cannot be updated on SMP + * systems as each CPU might be scaled differently. So, use the arch + * per-CPU loops_per_jiffy value wherever possible. + */ +#ifndef CONFIG_SMP +static unsigned long l_p_j_ref; +static unsigned int l_p_j_ref_freq; + +static void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) +{ + if (ci->flags & CPUFREQ_CONST_LOOPS) + return; + + if (!l_p_j_ref_freq) { + l_p_j_ref = loops_per_jiffy; + l_p_j_ref_freq = ci->old; + dprintk("saving %lu as reference value for loops_per_jiffy; " + "freq is %u kHz\n", l_p_j_ref, l_p_j_ref_freq); + } + if ((val == CPUFREQ_PRECHANGE && ci->old < ci->new) || + (val == CPUFREQ_POSTCHANGE && ci->old > ci->new) || + (val == CPUFREQ_RESUMECHANGE || val == CPUFREQ_SUSPENDCHANGE)) { + loops_per_jiffy = cpufreq_scale(l_p_j_ref, l_p_j_ref_freq, + ci->new); + dprintk("scaling loops_per_jiffy to %lu " + "for frequency %u kHz\n", loops_per_jiffy, ci->new); + } +} +#else +static inline void adjust_jiffies(unsigned long val, struct cpufreq_freqs *ci) +{ + return; +} +#endif + + +/** + * cpufreq_notify_transition - call notifier chain and adjust_jiffies + * on frequency transition. + * + * This function calls the transition notifiers and the "adjust_jiffies" + * function. It is called twice on all CPU frequency changes that have + * external effects. 
+ */ +void cpufreq_notify_transition(struct cpufreq_freqs *freqs, unsigned int state) +{ + struct cpufreq_policy *policy; + + BUG_ON(irqs_disabled()); + + freqs->flags = cpufreq_driver->flags; + dprintk("notification %u of frequency transition to %u kHz\n", + state, freqs->new); + + policy = per_cpu(cpufreq_cpu_data, freqs->cpu); + switch (state) { + + case CPUFREQ_PRECHANGE: + /* detect if the driver reported a value as "old frequency" + * which is not equal to what the cpufreq core thinks is + * "old frequency". + */ + if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { + if ((policy) && (policy->cpu == freqs->cpu) && + (policy->cur) && (policy->cur != freqs->old)) { + dprintk("Warning: CPU frequency is" + " %u, cpufreq assumed %u kHz.\n", + freqs->old, policy->cur); + freqs->old = policy->cur; + } + } + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, + CPUFREQ_PRECHANGE, freqs); + adjust_jiffies(CPUFREQ_PRECHANGE, freqs); + break; + + case CPUFREQ_POSTCHANGE: + adjust_jiffies(CPUFREQ_POSTCHANGE, freqs); + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, + CPUFREQ_POSTCHANGE, freqs); + if (likely(policy) && likely(policy->cpu == freqs->cpu)) + policy->cur = freqs->new; + break; + } +} +EXPORT_SYMBOL_GPL(cpufreq_notify_transition); + + + +/********************************************************************* + * SYSFS INTERFACE * + *********************************************************************/ + +static struct cpufreq_governor *__find_governor(const char *str_governor) +{ + struct cpufreq_governor *t; + + list_for_each_entry(t, &cpufreq_governor_list, governor_list) + if (!strnicmp(str_governor,t->name,CPUFREQ_NAME_LEN)) + return t; + + return NULL; +} + +/** + * cpufreq_parse_governor - parse a governor string + */ +static int cpufreq_parse_governor(char *str_governor, unsigned int *policy, + struct cpufreq_governor **governor) +{ + int err = -EINVAL; + + if (!cpufreq_driver) + goto out; + + if (cpufreq_driver->setpolicy) { + if 
(!strnicmp(str_governor, "performance", CPUFREQ_NAME_LEN)) { + *policy = CPUFREQ_POLICY_PERFORMANCE; + err = 0; + } else if (!strnicmp(str_governor, "powersave", + CPUFREQ_NAME_LEN)) { + *policy = CPUFREQ_POLICY_POWERSAVE; + err = 0; + } + } else if (cpufreq_driver->target) { + struct cpufreq_governor *t; + + mutex_lock(&cpufreq_governor_mutex); + + t = __find_governor(str_governor); + + if (t == NULL) { + char *name = kasprintf(GFP_KERNEL, "cpufreq_%s", + str_governor); + + if (name) { + int ret; + + mutex_unlock(&cpufreq_governor_mutex); + ret = request_module("%s", name); + mutex_lock(&cpufreq_governor_mutex); + + if (ret == 0) + t = __find_governor(str_governor); + } + + kfree(name); + } + + if (t != NULL) { + *governor = t; + err = 0; + } + + mutex_unlock(&cpufreq_governor_mutex); + } + out: + return err; +} + + +/* drivers/base/cpu.c */ +extern struct sysdev_class cpu_sysdev_class; + + +/** + * cpufreq_per_cpu_attr_read() / show_##file_name() - + * print out cpufreq information + * + * Write out information from cpufreq_driver->policy[cpu]; object must be + * "unsigned int". 
+ */ + +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct cpufreq_policy *policy, char *buf) \ +{ \ + return sprintf (buf, "%u\n", policy->object); \ +} + +show_one(cpuinfo_min_freq, cpuinfo.min_freq); +show_one(cpuinfo_max_freq, cpuinfo.max_freq); +show_one(scaling_min_freq, min); +show_one(scaling_max_freq, max); +show_one(scaling_cur_freq, cur); + +static int __cpufreq_set_policy(struct cpufreq_policy *data, + struct cpufreq_policy *policy); + +/** + * cpufreq_per_cpu_attr_write() / store_##file_name() - sysfs write access + */ +#define store_one(file_name, object) \ +static ssize_t store_##file_name \ +(struct cpufreq_policy *policy, const char *buf, size_t count) \ +{ \ + unsigned int ret = -EINVAL; \ + struct cpufreq_policy new_policy; \ + \ + ret = cpufreq_get_policy(&new_policy, policy->cpu); \ + if (ret) \ + return -EINVAL; \ + \ + ret = sscanf (buf, "%u", &new_policy.object); \ + if (ret != 1) \ + return -EINVAL; \ + \ + ret = __cpufreq_set_policy(policy, &new_policy); \ + policy->user_policy.object = policy->object; \ + \ + return ret ? 
ret : count; \ +} + +store_one(scaling_min_freq,min); +store_one(scaling_max_freq,max); + +/** + * show_cpuinfo_cur_freq - current CPU frequency as detected by hardware + */ +static ssize_t show_cpuinfo_cur_freq(struct cpufreq_policy *policy, + char *buf) +{ + unsigned int cur_freq = __cpufreq_get(policy->cpu); + if (!cur_freq) + return sprintf(buf, "<unknown>"); + return sprintf(buf, "%u\n", cur_freq); +} + + +/** + * show_scaling_governor - show the current policy for the specified CPU + */ +static ssize_t show_scaling_governor(struct cpufreq_policy *policy, char *buf) +{ + if(policy->policy == CPUFREQ_POLICY_POWERSAVE) + return sprintf(buf, "powersave\n"); + else if (policy->policy == CPUFREQ_POLICY_PERFORMANCE) + return sprintf(buf, "performance\n"); + else if (policy->governor) + return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", policy->governor->name); + return -EINVAL; +} + + +/** + * store_scaling_governor - store policy for the specified CPU + */ +static ssize_t store_scaling_governor(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + unsigned int ret = -EINVAL; + char str_governor[16]; + struct cpufreq_policy new_policy; + + ret = cpufreq_get_policy(&new_policy, policy->cpu); + if (ret) + return ret; + + ret = sscanf (buf, "%15s", str_governor); + if (ret != 1) + return -EINVAL; + + if (cpufreq_parse_governor(str_governor, &new_policy.policy, + &new_policy.governor)) + return -EINVAL; + + /* Do not use cpufreq_set_policy here or the user_policy.max + will be wrongly overridden */ + ret = __cpufreq_set_policy(policy, &new_policy); + + policy->user_policy.policy = policy->policy; + policy->user_policy.governor = policy->governor; + + if (ret) + return ret; + else + return count; +} + +/** + * show_scaling_driver - show the cpufreq driver currently loaded + */ +static ssize_t show_scaling_driver(struct cpufreq_policy *policy, char *buf) +{ + return scnprintf(buf, CPUFREQ_NAME_LEN, "%s\n", cpufreq_driver->name); +} + +/** + * 
show_scaling_available_governors - show the available CPUfreq governors + */ +static ssize_t show_scaling_available_governors(struct cpufreq_policy *policy, + char *buf) +{ + ssize_t i = 0; + struct cpufreq_governor *t; + + if (!cpufreq_driver->target) { + i += sprintf(buf, "performance powersave"); + goto out; + } + + list_for_each_entry(t, &cpufreq_governor_list, governor_list) { + if (i >= (ssize_t) ((PAGE_SIZE / sizeof(char)) - (CPUFREQ_NAME_LEN + 2))) + goto out; + i += scnprintf(&buf[i], CPUFREQ_NAME_LEN, "%s ", t->name); + } +out: + i += sprintf(&buf[i], "\n"); + return i; +} + +static ssize_t show_cpus(cpumask_t mask, char *buf) +{ + ssize_t i = 0; + unsigned int cpu; + + for_each_cpu_mask_nr(cpu, mask) { + if (i) + i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), " "); + i += scnprintf(&buf[i], (PAGE_SIZE - i - 2), "%u", cpu); + if (i >= (PAGE_SIZE - 5)) + break; + } + i += sprintf(&buf[i], "\n"); + return i; +} + +/** + * show_related_cpus - show the CPUs affected by each transition even if + * hw coordination is in use + */ +static ssize_t show_related_cpus(struct cpufreq_policy *policy, char *buf) +{ + if (cpus_empty(policy->related_cpus)) + return show_cpus(policy->cpus, buf); + return show_cpus(policy->related_cpus, buf); +} + +/** + * show_affected_cpus - show the CPUs affected by each transition + */ +static ssize_t show_affected_cpus(struct cpufreq_policy *policy, char *buf) +{ + return show_cpus(policy->cpus, buf); +} + +static ssize_t store_scaling_setspeed(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + unsigned int freq = 0; + unsigned int ret; + + if (!policy->governor || !policy->governor->store_setspeed) + return -EINVAL; + + ret = sscanf(buf, "%u", &freq); + if (ret != 1) + return -EINVAL; + + policy->governor->store_setspeed(policy, freq); + + return count; +} + +static ssize_t show_scaling_setspeed(struct cpufreq_policy *policy, char *buf) +{ + if (!policy->governor || !policy->governor->show_setspeed) + return 
sprintf(buf, "<unsupported>\n"); + + return policy->governor->show_setspeed(policy, buf); +} + +#define define_one_ro(_name) \ +static struct freq_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +#define define_one_ro0400(_name) \ +static struct freq_attr _name = \ +__ATTR(_name, 0400, show_##_name, NULL) + +#define define_one_rw(_name) \ +static struct freq_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + +define_one_ro0400(cpuinfo_cur_freq); +define_one_ro(cpuinfo_min_freq); +define_one_ro(cpuinfo_max_freq); +define_one_ro(scaling_available_governors); +define_one_ro(scaling_driver); +define_one_ro(scaling_cur_freq); +define_one_ro(related_cpus); +define_one_ro(affected_cpus); +define_one_rw(scaling_min_freq); +define_one_rw(scaling_max_freq); +define_one_rw(scaling_governor); +define_one_rw(scaling_setspeed); + +static struct attribute *default_attrs[] = { + &cpuinfo_min_freq.attr, + &cpuinfo_max_freq.attr, + &scaling_min_freq.attr, + &scaling_max_freq.attr, + &affected_cpus.attr, + &related_cpus.attr, + &scaling_governor.attr, + &scaling_driver.attr, + &scaling_available_governors.attr, + &scaling_setspeed.attr, + NULL +}; + +#define to_policy(k) container_of(k,struct cpufreq_policy,kobj) +#define to_attr(a) container_of(a,struct freq_attr,attr) + +static ssize_t show(struct kobject *kobj, struct attribute *attr ,char *buf) +{ + struct cpufreq_policy *policy = to_policy(kobj); + struct freq_attr *fattr = to_attr(attr); + ssize_t ret = -EINVAL; + policy = cpufreq_cpu_get(policy->cpu); + if (!policy) + goto no_policy; + + if (lock_policy_rwsem_read(policy->cpu) < 0) + goto fail; + + if (fattr->show) + ret = fattr->show(policy, buf); + else + ret = -EIO; + + unlock_policy_rwsem_read(policy->cpu); +fail: + cpufreq_cpu_put(policy); +no_policy: + return ret; +} + +static ssize_t store(struct kobject *kobj, struct attribute *attr, + const char *buf, size_t count) +{ + struct cpufreq_policy *policy = to_policy(kobj); + struct freq_attr 
*fattr = to_attr(attr); + ssize_t ret = -EINVAL; + policy = cpufreq_cpu_get(policy->cpu); + if (!policy) + goto no_policy; + + if (lock_policy_rwsem_write(policy->cpu) < 0) + goto fail; + + if (fattr->store) + ret = fattr->store(policy, buf, count); + else + ret = -EIO; + + unlock_policy_rwsem_write(policy->cpu); +fail: + cpufreq_cpu_put(policy); +no_policy: + return ret; +} + +static void cpufreq_sysfs_release(struct kobject *kobj) +{ + struct cpufreq_policy *policy = to_policy(kobj); + dprintk("last reference is dropped\n"); + complete(&policy->kobj_unregister); +} + +static struct sysfs_ops sysfs_ops = { + .show = show, + .store = store, +}; + +static struct kobj_type ktype_cpufreq = { + .sysfs_ops = &sysfs_ops, + .default_attrs = default_attrs, + .release = cpufreq_sysfs_release, +}; + + +/** + * cpufreq_add_dev - add a CPU device + * + * Adds the cpufreq interface for a CPU device. + */ +static int cpufreq_add_dev(struct sys_device *sys_dev) +{ + unsigned int cpu = sys_dev->id; + int ret = 0; + struct cpufreq_policy new_policy; + struct cpufreq_policy *policy; + struct freq_attr **drv_attr; + struct sys_device *cpu_sys_dev; + unsigned long flags; + unsigned int j; +#ifdef CONFIG_SMP + struct cpufreq_policy *managed_policy; +#endif + + if (cpu_is_offline(cpu)) + return 0; + + cpufreq_debug_disable_ratelimit(); + dprintk("adding CPU %u\n", cpu); + +#ifdef CONFIG_SMP + /* check whether a different CPU already registered this + * CPU because it is in the same boat. 
*/ + policy = cpufreq_cpu_get(cpu); + if (unlikely(policy)) { + cpufreq_cpu_put(policy); + cpufreq_debug_enable_ratelimit(); + return 0; + } +#endif + + if (!try_module_get(cpufreq_driver->owner)) { + ret = -EINVAL; + goto module_out; + } + + policy = kzalloc(sizeof(struct cpufreq_policy), GFP_KERNEL); + if (!policy) { + ret = -ENOMEM; + goto nomem_out; + } + + policy->cpu = cpu; + policy->cpus = cpumask_of_cpu(cpu); + + /* Initially set CPU itself as the policy_cpu */ + per_cpu(policy_cpu, cpu) = cpu; + lock_policy_rwsem_write(cpu); + + init_completion(&policy->kobj_unregister); + INIT_WORK(&policy->update, handle_update); + + /* Set governor before ->init, so that driver could check it */ + policy->governor = CPUFREQ_DEFAULT_GOVERNOR; + /* call driver. From then on the cpufreq must be able + * to accept all calls to ->verify and ->setpolicy for this CPU + */ + ret = cpufreq_driver->init(policy); + if (ret) { + dprintk("initialization failed\n"); + goto err_out; + } + policy->user_policy.min = policy->cpuinfo.min_freq; + policy->user_policy.max = policy->cpuinfo.max_freq; + + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_START, policy); + +#ifdef CONFIG_SMP + +#ifdef CONFIG_HOTPLUG_CPU + if (per_cpu(cpufreq_cpu_governor, cpu)) { + policy->governor = per_cpu(cpufreq_cpu_governor, cpu); + dprintk("Restoring governor %s for cpu %d\n", + policy->governor->name, cpu); + } +#endif + + for_each_cpu_mask_nr(j, policy->cpus) { + if (cpu == j) + continue; + + /* check for existing affected CPUs. They may not be aware + * of it due to CPU Hotplug. + */ + managed_policy = cpufreq_cpu_get(j); // FIXME: Where is this released? What about error paths? 
+ if (unlikely(managed_policy)) { + + /* Set proper policy_cpu */ + unlock_policy_rwsem_write(cpu); + per_cpu(policy_cpu, cpu) = managed_policy->cpu; + + if (lock_policy_rwsem_write(cpu) < 0) + goto err_out_driver_exit; + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + managed_policy->cpus = policy->cpus; + per_cpu(cpufreq_cpu_data, cpu) = managed_policy; + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + dprintk("CPU already managed, adding link\n"); + ret = sysfs_create_link(&sys_dev->kobj, + &managed_policy->kobj, + "cpufreq"); + if (ret) + goto err_out_driver_exit; + + cpufreq_debug_enable_ratelimit(); + ret = 0; + goto err_out_driver_exit; /* call driver->exit() */ + } + } +#endif + memcpy(&new_policy, policy, sizeof(struct cpufreq_policy)); + + /* prepare interface data */ + ret = kobject_init_and_add(&policy->kobj, &ktype_cpufreq, &sys_dev->kobj, + "cpufreq"); + if (ret) + goto err_out_driver_exit; + + /* set up files for this cpu device */ + drv_attr = cpufreq_driver->attr; + while ((drv_attr) && (*drv_attr)) { + ret = sysfs_create_file(&policy->kobj, &((*drv_attr)->attr)); + if (ret) + goto err_out_driver_exit; + drv_attr++; + } + if (cpufreq_driver->get) { + ret = sysfs_create_file(&policy->kobj, &cpuinfo_cur_freq.attr); + if (ret) + goto err_out_driver_exit; + } + if (cpufreq_driver->target) { + ret = sysfs_create_file(&policy->kobj, &scaling_cur_freq.attr); + if (ret) + goto err_out_driver_exit; + } + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + for_each_cpu_mask_nr(j, policy->cpus) { + per_cpu(cpufreq_cpu_data, j) = policy; + per_cpu(policy_cpu, j) = policy->cpu; + } + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + /* symlink affected CPUs */ + for_each_cpu_mask_nr(j, policy->cpus) { + if (j == cpu) + continue; + if (!cpu_online(j)) + continue; + + dprintk("CPU %u already managed, adding link\n", j); + cpufreq_cpu_get(cpu); + cpu_sys_dev = get_cpu_sysdev(j); + ret = sysfs_create_link(&cpu_sys_dev->kobj, &policy->kobj, + 
"cpufreq"); + if (ret) + goto err_out_unregister; + } + + policy->governor = NULL; /* to assure that the starting sequence is + * run in cpufreq_set_policy */ + + /* set default policy */ + ret = __cpufreq_set_policy(policy, &new_policy); + policy->user_policy.policy = policy->policy; + policy->user_policy.governor = policy->governor; + + if (ret) { + dprintk("setting policy failed\n"); + goto err_out_unregister; + } + + unlock_policy_rwsem_write(cpu); + + kobject_uevent(&policy->kobj, KOBJ_ADD); + module_put(cpufreq_driver->owner); + dprintk("initialization complete\n"); + cpufreq_debug_enable_ratelimit(); + + return 0; + + +err_out_unregister: + spin_lock_irqsave(&cpufreq_driver_lock, flags); + for_each_cpu_mask_nr(j, policy->cpus) + per_cpu(cpufreq_cpu_data, j) = NULL; + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + kobject_put(&policy->kobj); + wait_for_completion(&policy->kobj_unregister); + +err_out_driver_exit: + if (cpufreq_driver->exit) + cpufreq_driver->exit(policy); + +err_out: + unlock_policy_rwsem_write(cpu); + kfree(policy); + +nomem_out: + module_put(cpufreq_driver->owner); +module_out: + cpufreq_debug_enable_ratelimit(); + return ret; +} + + +/** + * __cpufreq_remove_dev - remove a CPU device + * + * Removes the cpufreq interface for a CPU device. + * Caller should already have policy_rwsem in write mode for this CPU. + * This routine frees the rwsem before returning. 
+ */ +static int __cpufreq_remove_dev(struct sys_device *sys_dev) +{ + unsigned int cpu = sys_dev->id; + unsigned long flags; + struct cpufreq_policy *data; +#ifdef CONFIG_SMP + struct sys_device *cpu_sys_dev; + unsigned int j; +#endif + + cpufreq_debug_disable_ratelimit(); + dprintk("unregistering CPU %u\n", cpu); + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + data = per_cpu(cpufreq_cpu_data, cpu); + + if (!data) { + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + cpufreq_debug_enable_ratelimit(); + unlock_policy_rwsem_write(cpu); + return -EINVAL; + } + per_cpu(cpufreq_cpu_data, cpu) = NULL; + + +#ifdef CONFIG_SMP + /* if this isn't the CPU which is the parent of the kobj, we + * only need to unlink, put and exit + */ + if (unlikely(cpu != data->cpu)) { + dprintk("removing link\n"); + cpu_clear(cpu, data->cpus); + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + sysfs_remove_link(&sys_dev->kobj, "cpufreq"); + cpufreq_cpu_put(data); + cpufreq_debug_enable_ratelimit(); + unlock_policy_rwsem_write(cpu); + return 0; + } +#endif + +#ifdef CONFIG_SMP + +#ifdef CONFIG_HOTPLUG_CPU + per_cpu(cpufreq_cpu_governor, cpu) = data->governor; +#endif + + /* if we have other CPUs still registered, we need to unlink them, + * or else wait_for_completion below will lock up. Clean the + * per_cpu(cpufreq_cpu_data) while holding the lock, and remove + * the sysfs links afterwards. 
+ */ + if (unlikely(cpus_weight(data->cpus) > 1)) { + for_each_cpu_mask_nr(j, data->cpus) { + if (j == cpu) + continue; + per_cpu(cpufreq_cpu_data, j) = NULL; + } + } + + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + if (unlikely(cpus_weight(data->cpus) > 1)) { + for_each_cpu_mask_nr(j, data->cpus) { + if (j == cpu) + continue; + dprintk("removing link for cpu %u\n", j); +#ifdef CONFIG_HOTPLUG_CPU + per_cpu(cpufreq_cpu_governor, j) = data->governor; +#endif + cpu_sys_dev = get_cpu_sysdev(j); + sysfs_remove_link(&cpu_sys_dev->kobj, "cpufreq"); + cpufreq_cpu_put(data); + } + } +#else + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); +#endif + + if (cpufreq_driver->target) + __cpufreq_governor(data, CPUFREQ_GOV_STOP); + + unlock_policy_rwsem_write(cpu); + + kobject_put(&data->kobj); + + /* we need to make sure that the underlying kobj is actually + * not referenced anymore by anybody before we proceed with + * unloading. + */ + dprintk("waiting for dropping of refcount\n"); + wait_for_completion(&data->kobj_unregister); + dprintk("wait complete\n"); + + if (cpufreq_driver->exit) + cpufreq_driver->exit(data); + + kfree(data); + + cpufreq_debug_enable_ratelimit(); + return 0; +} + + +static int cpufreq_remove_dev(struct sys_device *sys_dev) +{ + unsigned int cpu = sys_dev->id; + int retval; + + if (cpu_is_offline(cpu)) + return 0; + + if (unlikely(lock_policy_rwsem_write(cpu))) + BUG(); + + retval = __cpufreq_remove_dev(sys_dev); + return retval; +} + + +static void handle_update(struct work_struct *work) +{ + struct cpufreq_policy *policy = + container_of(work, struct cpufreq_policy, update); + unsigned int cpu = policy->cpu; + dprintk("handle_update for cpu %u called\n", cpu); + cpufreq_update_policy(cpu); +} + +/** + * cpufreq_out_of_sync - If actual and saved CPU frequency differs, we're in deep trouble. 
+ * @cpu: cpu number + * @old_freq: CPU frequency the kernel thinks the CPU runs at + * @new_freq: CPU frequency the CPU actually runs at + * + * We adjust to current frequency first, and need to clean up later. So either call + * to cpufreq_update_policy() or schedule handle_update()). + */ +static void cpufreq_out_of_sync(unsigned int cpu, unsigned int old_freq, + unsigned int new_freq) +{ + struct cpufreq_freqs freqs; + + dprintk("Warning: CPU frequency out of sync: cpufreq and timing " + "core thinks of %u, is %u kHz.\n", old_freq, new_freq); + + freqs.cpu = cpu; + freqs.old = old_freq; + freqs.new = new_freq; + cpufreq_notify_transition(&freqs, CPUFREQ_PRECHANGE); + cpufreq_notify_transition(&freqs, CPUFREQ_POSTCHANGE); +} + + +/** + * cpufreq_quick_get - get the CPU frequency (in kHz) from policy->cur + * @cpu: CPU number + * + * This is the last known freq, without actually getting it from the driver. + * Return value will be same as what is shown in scaling_cur_freq in sysfs. + */ +unsigned int cpufreq_quick_get(unsigned int cpu) +{ + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + unsigned int ret_freq = 0; + + if (policy) { + ret_freq = policy->cur; + cpufreq_cpu_put(policy); + } + + return ret_freq; +} +EXPORT_SYMBOL(cpufreq_quick_get); + + +static unsigned int __cpufreq_get(unsigned int cpu) +{ + struct cpufreq_policy *policy = per_cpu(cpufreq_cpu_data, cpu); + unsigned int ret_freq = 0; + + if (!cpufreq_driver->get) + return ret_freq; + + ret_freq = cpufreq_driver->get(cpu); + + if (ret_freq && policy->cur && + !(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { + /* verify no discrepancy between actual and + saved value exists */ + if (unlikely(ret_freq != policy->cur)) { + cpufreq_out_of_sync(cpu, policy->cur, ret_freq); + schedule_work(&policy->update); + } + } + + return ret_freq; +} + +/** + * cpufreq_get - get the current CPU frequency (in kHz) + * @cpu: CPU number + * + * Get the CPU current (static) CPU frequency + */ +unsigned int 
cpufreq_get(unsigned int cpu) +{ + unsigned int ret_freq = 0; + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + + if (!policy) + goto out; + + if (unlikely(lock_policy_rwsem_read(cpu))) + goto out_policy; + + ret_freq = __cpufreq_get(cpu); + + unlock_policy_rwsem_read(cpu); + +out_policy: + cpufreq_cpu_put(policy); +out: + return ret_freq; +} +EXPORT_SYMBOL(cpufreq_get); + + +/** + * cpufreq_suspend - let the low level driver prepare for suspend + */ + +static int cpufreq_suspend(struct sys_device *sysdev, pm_message_t pmsg) +{ + int cpu = sysdev->id; + int ret = 0; + unsigned int cur_freq = 0; + struct cpufreq_policy *cpu_policy; + + dprintk("suspending cpu %u\n", cpu); + + if (!cpu_online(cpu)) + return 0; + + /* we may be lax here as interrupts are off. Nonetheless + * we need to grab the correct cpu policy, as to check + * whether we really run on this CPU. + */ + + cpu_policy = cpufreq_cpu_get(cpu); + if (!cpu_policy) + return -EINVAL; + + /* only handle each CPU group once */ + if (unlikely(cpu_policy->cpu != cpu)) + goto out; + + if (cpufreq_driver->suspend) { + ret = cpufreq_driver->suspend(cpu_policy, pmsg); + if (ret) { + printk(KERN_ERR "cpufreq: suspend failed in ->suspend " + "step on CPU %u\n", cpu_policy->cpu); + goto out; + } + } + + if (cpufreq_driver->flags & CPUFREQ_CONST_LOOPS) + goto out; + + if (cpufreq_driver->get) + cur_freq = cpufreq_driver->get(cpu_policy->cpu); + + if (!cur_freq || !cpu_policy->cur) { + printk(KERN_ERR "cpufreq: suspend failed to assert current " + "frequency is what timing core thinks it is.\n"); + goto out; + } + + if (unlikely(cur_freq != cpu_policy->cur)) { + struct cpufreq_freqs freqs; + + if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) + dprintk("Warning: CPU frequency is %u, " + "cpufreq assumed %u kHz.\n", + cur_freq, cpu_policy->cur); + + freqs.cpu = cpu; + freqs.old = cpu_policy->cur; + freqs.new = cur_freq; + + srcu_notifier_call_chain(&cpufreq_transition_notifier_list, + CPUFREQ_SUSPENDCHANGE, 
&freqs); + adjust_jiffies(CPUFREQ_SUSPENDCHANGE, &freqs); + + cpu_policy->cur = cur_freq; + } + +out: + cpufreq_cpu_put(cpu_policy); + return ret; +} + +/** + * cpufreq_resume - restore proper CPU frequency handling after resume + * + * 1.) resume CPUfreq hardware support (cpufreq_driver->resume()) + * 2.) if ->target and !CPUFREQ_CONST_LOOPS: verify we're in sync + * 3.) schedule call cpufreq_update_policy() ASAP as interrupts are + * restored. + */ +static int cpufreq_resume(struct sys_device *sysdev) +{ + int cpu = sysdev->id; + int ret = 0; + struct cpufreq_policy *cpu_policy; + + dprintk("resuming cpu %u\n", cpu); + + if (!cpu_online(cpu)) + return 0; + + /* we may be lax here as interrupts are off. Nonetheless + * we need to grab the correct cpu policy, as to check + * whether we really run on this CPU. + */ + + cpu_policy = cpufreq_cpu_get(cpu); + if (!cpu_policy) + return -EINVAL; + + /* only handle each CPU group once */ + if (unlikely(cpu_policy->cpu != cpu)) + goto fail; + + if (cpufreq_driver->resume) { + ret = cpufreq_driver->resume(cpu_policy); + if (ret) { + printk(KERN_ERR "cpufreq: resume failed in ->resume " + "step on CPU %u\n", cpu_policy->cpu); + goto fail; + } + } + + if (!(cpufreq_driver->flags & CPUFREQ_CONST_LOOPS)) { + unsigned int cur_freq = 0; + + if (cpufreq_driver->get) + cur_freq = cpufreq_driver->get(cpu_policy->cpu); + + if (!cur_freq || !cpu_policy->cur) { + printk(KERN_ERR "cpufreq: resume failed to assert " + "current frequency is what timing core " + "thinks it is.\n"); + goto out; + } + + if (unlikely(cur_freq != cpu_policy->cur)) { + struct cpufreq_freqs freqs; + + if (!(cpufreq_driver->flags & CPUFREQ_PM_NO_WARN)) + dprintk("Warning: CPU frequency " + "is %u, cpufreq assumed %u kHz.\n", + cur_freq, cpu_policy->cur); + + freqs.cpu = cpu; + freqs.old = cpu_policy->cur; + freqs.new = cur_freq; + + srcu_notifier_call_chain( + &cpufreq_transition_notifier_list, + CPUFREQ_RESUMECHANGE, &freqs); + 
adjust_jiffies(CPUFREQ_RESUMECHANGE, &freqs); + + cpu_policy->cur = cur_freq; + } + } + +out: + schedule_work(&cpu_policy->update); +fail: + cpufreq_cpu_put(cpu_policy); + return ret; +} + +static struct sysdev_driver cpufreq_sysdev_driver = { + .add = cpufreq_add_dev, + .remove = cpufreq_remove_dev, + .suspend = cpufreq_suspend, + .resume = cpufreq_resume, +}; + + +/********************************************************************* + * NOTIFIER LISTS INTERFACE * + *********************************************************************/ + +/** + * cpufreq_register_notifier - register a driver with cpufreq + * @nb: notifier function to register + * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER + * + * Add a driver to one of two lists: either a list of drivers that + * are notified about clock rate changes (once before and once after + * the transition), or a list of drivers that are notified about + * changes in cpufreq policy. + * + * This function may sleep, and has the same return conditions as + * blocking_notifier_chain_register. + */ +int cpufreq_register_notifier(struct notifier_block *nb, unsigned int list) +{ + int ret; + + WARN_ON(!init_cpufreq_transition_notifier_list_called); + + switch (list) { + case CPUFREQ_TRANSITION_NOTIFIER: + ret = srcu_notifier_chain_register( + &cpufreq_transition_notifier_list, nb); + break; + case CPUFREQ_POLICY_NOTIFIER: + ret = blocking_notifier_chain_register( + &cpufreq_policy_notifier_list, nb); + break; + default: + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL(cpufreq_register_notifier); + + +/** + * cpufreq_unregister_notifier - unregister a driver with cpufreq + * @nb: notifier block to be unregistered + * @list: CPUFREQ_TRANSITION_NOTIFIER or CPUFREQ_POLICY_NOTIFIER + * + * Remove a driver from the CPU frequency notifier list. + * + * This function may sleep, and has the same return conditions as + * blocking_notifier_chain_unregister. 
+ */ +int cpufreq_unregister_notifier(struct notifier_block *nb, unsigned int list) +{ + int ret; + + switch (list) { + case CPUFREQ_TRANSITION_NOTIFIER: + ret = srcu_notifier_chain_unregister( + &cpufreq_transition_notifier_list, nb); + break; + case CPUFREQ_POLICY_NOTIFIER: + ret = blocking_notifier_chain_unregister( + &cpufreq_policy_notifier_list, nb); + break; + default: + ret = -EINVAL; + } + + return ret; +} +EXPORT_SYMBOL(cpufreq_unregister_notifier); + + +/********************************************************************* + * GOVERNORS * + *********************************************************************/ + + +int __cpufreq_driver_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + int retval = -EINVAL; + + dprintk("target for CPU %u: %u kHz, relation %u\n", policy->cpu, + target_freq, relation); + if (cpu_online(policy->cpu) && cpufreq_driver->target) + retval = cpufreq_driver->target(policy, target_freq, relation); + + return retval; +} +EXPORT_SYMBOL_GPL(__cpufreq_driver_target); + +int cpufreq_driver_target(struct cpufreq_policy *policy, + unsigned int target_freq, + unsigned int relation) +{ + int ret = -EINVAL; + + policy = cpufreq_cpu_get(policy->cpu); + if (!policy) + goto no_policy; + + if (unlikely(lock_policy_rwsem_write(policy->cpu))) + goto fail; + + ret = __cpufreq_driver_target(policy, target_freq, relation); + + unlock_policy_rwsem_write(policy->cpu); + +fail: + cpufreq_cpu_put(policy); +no_policy: + return ret; +} +EXPORT_SYMBOL_GPL(cpufreq_driver_target); + +int __cpufreq_driver_getavg(struct cpufreq_policy *policy, unsigned int cpu) +{ + int ret = 0; + + policy = cpufreq_cpu_get(policy->cpu); + if (!policy) + return -EINVAL; + + if (cpu_online(cpu) && cpufreq_driver->getavg) + ret = cpufreq_driver->getavg(policy, cpu); + + cpufreq_cpu_put(policy); + return ret; +} +EXPORT_SYMBOL_GPL(__cpufreq_driver_getavg); + +/* + * when "event" is CPUFREQ_GOV_LIMITS + */ + +static int 
__cpufreq_governor(struct cpufreq_policy *policy, + unsigned int event) +{ + int ret; + + /* Only must be defined when default governor is known to have latency + restrictions, like e.g. conservative or ondemand. + That this is the case is already ensured in Kconfig + */ +#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE + struct cpufreq_governor *gov = &cpufreq_gov_performance; +#else + struct cpufreq_governor *gov = NULL; +#endif + + if (policy->governor->max_transition_latency && + policy->cpuinfo.transition_latency > + policy->governor->max_transition_latency) { + if (!gov) + return -EINVAL; + else { + printk(KERN_WARNING "%s governor failed, too long" + " transition latency of HW, fallback" + " to %s governor\n", + policy->governor->name, + gov->name); + policy->governor = gov; + } + } + + if (!try_module_get(policy->governor->owner)) + return -EINVAL; + + dprintk("__cpufreq_governor for CPU %u, event %u\n", + policy->cpu, event); + ret = policy->governor->governor(policy, event); + + /* we keep one module reference alive for + each CPU governed by this CPU */ + if ((event != CPUFREQ_GOV_START) || ret) + module_put(policy->governor->owner); + if ((event == CPUFREQ_GOV_STOP) && !ret) + module_put(policy->governor->owner); + + return ret; +} + + +int cpufreq_register_governor(struct cpufreq_governor *governor) +{ + int err; + + if (!governor) + return -EINVAL; + + mutex_lock(&cpufreq_governor_mutex); + + err = -EBUSY; + if (__find_governor(governor->name) == NULL) { + err = 0; + list_add(&governor->governor_list, &cpufreq_governor_list); + } + + mutex_unlock(&cpufreq_governor_mutex); + return err; +} +EXPORT_SYMBOL_GPL(cpufreq_register_governor); + + +void cpufreq_unregister_governor(struct cpufreq_governor *governor) +{ + if (!governor) + return; + + mutex_lock(&cpufreq_governor_mutex); + list_del(&governor->governor_list); + mutex_unlock(&cpufreq_governor_mutex); + return; +} +EXPORT_SYMBOL_GPL(cpufreq_unregister_governor); + + + 
+/********************************************************************* + * POLICY INTERFACE * + *********************************************************************/ + +/** + * cpufreq_get_policy - get the current cpufreq_policy + * @policy: struct cpufreq_policy into which the current cpufreq_policy is written + * + * Reads the current cpufreq policy. + */ +int cpufreq_get_policy(struct cpufreq_policy *policy, unsigned int cpu) +{ + struct cpufreq_policy *cpu_policy; + if (!policy) + return -EINVAL; + + cpu_policy = cpufreq_cpu_get(cpu); + if (!cpu_policy) + return -EINVAL; + + memcpy(policy, cpu_policy, sizeof(struct cpufreq_policy)); + + cpufreq_cpu_put(cpu_policy); + return 0; +} +EXPORT_SYMBOL(cpufreq_get_policy); + + +/* + * data : current policy. + * policy : policy to be set. + */ +static int __cpufreq_set_policy(struct cpufreq_policy *data, + struct cpufreq_policy *policy) +{ + int ret = 0; + + cpufreq_debug_disable_ratelimit(); + dprintk("setting new policy for CPU %u: %u - %u kHz\n", policy->cpu, + policy->min, policy->max); + + memcpy(&policy->cpuinfo, &data->cpuinfo, + sizeof(struct cpufreq_cpuinfo)); + + if (policy->min > data->max || policy->max < data->min) { + ret = -EINVAL; + goto error_out; + } + + /* verify the cpu speed can be set within this limit */ + ret = cpufreq_driver->verify(policy); + if (ret) + goto error_out; + + /* adjust if necessary - all reasons */ + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_ADJUST, policy); + + /* adjust if necessary - hardware incompatibility*/ + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_INCOMPATIBLE, policy); + + /* verify the cpu speed can be set within this limit, + which might be different to the first one */ + ret = cpufreq_driver->verify(policy); + if (ret) + goto error_out; + + /* notification of the new policy */ + blocking_notifier_call_chain(&cpufreq_policy_notifier_list, + CPUFREQ_NOTIFY, policy); + + data->min = policy->min; + data->max = 
policy->max; + + dprintk("new min and max freqs are %u - %u kHz\n", + data->min, data->max); + + if (cpufreq_driver->setpolicy) { + data->policy = policy->policy; + dprintk("setting range\n"); + ret = cpufreq_driver->setpolicy(policy); + } else { + if (policy->governor != data->governor) { + /* save old, working values */ + struct cpufreq_governor *old_gov = data->governor; + + dprintk("governor switch\n"); + + /* end old governor */ + if (data->governor) + __cpufreq_governor(data, CPUFREQ_GOV_STOP); + + /* start new governor */ + data->governor = policy->governor; + if (__cpufreq_governor(data, CPUFREQ_GOV_START)) { + /* new governor failed, so re-start old one */ + dprintk("starting governor %s failed\n", + data->governor->name); + if (old_gov) { + data->governor = old_gov; + __cpufreq_governor(data, + CPUFREQ_GOV_START); + } + ret = -EINVAL; + goto error_out; + } + /* might be a policy change, too, so fall through */ + } + dprintk("governor: change or update limits\n"); + __cpufreq_governor(data, CPUFREQ_GOV_LIMITS); + } + +error_out: + cpufreq_debug_enable_ratelimit(); + return ret; +} + +/** + * cpufreq_update_policy - re-evaluate an existing cpufreq policy + * @cpu: CPU which shall be re-evaluated + * + * Usefull for policy notifiers which have different necessities + * at different times. 
+ */ +int cpufreq_update_policy(unsigned int cpu) +{ + struct cpufreq_policy *data = cpufreq_cpu_get(cpu); + struct cpufreq_policy policy; + int ret; + + if (!data) { + ret = -ENODEV; + goto no_policy; + } + + if (unlikely(lock_policy_rwsem_write(cpu))) { + ret = -EINVAL; + goto fail; + } + + dprintk("updating policy for CPU %u\n", cpu); + memcpy(&policy, data, sizeof(struct cpufreq_policy)); + policy.min = data->user_policy.min; + policy.max = data->user_policy.max; + policy.policy = data->user_policy.policy; + policy.governor = data->user_policy.governor; + + /* BIOS might change freq behind our back + -> ask driver for current freq and notify governors about a change */ + if (cpufreq_driver->get) { + policy.cur = cpufreq_driver->get(cpu); + if (!data->cur) { + dprintk("Driver did not initialize current freq"); + data->cur = policy.cur; + } else { + if (data->cur != policy.cur) + cpufreq_out_of_sync(cpu, data->cur, + policy.cur); + } + } + + ret = __cpufreq_set_policy(data, &policy); + + unlock_policy_rwsem_write(cpu); + +fail: + cpufreq_cpu_put(data); +no_policy: + return ret; +} +EXPORT_SYMBOL(cpufreq_update_policy); + +static int __cpuinit cpufreq_cpu_callback(struct notifier_block *nfb, + unsigned long action, void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + struct sys_device *sys_dev; + + sys_dev = get_cpu_sysdev(cpu); + if (sys_dev) { + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + cpufreq_add_dev(sys_dev); + break; + case CPU_DOWN_PREPARE: + case CPU_DOWN_PREPARE_FROZEN: + if (unlikely(lock_policy_rwsem_write(cpu))) + BUG(); + + __cpufreq_remove_dev(sys_dev); + break; + case CPU_DOWN_FAILED: + case CPU_DOWN_FAILED_FROZEN: + cpufreq_add_dev(sys_dev); + break; + } + } + return NOTIFY_OK; +} + +static struct notifier_block __refdata cpufreq_cpu_notifier = +{ + .notifier_call = cpufreq_cpu_callback, +}; + +/********************************************************************* + * REGISTER / UNREGISTER CPUFREQ DRIVER * + 
*********************************************************************/ + +/** + * cpufreq_register_driver - register a CPU Frequency driver + * @driver_data: A struct cpufreq_driver containing the values# + * submitted by the CPU Frequency driver. + * + * Registers a CPU Frequency driver to this core code. This code + * returns zero on success, -EBUSY when another driver got here first + * (and isn't unregistered in the meantime). + * + */ +int cpufreq_register_driver(struct cpufreq_driver *driver_data) +{ + unsigned long flags; + int ret; + + if (!driver_data || !driver_data->verify || !driver_data->init || + ((!driver_data->setpolicy) && (!driver_data->target))) + return -EINVAL; + + dprintk("trying to register driver %s\n", driver_data->name); + + if (driver_data->setpolicy) + driver_data->flags |= CPUFREQ_CONST_LOOPS; + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + if (cpufreq_driver) { + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + return -EBUSY; + } + cpufreq_driver = driver_data; + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + ret = sysdev_driver_register(&cpu_sysdev_class, + &cpufreq_sysdev_driver); + + if ((!ret) && !(cpufreq_driver->flags & CPUFREQ_STICKY)) { + int i; + ret = -ENODEV; + + /* check for at least one working CPU */ + for (i = 0; i < nr_cpu_ids; i++) + if (cpu_possible(i) && per_cpu(cpufreq_cpu_data, i)) { + ret = 0; + break; + } + + /* if all ->init() calls failed, unregister */ + if (ret) { + dprintk("no CPU initialized for driver %s\n", + driver_data->name); + sysdev_driver_unregister(&cpu_sysdev_class, + &cpufreq_sysdev_driver); + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + cpufreq_driver = NULL; + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + } + } + + if (!ret) { + register_hotcpu_notifier(&cpufreq_cpu_notifier); + dprintk("driver %s up and running\n", driver_data->name); + cpufreq_debug_enable_ratelimit(); + } + + return ret; +} +EXPORT_SYMBOL_GPL(cpufreq_register_driver); + + +/** + * 
cpufreq_unregister_driver - unregister the current CPUFreq driver + * + * Unregister the current CPUFreq driver. Only call this if you have + * the right to do so, i.e. if you have succeeded in initialising before! + * Returns zero if successful, and -EINVAL if the cpufreq_driver is + * currently not initialised. + */ +int cpufreq_unregister_driver(struct cpufreq_driver *driver) +{ + unsigned long flags; + + cpufreq_debug_disable_ratelimit(); + + if (!cpufreq_driver || (driver != cpufreq_driver)) { + cpufreq_debug_enable_ratelimit(); + return -EINVAL; + } + + dprintk("unregistering driver %s\n", driver->name); + + sysdev_driver_unregister(&cpu_sysdev_class, &cpufreq_sysdev_driver); + unregister_hotcpu_notifier(&cpufreq_cpu_notifier); + + spin_lock_irqsave(&cpufreq_driver_lock, flags); + cpufreq_driver = NULL; + spin_unlock_irqrestore(&cpufreq_driver_lock, flags); + + return 0; +} +EXPORT_SYMBOL_GPL(cpufreq_unregister_driver); + +static int __init cpufreq_core_init(void) +{ + int cpu; + + for_each_possible_cpu(cpu) { + per_cpu(policy_cpu, cpu) = -1; + init_rwsem(&per_cpu(cpu_policy_rwsem, cpu)); + } + return 0; +} + +core_initcall(cpufreq_core_init); diff --git a/drivers/cpufreq/cpufreq_conservative.c b/drivers/cpufreq/cpufreq_conservative.c new file mode 100644 index 0000000..e265783 --- /dev/null +++ b/drivers/cpufreq/cpufreq_conservative.c @@ -0,0 +1,614 @@ +/* + * drivers/cpufreq/cpufreq_conservative.c + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. + * Jun Nakajima <jun.nakajima@intel.com> + * (C) 2004 Alexander Clouter <alex-kernel@digriz.org.uk> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/interrupt.h> +#include <linux/ctype.h> +#include <linux/cpufreq.h> +#include <linux/sysctl.h> +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/sysfs.h> +#include <linux/cpu.h> +#include <linux/kmod.h> +#include <linux/workqueue.h> +#include <linux/jiffies.h> +#include <linux/kernel_stat.h> +#include <linux/percpu.h> +#include <linux/mutex.h> +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define DEF_FREQUENCY_DOWN_THRESHOLD (20) + +/* + * The polling frequency of this governor depends on the capability of + * the processor. Default polling frequency is 1000 times the transition + * latency of the processor. The governor will work on any processor with + * transition latency <= 10mS, using appropriate sampling + * rate. + * For CPUs with transition latency > 10mS (mostly drivers + * with CPUFREQ_ETERNAL), this governor will not work. + * All times here are in uS. 
+ */ +static unsigned int def_sampling_rate; +#define MIN_SAMPLING_RATE_RATIO (2) +/* for correct statistics, we need at least 10 ticks between each measure */ +#define MIN_STAT_SAMPLING_RATE \ + (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) +#define MIN_SAMPLING_RATE \ + (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) +#define MAX_SAMPLING_RATE (500 * def_sampling_rate) +#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000) +#define DEF_SAMPLING_DOWN_FACTOR (1) +#define MAX_SAMPLING_DOWN_FACTOR (10) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + +static void do_dbs_timer(struct work_struct *work); + +struct cpu_dbs_info_s { + struct cpufreq_policy *cur_policy; + unsigned int prev_cpu_idle_up; + unsigned int prev_cpu_idle_down; + unsigned int enable; + unsigned int down_skip; + unsigned int requested_freq; +}; +static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +/* + * DEADLOCK ALERT! There is a ordering requirement between cpu_hotplug + * lock and dbs_mutex. cpu_hotplug lock should always be held before + * dbs_mutex. If any function that can potentially take cpu_hotplug lock + * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then + * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock + * is recursive for the same process. 
-Venki + */ +static DEFINE_MUTEX (dbs_mutex); +static DECLARE_DELAYED_WORK(dbs_work, do_dbs_timer); + +struct dbs_tuners { + unsigned int sampling_rate; + unsigned int sampling_down_factor; + unsigned int up_threshold; + unsigned int down_threshold; + unsigned int ignore_nice; + unsigned int freq_step; +}; + +static struct dbs_tuners dbs_tuners_ins = { + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .down_threshold = DEF_FREQUENCY_DOWN_THRESHOLD, + .sampling_down_factor = DEF_SAMPLING_DOWN_FACTOR, + .ignore_nice = 0, + .freq_step = 5, +}; + +static inline unsigned int get_cpu_idle_time(unsigned int cpu) +{ + unsigned int add_nice = 0, ret; + + if (dbs_tuners_ins.ignore_nice) + add_nice = kstat_cpu(cpu).cpustat.nice; + + ret = kstat_cpu(cpu).cpustat.idle + + kstat_cpu(cpu).cpustat.iowait + + add_nice; + + return ret; +} + +/* keep track of frequency transitions */ +static int +dbs_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, + freq->cpu); + + if (!this_dbs_info->enable) + return 0; + + this_dbs_info->requested_freq = freq->new; + + return 0; +} + +static struct notifier_block dbs_cpufreq_notifier_block = { + .notifier_call = dbs_cpufreq_notifier +}; + +/************************** sysfs interface ************************/ +static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) +{ + return sprintf (buf, "%u\n", MAX_SAMPLING_RATE); +} + +static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) +{ + return sprintf (buf, "%u\n", MIN_SAMPLING_RATE); +} + +#define define_one_ro(_name) \ +static struct freq_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +define_one_ro(sampling_rate_max); +define_one_ro(sampling_rate_min); + +/* cpufreq_conservative Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct cpufreq_policy *unused, char 
*buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(sampling_down_factor, sampling_down_factor); +show_one(up_threshold, up_threshold); +show_one(down_threshold, down_threshold); +show_one(ignore_nice_load, ignore_nice); +show_one(freq_step, freq_step); + +static ssize_t store_sampling_down_factor(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf (buf, "%u", &input); + if (ret != 1 || input > MAX_SAMPLING_DOWN_FACTOR || input < 1) + return -EINVAL; + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.sampling_down_factor = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_sampling_rate(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf (buf, "%u", &input); + + mutex_lock(&dbs_mutex); + if (ret != 1 || input > MAX_SAMPLING_RATE || input < MIN_SAMPLING_RATE) { + mutex_unlock(&dbs_mutex); + return -EINVAL; + } + + dbs_tuners_ins.sampling_rate = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_up_threshold(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf (buf, "%u", &input); + + mutex_lock(&dbs_mutex); + if (ret != 1 || input > 100 || input <= dbs_tuners_ins.down_threshold) { + mutex_unlock(&dbs_mutex); + return -EINVAL; + } + + dbs_tuners_ins.up_threshold = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_down_threshold(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf (buf, "%u", &input); + + mutex_lock(&dbs_mutex); + if (ret != 1 || input > 100 || input >= dbs_tuners_ins.up_threshold) { + mutex_unlock(&dbs_mutex); + return -EINVAL; + } + + dbs_tuners_ins.down_threshold = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t 
store_ignore_nice_load(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if (ret != 1) + return -EINVAL; + + if (input > 1) + input = 1; + + mutex_lock(&dbs_mutex); + if (input == dbs_tuners_ins.ignore_nice) { /* nothing to do */ + mutex_unlock(&dbs_mutex); + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle_up and prev_cpu_idle_down */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(j); + j_dbs_info->prev_cpu_idle_down = j_dbs_info->prev_cpu_idle_up; + } + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_freq_step(struct cpufreq_policy *policy, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 100) + input = 100; + + /* no need to test here if freq_step is zero as the user might actually + * want this, they would be crazy though :) */ + mutex_lock(&dbs_mutex); + dbs_tuners_ins.freq_step = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +#define define_one_rw(_name) \ +static struct freq_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + +define_one_rw(sampling_rate); +define_one_rw(sampling_down_factor); +define_one_rw(up_threshold); +define_one_rw(down_threshold); +define_one_rw(ignore_nice_load); +define_one_rw(freq_step); + +static struct attribute * dbs_attributes[] = { + &sampling_rate_max.attr, + &sampling_rate_min.attr, + &sampling_rate.attr, + &sampling_down_factor.attr, + &up_threshold.attr, + &down_threshold.attr, + &ignore_nice_load.attr, + &freq_step.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "conservative", +}; + +/************************** sysfs end 
************************/ + +static void dbs_check_cpu(int cpu) +{ + unsigned int idle_ticks, up_idle_ticks, down_idle_ticks; + unsigned int tmp_idle_ticks, total_idle_ticks; + unsigned int freq_target; + unsigned int freq_down_sampling_rate; + struct cpu_dbs_info_s *this_dbs_info = &per_cpu(cpu_dbs_info, cpu); + struct cpufreq_policy *policy; + + if (!this_dbs_info->enable) + return; + + policy = this_dbs_info->cur_policy; + + /* + * The default safe range is 20% to 80% + * Every sampling_rate, we check + * - If current idle time is less than 20%, then we try to + * increase frequency + * Every sampling_rate*sampling_down_factor, we check + * - If current idle time is more than 80%, then we try to + * decrease frequency + * + * Any frequency increase takes it to the maximum frequency. + * Frequency reduction happens at minimum steps of + * 5% (default) of max_frequency + */ + + /* Check for frequency increase */ + idle_ticks = UINT_MAX; + + /* Check for frequency increase */ + total_idle_ticks = get_cpu_idle_time(cpu); + tmp_idle_ticks = total_idle_ticks - + this_dbs_info->prev_cpu_idle_up; + this_dbs_info->prev_cpu_idle_up = total_idle_ticks; + + if (tmp_idle_ticks < idle_ticks) + idle_ticks = tmp_idle_ticks; + + /* Scale idle ticks by 100 and compare with up and down ticks */ + idle_ticks *= 100; + up_idle_ticks = (100 - dbs_tuners_ins.up_threshold) * + usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + + if (idle_ticks < up_idle_ticks) { + this_dbs_info->down_skip = 0; + this_dbs_info->prev_cpu_idle_down = + this_dbs_info->prev_cpu_idle_up; + + /* if we are already at full speed then break out early */ + if (this_dbs_info->requested_freq == policy->max) + return; + + freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; + + /* max freq cannot be less than 100. But who knows.... 
*/ + if (unlikely(freq_target == 0)) + freq_target = 5; + + this_dbs_info->requested_freq += freq_target; + if (this_dbs_info->requested_freq > policy->max) + this_dbs_info->requested_freq = policy->max; + + __cpufreq_driver_target(policy, this_dbs_info->requested_freq, + CPUFREQ_RELATION_H); + return; + } + + /* Check for frequency decrease */ + this_dbs_info->down_skip++; + if (this_dbs_info->down_skip < dbs_tuners_ins.sampling_down_factor) + return; + + /* Check for frequency decrease */ + total_idle_ticks = this_dbs_info->prev_cpu_idle_up; + tmp_idle_ticks = total_idle_ticks - + this_dbs_info->prev_cpu_idle_down; + this_dbs_info->prev_cpu_idle_down = total_idle_ticks; + + if (tmp_idle_ticks < idle_ticks) + idle_ticks = tmp_idle_ticks; + + /* Scale idle ticks by 100 and compare with up and down ticks */ + idle_ticks *= 100; + this_dbs_info->down_skip = 0; + + freq_down_sampling_rate = dbs_tuners_ins.sampling_rate * + dbs_tuners_ins.sampling_down_factor; + down_idle_ticks = (100 - dbs_tuners_ins.down_threshold) * + usecs_to_jiffies(freq_down_sampling_rate); + + if (idle_ticks > down_idle_ticks) { + /* + * if we are already at the lowest speed then break out early + * or if we 'cannot' reduce the speed as the user might want + * freq_target to be zero + */ + if (this_dbs_info->requested_freq == policy->min + || dbs_tuners_ins.freq_step == 0) + return; + + freq_target = (dbs_tuners_ins.freq_step * policy->max) / 100; + + /* max freq cannot be less than 100. But who knows.... 
*/ + if (unlikely(freq_target == 0)) + freq_target = 5; + + this_dbs_info->requested_freq -= freq_target; + if (this_dbs_info->requested_freq < policy->min) + this_dbs_info->requested_freq = policy->min; + + __cpufreq_driver_target(policy, this_dbs_info->requested_freq, + CPUFREQ_RELATION_H); + return; + } +} + +static void do_dbs_timer(struct work_struct *work) +{ + int i; + mutex_lock(&dbs_mutex); + for_each_online_cpu(i) + dbs_check_cpu(i); + schedule_delayed_work(&dbs_work, + usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); + mutex_unlock(&dbs_mutex); +} + +static inline void dbs_timer_init(void) +{ + init_timer_deferrable(&dbs_work.timer); + schedule_delayed_work(&dbs_work, + usecs_to_jiffies(dbs_tuners_ins.sampling_rate)); + return; +} + +static inline void dbs_timer_exit(void) +{ + cancel_delayed_work(&dbs_work); + return; +} + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int j; + int rc; + + this_dbs_info = &per_cpu(cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + if (this_dbs_info->enable) /* Already enabled */ + break; + + mutex_lock(&dbs_mutex); + + rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); + if (rc) { + mutex_unlock(&dbs_mutex); + return rc; + } + + for_each_cpu_mask_nr(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle_up = get_cpu_idle_time(cpu); + j_dbs_info->prev_cpu_idle_down + = j_dbs_info->prev_cpu_idle_up; + } + this_dbs_info->enable = 1; + this_dbs_info->down_skip = 0; + this_dbs_info->requested_freq = policy->cur; + + dbs_enable++; + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + unsigned int latency; + /* policy latency is in nS. 
Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + + def_sampling_rate = 10 * latency * + DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; + + if (def_sampling_rate < MIN_STAT_SAMPLING_RATE) + def_sampling_rate = MIN_STAT_SAMPLING_RATE; + + dbs_tuners_ins.sampling_rate = def_sampling_rate; + + dbs_timer_init(); + cpufreq_register_notifier( + &dbs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + mutex_unlock(&dbs_mutex); + break; + + case CPUFREQ_GOV_STOP: + mutex_lock(&dbs_mutex); + this_dbs_info->enable = 0; + sysfs_remove_group(&policy->kobj, &dbs_attr_group); + dbs_enable--; + /* + * Stop the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 0) { + dbs_timer_exit(); + cpufreq_unregister_notifier( + &dbs_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + mutex_unlock(&dbs_mutex); + + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&dbs_mutex); + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target( + this_dbs_info->cur_policy, + policy->max, CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target( + this_dbs_info->cur_policy, + policy->min, CPUFREQ_RELATION_L); + mutex_unlock(&dbs_mutex); + break; + } + return 0; +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +static +#endif +struct cpufreq_governor cpufreq_gov_conservative = { + .name = "conservative", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gov_dbs_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_conservative); +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + /* Make sure that the scheduled work is indeed not running */ + flush_scheduled_work(); + + cpufreq_unregister_governor(&cpufreq_gov_conservative); +} + + +MODULE_AUTHOR ("Alexander Clouter <alex-kernel@digriz.org.uk>"); 
+MODULE_DESCRIPTION ("'cpufreq_conservative' - A dynamic cpufreq governor for " + "Low Latency Frequency Transition capable processors " + "optimised for use in a battery environment"); +MODULE_LICENSE ("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_CONSERVATIVE +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/drivers/cpufreq/cpufreq_ondemand.c b/drivers/cpufreq/cpufreq_ondemand.c new file mode 100644 index 0000000..2ab3c12 --- /dev/null +++ b/drivers/cpufreq/cpufreq_ondemand.c @@ -0,0 +1,685 @@ +/* + * drivers/cpufreq/cpufreq_ondemand.c + * + * Copyright (C) 2001 Russell King + * (C) 2003 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. + * Jun Nakajima <jun.nakajima@intel.com> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/cpufreq.h> +#include <linux/cpu.h> +#include <linux/jiffies.h> +#include <linux/kernel_stat.h> +#include <linux/mutex.h> +#include <linux/hrtimer.h> +#include <linux/tick.h> +#include <linux/ktime.h> + +/* + * dbs is used in this file as a shortform for demandbased switching + * It helps to keep variable names smaller, simpler + */ + +#define DEF_FREQUENCY_DOWN_DIFFERENTIAL (10) +#define DEF_FREQUENCY_UP_THRESHOLD (80) +#define MICRO_FREQUENCY_DOWN_DIFFERENTIAL (3) +#define MICRO_FREQUENCY_UP_THRESHOLD (95) +#define MIN_FREQUENCY_UP_THRESHOLD (11) +#define MAX_FREQUENCY_UP_THRESHOLD (100) + +/* + * The polling frequency of this governor depends on the capability of + * the processor. Default polling frequency is 1000 times the transition + * latency of the processor. The governor will work on any processor with + * transition latency <= 10mS, using appropriate sampling + * rate. 
+ * For CPUs with transition latency > 10mS (mostly drivers with CPUFREQ_ETERNAL) + * this governor will not work. + * All times here are in uS. + */ +static unsigned int def_sampling_rate; +#define MIN_SAMPLING_RATE_RATIO (2) +/* for correct statistics, we need at least 10 ticks between each measure */ +#define MIN_STAT_SAMPLING_RATE \ + (MIN_SAMPLING_RATE_RATIO * jiffies_to_usecs(10)) +#define MIN_SAMPLING_RATE \ + (def_sampling_rate / MIN_SAMPLING_RATE_RATIO) +#define MAX_SAMPLING_RATE (500 * def_sampling_rate) +#define DEF_SAMPLING_RATE_LATENCY_MULTIPLIER (1000) +#define TRANSITION_LATENCY_LIMIT (10 * 1000 * 1000) + +static void do_dbs_timer(struct work_struct *work); + +/* Sampling types */ +enum {DBS_NORMAL_SAMPLE, DBS_SUB_SAMPLE}; + +struct cpu_dbs_info_s { + cputime64_t prev_cpu_idle; + cputime64_t prev_cpu_wall; + cputime64_t prev_cpu_nice; + struct cpufreq_policy *cur_policy; + struct delayed_work work; + struct cpufreq_frequency_table *freq_table; + unsigned int freq_lo; + unsigned int freq_lo_jiffies; + unsigned int freq_hi_jiffies; + int cpu; + unsigned int enable:1, + sample_type:1; +}; +static DEFINE_PER_CPU(struct cpu_dbs_info_s, cpu_dbs_info); + +static unsigned int dbs_enable; /* number of CPUs using this policy */ + +/* + * DEADLOCK ALERT! There is a ordering requirement between cpu_hotplug + * lock and dbs_mutex. cpu_hotplug lock should always be held before + * dbs_mutex. If any function that can potentially take cpu_hotplug lock + * (like __cpufreq_driver_target()) is being called with dbs_mutex taken, then + * cpu_hotplug lock should be taken before that. Note that cpu_hotplug lock + * is recursive for the same process. 
-Venki + */ +static DEFINE_MUTEX(dbs_mutex); + +static struct workqueue_struct *kondemand_wq; + +static struct dbs_tuners { + unsigned int sampling_rate; + unsigned int up_threshold; + unsigned int down_differential; + unsigned int ignore_nice; + unsigned int powersave_bias; +} dbs_tuners_ins = { + .up_threshold = DEF_FREQUENCY_UP_THRESHOLD, + .down_differential = DEF_FREQUENCY_DOWN_DIFFERENTIAL, + .ignore_nice = 0, + .powersave_bias = 0, +}; + +static inline cputime64_t get_cpu_idle_time_jiffy(unsigned int cpu, + cputime64_t *wall) +{ + cputime64_t idle_time; + cputime64_t cur_wall_time; + cputime64_t busy_time; + + cur_wall_time = jiffies64_to_cputime64(get_jiffies_64()); + busy_time = cputime64_add(kstat_cpu(cpu).cpustat.user, + kstat_cpu(cpu).cpustat.system); + + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.irq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.softirq); + busy_time = cputime64_add(busy_time, kstat_cpu(cpu).cpustat.steal); + + if (!dbs_tuners_ins.ignore_nice) { + busy_time = cputime64_add(busy_time, + kstat_cpu(cpu).cpustat.nice); + } + + idle_time = cputime64_sub(cur_wall_time, busy_time); + if (wall) + *wall = cur_wall_time; + + return idle_time; +} + +static inline cputime64_t get_cpu_idle_time(unsigned int cpu, cputime64_t *wall) +{ + u64 idle_time = get_cpu_idle_time_us(cpu, wall); + + if (idle_time == -1ULL) + return get_cpu_idle_time_jiffy(cpu, wall); + + if (dbs_tuners_ins.ignore_nice) { + cputime64_t cur_nice; + unsigned long cur_nice_jiffies; + struct cpu_dbs_info_s *dbs_info; + + dbs_info = &per_cpu(cpu_dbs_info, cpu); + cur_nice = cputime64_sub(kstat_cpu(cpu).cpustat.nice, + dbs_info->prev_cpu_nice); + /* + * Assumption: nice time between sampling periods will be + * less than 2^32 jiffies for 32 bit sys + */ + cur_nice_jiffies = (unsigned long) + cputime64_to_jiffies64(cur_nice); + dbs_info->prev_cpu_nice = kstat_cpu(cpu).cpustat.nice; + return idle_time + jiffies_to_usecs(cur_nice_jiffies); + } + return 
idle_time; +} + +/* + * Find right freq to be set now with powersave_bias on. + * Returns the freq_hi to be used right now and will set freq_hi_jiffies, + * freq_lo, and freq_lo_jiffies in percpu area for averaging freqs. + */ +static unsigned int powersave_bias_target(struct cpufreq_policy *policy, + unsigned int freq_next, + unsigned int relation) +{ + unsigned int freq_req, freq_reduc, freq_avg; + unsigned int freq_hi, freq_lo; + unsigned int index = 0; + unsigned int jiffies_total, jiffies_hi, jiffies_lo; + struct cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, policy->cpu); + + if (!dbs_info->freq_table) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_next; + } + + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_next, + relation, &index); + freq_req = dbs_info->freq_table[index].frequency; + freq_reduc = freq_req * dbs_tuners_ins.powersave_bias / 1000; + freq_avg = freq_req - freq_reduc; + + /* Find freq bounds for freq_avg in freq_table */ + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_H, &index); + freq_lo = dbs_info->freq_table[index].frequency; + index = 0; + cpufreq_frequency_table_target(policy, dbs_info->freq_table, freq_avg, + CPUFREQ_RELATION_L, &index); + freq_hi = dbs_info->freq_table[index].frequency; + + /* Find out how long we have to be in hi and lo freqs */ + if (freq_hi == freq_lo) { + dbs_info->freq_lo = 0; + dbs_info->freq_lo_jiffies = 0; + return freq_lo; + } + jiffies_total = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + jiffies_hi = (freq_avg - freq_lo) * jiffies_total; + jiffies_hi += ((freq_hi - freq_lo) / 2); + jiffies_hi /= (freq_hi - freq_lo); + jiffies_lo = jiffies_total - jiffies_hi; + dbs_info->freq_lo = freq_lo; + dbs_info->freq_lo_jiffies = jiffies_lo; + dbs_info->freq_hi_jiffies = jiffies_hi; + return freq_hi; +} + +static void ondemand_powersave_bias_init(void) +{ + int i; + for_each_online_cpu(i) { + struct 
cpu_dbs_info_s *dbs_info = &per_cpu(cpu_dbs_info, i); + dbs_info->freq_table = cpufreq_frequency_get_table(i); + dbs_info->freq_lo = 0; + } +} + +/************************** sysfs interface ************************/ +static ssize_t show_sampling_rate_max(struct cpufreq_policy *policy, char *buf) +{ + return sprintf (buf, "%u\n", MAX_SAMPLING_RATE); +} + +static ssize_t show_sampling_rate_min(struct cpufreq_policy *policy, char *buf) +{ + return sprintf (buf, "%u\n", MIN_SAMPLING_RATE); +} + +#define define_one_ro(_name) \ +static struct freq_attr _name = \ +__ATTR(_name, 0444, show_##_name, NULL) + +define_one_ro(sampling_rate_max); +define_one_ro(sampling_rate_min); + +/* cpufreq_ondemand Governor Tunables */ +#define show_one(file_name, object) \ +static ssize_t show_##file_name \ +(struct cpufreq_policy *unused, char *buf) \ +{ \ + return sprintf(buf, "%u\n", dbs_tuners_ins.object); \ +} +show_one(sampling_rate, sampling_rate); +show_one(up_threshold, up_threshold); +show_one(ignore_nice_load, ignore_nice); +show_one(powersave_bias, powersave_bias); + +static ssize_t store_sampling_rate(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + mutex_lock(&dbs_mutex); + if (ret != 1 || input > MAX_SAMPLING_RATE + || input < MIN_SAMPLING_RATE) { + mutex_unlock(&dbs_mutex); + return -EINVAL; + } + + dbs_tuners_ins.sampling_rate = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_up_threshold(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + mutex_lock(&dbs_mutex); + if (ret != 1 || input > MAX_FREQUENCY_UP_THRESHOLD || + input < MIN_FREQUENCY_UP_THRESHOLD) { + mutex_unlock(&dbs_mutex); + return -EINVAL; + } + + dbs_tuners_ins.up_threshold = input; + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_ignore_nice_load(struct cpufreq_policy *policy, + 
const char *buf, size_t count) +{ + unsigned int input; + int ret; + + unsigned int j; + + ret = sscanf(buf, "%u", &input); + if ( ret != 1 ) + return -EINVAL; + + if ( input > 1 ) + input = 1; + + mutex_lock(&dbs_mutex); + if ( input == dbs_tuners_ins.ignore_nice ) { /* nothing to do */ + mutex_unlock(&dbs_mutex); + return count; + } + dbs_tuners_ins.ignore_nice = input; + + /* we need to re-evaluate prev_cpu_idle */ + for_each_online_cpu(j) { + struct cpu_dbs_info_s *dbs_info; + dbs_info = &per_cpu(cpu_dbs_info, j); + dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &dbs_info->prev_cpu_wall); + } + mutex_unlock(&dbs_mutex); + + return count; +} + +static ssize_t store_powersave_bias(struct cpufreq_policy *unused, + const char *buf, size_t count) +{ + unsigned int input; + int ret; + ret = sscanf(buf, "%u", &input); + + if (ret != 1) + return -EINVAL; + + if (input > 1000) + input = 1000; + + mutex_lock(&dbs_mutex); + dbs_tuners_ins.powersave_bias = input; + ondemand_powersave_bias_init(); + mutex_unlock(&dbs_mutex); + + return count; +} + +#define define_one_rw(_name) \ +static struct freq_attr _name = \ +__ATTR(_name, 0644, show_##_name, store_##_name) + +define_one_rw(sampling_rate); +define_one_rw(up_threshold); +define_one_rw(ignore_nice_load); +define_one_rw(powersave_bias); + +static struct attribute * dbs_attributes[] = { + &sampling_rate_max.attr, + &sampling_rate_min.attr, + &sampling_rate.attr, + &up_threshold.attr, + &ignore_nice_load.attr, + &powersave_bias.attr, + NULL +}; + +static struct attribute_group dbs_attr_group = { + .attrs = dbs_attributes, + .name = "ondemand", +}; + +/************************** sysfs end ************************/ + +static void dbs_check_cpu(struct cpu_dbs_info_s *this_dbs_info) +{ + unsigned int max_load_freq; + + struct cpufreq_policy *policy; + unsigned int j; + + if (!this_dbs_info->enable) + return; + + this_dbs_info->freq_lo = 0; + policy = this_dbs_info->cur_policy; + + /* + * Every sampling_rate, we check, if 
current idle time is less + * than 20% (default), then we try to increase frequency + * Every sampling_rate, we look for a the lowest + * frequency which can sustain the load while keeping idle time over + * 30%. If such a frequency exist, we try to decrease to this frequency. + * + * Any frequency increase takes it to the maximum frequency. + * Frequency reduction happens at minimum steps of + * 5% (default) of current frequency + */ + + /* Get Absolute Load - in terms of freq */ + max_load_freq = 0; + + for_each_cpu_mask_nr(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + cputime64_t cur_wall_time, cur_idle_time; + unsigned int idle_time, wall_time; + unsigned int load, load_freq; + int freq_avg; + + j_dbs_info = &per_cpu(cpu_dbs_info, j); + + cur_idle_time = get_cpu_idle_time(j, &cur_wall_time); + + wall_time = (unsigned int) cputime64_sub(cur_wall_time, + j_dbs_info->prev_cpu_wall); + j_dbs_info->prev_cpu_wall = cur_wall_time; + + idle_time = (unsigned int) cputime64_sub(cur_idle_time, + j_dbs_info->prev_cpu_idle); + j_dbs_info->prev_cpu_idle = cur_idle_time; + + if (unlikely(!wall_time || wall_time < idle_time)) + continue; + + load = 100 * (wall_time - idle_time) / wall_time; + + freq_avg = __cpufreq_driver_getavg(policy, j); + if (freq_avg <= 0) + freq_avg = policy->cur; + + load_freq = load * freq_avg; + if (load_freq > max_load_freq) + max_load_freq = load_freq; + } + + /* Check for frequency increase */ + if (max_load_freq > dbs_tuners_ins.up_threshold * policy->cur) { + /* if we are already at full speed then break out early */ + if (!dbs_tuners_ins.powersave_bias) { + if (policy->cur == policy->max) + return; + + __cpufreq_driver_target(policy, policy->max, + CPUFREQ_RELATION_H); + } else { + int freq = powersave_bias_target(policy, policy->max, + CPUFREQ_RELATION_H); + __cpufreq_driver_target(policy, freq, + CPUFREQ_RELATION_L); + } + return; + } + + /* Check for frequency decrease */ + /* if we cannot reduce the frequency anymore, break out 
early */ + if (policy->cur == policy->min) + return; + + /* + * The optimal frequency is the frequency that is the lowest that + * can support the current CPU usage without triggering the up + * policy. To be safe, we focus 10 points under the threshold. + */ + if (max_load_freq < + (dbs_tuners_ins.up_threshold - dbs_tuners_ins.down_differential) * + policy->cur) { + unsigned int freq_next; + freq_next = max_load_freq / + (dbs_tuners_ins.up_threshold - + dbs_tuners_ins.down_differential); + + if (!dbs_tuners_ins.powersave_bias) { + __cpufreq_driver_target(policy, freq_next, + CPUFREQ_RELATION_L); + } else { + int freq = powersave_bias_target(policy, freq_next, + CPUFREQ_RELATION_L); + __cpufreq_driver_target(policy, freq, + CPUFREQ_RELATION_L); + } + } +} + +static void do_dbs_timer(struct work_struct *work) +{ + struct cpu_dbs_info_s *dbs_info = + container_of(work, struct cpu_dbs_info_s, work.work); + unsigned int cpu = dbs_info->cpu; + int sample_type = dbs_info->sample_type; + + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + + delay -= jiffies % delay; + + if (lock_policy_rwsem_write(cpu) < 0) + return; + + if (!dbs_info->enable) { + unlock_policy_rwsem_write(cpu); + return; + } + + /* Common NORMAL_SAMPLE setup */ + dbs_info->sample_type = DBS_NORMAL_SAMPLE; + if (!dbs_tuners_ins.powersave_bias || + sample_type == DBS_NORMAL_SAMPLE) { + dbs_check_cpu(dbs_info); + if (dbs_info->freq_lo) { + /* Setup timer for SUB_SAMPLE */ + dbs_info->sample_type = DBS_SUB_SAMPLE; + delay = dbs_info->freq_hi_jiffies; + } + } else { + __cpufreq_driver_target(dbs_info->cur_policy, + dbs_info->freq_lo, + CPUFREQ_RELATION_H); + } + queue_delayed_work_on(cpu, kondemand_wq, &dbs_info->work, delay); + unlock_policy_rwsem_write(cpu); +} + +static inline void dbs_timer_init(struct cpu_dbs_info_s *dbs_info) +{ + /* We want all CPUs to do sampling nearly on same jiffy */ + int delay = 
usecs_to_jiffies(dbs_tuners_ins.sampling_rate); + delay -= jiffies % delay; + + dbs_info->enable = 1; + ondemand_powersave_bias_init(); + dbs_info->sample_type = DBS_NORMAL_SAMPLE; + INIT_DELAYED_WORK_DEFERRABLE(&dbs_info->work, do_dbs_timer); + queue_delayed_work_on(dbs_info->cpu, kondemand_wq, &dbs_info->work, + delay); +} + +static inline void dbs_timer_exit(struct cpu_dbs_info_s *dbs_info) +{ + dbs_info->enable = 0; + cancel_delayed_work(&dbs_info->work); +} + +static int cpufreq_governor_dbs(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + struct cpu_dbs_info_s *this_dbs_info; + unsigned int j; + int rc; + + this_dbs_info = &per_cpu(cpu_dbs_info, cpu); + + switch (event) { + case CPUFREQ_GOV_START: + if ((!cpu_online(cpu)) || (!policy->cur)) + return -EINVAL; + + if (this_dbs_info->enable) /* Already enabled */ + break; + + mutex_lock(&dbs_mutex); + dbs_enable++; + + rc = sysfs_create_group(&policy->kobj, &dbs_attr_group); + if (rc) { + dbs_enable--; + mutex_unlock(&dbs_mutex); + return rc; + } + + for_each_cpu_mask_nr(j, policy->cpus) { + struct cpu_dbs_info_s *j_dbs_info; + j_dbs_info = &per_cpu(cpu_dbs_info, j); + j_dbs_info->cur_policy = policy; + + j_dbs_info->prev_cpu_idle = get_cpu_idle_time(j, + &j_dbs_info->prev_cpu_wall); + } + this_dbs_info->cpu = cpu; + /* + * Start the timerschedule work, when this governor + * is used for first time + */ + if (dbs_enable == 1) { + unsigned int latency; + /* policy latency is in nS. 
Convert it to uS first */ + latency = policy->cpuinfo.transition_latency / 1000; + if (latency == 0) + latency = 1; + + def_sampling_rate = latency * + DEF_SAMPLING_RATE_LATENCY_MULTIPLIER; + + if (def_sampling_rate < MIN_STAT_SAMPLING_RATE) + def_sampling_rate = MIN_STAT_SAMPLING_RATE; + + dbs_tuners_ins.sampling_rate = def_sampling_rate; + } + dbs_timer_init(this_dbs_info); + + mutex_unlock(&dbs_mutex); + break; + + case CPUFREQ_GOV_STOP: + mutex_lock(&dbs_mutex); + dbs_timer_exit(this_dbs_info); + sysfs_remove_group(&policy->kobj, &dbs_attr_group); + dbs_enable--; + mutex_unlock(&dbs_mutex); + + break; + + case CPUFREQ_GOV_LIMITS: + mutex_lock(&dbs_mutex); + if (policy->max < this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->max, + CPUFREQ_RELATION_H); + else if (policy->min > this_dbs_info->cur_policy->cur) + __cpufreq_driver_target(this_dbs_info->cur_policy, + policy->min, + CPUFREQ_RELATION_L); + mutex_unlock(&dbs_mutex); + break; + } + return 0; +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +static +#endif +struct cpufreq_governor cpufreq_gov_ondemand = { + .name = "ondemand", + .governor = cpufreq_governor_dbs, + .max_transition_latency = TRANSITION_LATENCY_LIMIT, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gov_dbs_init(void) +{ + int err; + cputime64_t wall; + u64 idle_time; + int cpu = get_cpu(); + + idle_time = get_cpu_idle_time_us(cpu, &wall); + put_cpu(); + if (idle_time != -1ULL) { + /* Idle micro accounting is supported. 
Use finer thresholds */ + dbs_tuners_ins.up_threshold = MICRO_FREQUENCY_UP_THRESHOLD; + dbs_tuners_ins.down_differential = + MICRO_FREQUENCY_DOWN_DIFFERENTIAL; + } + + kondemand_wq = create_workqueue("kondemand"); + if (!kondemand_wq) { + printk(KERN_ERR "Creation of kondemand failed\n"); + return -EFAULT; + } + err = cpufreq_register_governor(&cpufreq_gov_ondemand); + if (err) + destroy_workqueue(kondemand_wq); + + return err; +} + +static void __exit cpufreq_gov_dbs_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_ondemand); + destroy_workqueue(kondemand_wq); +} + + +MODULE_AUTHOR("Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>"); +MODULE_AUTHOR("Alexey Starikovskiy <alexey.y.starikovskiy@intel.com>"); +MODULE_DESCRIPTION("'cpufreq_ondemand' - A dynamic cpufreq governor for " + "Low Latency Frequency Transition capable processors"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_ONDEMAND +fs_initcall(cpufreq_gov_dbs_init); +#else +module_init(cpufreq_gov_dbs_init); +#endif +module_exit(cpufreq_gov_dbs_exit); diff --git a/drivers/cpufreq/cpufreq_performance.c b/drivers/cpufreq/cpufreq_performance.c new file mode 100644 index 0000000..7e2e515 --- /dev/null +++ b/drivers/cpufreq/cpufreq_performance.c @@ -0,0 +1,66 @@ +/* + * linux/drivers/cpufreq/cpufreq_performance.c + * + * Copyright (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/cpufreq.h> +#include <linux/init.h> + +#define dprintk(msg...) 
\ + cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "performance", msg) + + +static int cpufreq_governor_performance(struct cpufreq_policy *policy, + unsigned int event) +{ + switch (event) { + case CPUFREQ_GOV_START: + case CPUFREQ_GOV_LIMITS: + dprintk("setting to %u kHz because of event %u\n", + policy->max, event); + __cpufreq_driver_target(policy, policy->max, + CPUFREQ_RELATION_H); + break; + default: + break; + } + return 0; +} + +#ifdef CONFIG_CPU_FREQ_GOV_PERFORMANCE_MODULE +static +#endif +struct cpufreq_governor cpufreq_gov_performance = { + .name = "performance", + .governor = cpufreq_governor_performance, + .owner = THIS_MODULE, +}; + + +static int __init cpufreq_gov_performance_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_performance); +} + + +static void __exit cpufreq_gov_performance_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_performance); +} + + +MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); +MODULE_DESCRIPTION("CPUfreq policy governor 'performance'"); +MODULE_LICENSE("GPL"); + +fs_initcall(cpufreq_gov_performance_init); +module_exit(cpufreq_gov_performance_exit); diff --git a/drivers/cpufreq/cpufreq_powersave.c b/drivers/cpufreq/cpufreq_powersave.c new file mode 100644 index 0000000..e6db5fa --- /dev/null +++ b/drivers/cpufreq/cpufreq_powersave.c @@ -0,0 +1,68 @@ +/* + * linux/drivers/cpufreq/cpufreq_powersave.c + * + * Copyright (C) 2002 - 2003 Dominik Brodowski <linux@brodo.de> + * + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/cpufreq.h> +#include <linux/init.h> + +#define dprintk(msg...) 
\ + cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "powersave", msg) + +static int cpufreq_governor_powersave(struct cpufreq_policy *policy, + unsigned int event) +{ + switch (event) { + case CPUFREQ_GOV_START: + case CPUFREQ_GOV_LIMITS: + dprintk("setting to %u kHz because of event %u\n", + policy->min, event); + __cpufreq_driver_target(policy, policy->min, + CPUFREQ_RELATION_L); + break; + default: + break; + } + return 0; +} + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +static +#endif +struct cpufreq_governor cpufreq_gov_powersave = { + .name = "powersave", + .governor = cpufreq_governor_powersave, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gov_powersave_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_powersave); +} + + +static void __exit cpufreq_gov_powersave_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_powersave); +} + + +MODULE_AUTHOR("Dominik Brodowski <linux@brodo.de>"); +MODULE_DESCRIPTION("CPUfreq policy governor 'powersave'"); +MODULE_LICENSE("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_POWERSAVE +fs_initcall(cpufreq_gov_powersave_init); +#else +module_init(cpufreq_gov_powersave_init); +#endif +module_exit(cpufreq_gov_powersave_exit); diff --git a/drivers/cpufreq/cpufreq_stats.c b/drivers/cpufreq/cpufreq_stats.c new file mode 100644 index 0000000..c0ff97d --- /dev/null +++ b/drivers/cpufreq/cpufreq_stats.c @@ -0,0 +1,388 @@ +/* + * drivers/cpufreq/cpufreq_stats.c + * + * Copyright (C) 2003-2004 Venkatesh Pallipadi <venkatesh.pallipadi@intel.com>. + * (C) 2004 Zou Nan hai <nanhai.zou@intel.com>. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ */ + +#include <linux/kernel.h> +#include <linux/sysdev.h> +#include <linux/cpu.h> +#include <linux/sysfs.h> +#include <linux/cpufreq.h> +#include <linux/jiffies.h> +#include <linux/percpu.h> +#include <linux/kobject.h> +#include <linux/spinlock.h> +#include <linux/notifier.h> +#include <asm/cputime.h> + +static spinlock_t cpufreq_stats_lock; + +#define CPUFREQ_STATDEVICE_ATTR(_name,_mode,_show) \ +static struct freq_attr _attr_##_name = {\ + .attr = {.name = __stringify(_name), .mode = _mode, }, \ + .show = _show,\ +}; + +struct cpufreq_stats { + unsigned int cpu; + unsigned int total_trans; + unsigned long long last_time; + unsigned int max_state; + unsigned int state_num; + unsigned int last_index; + cputime64_t *time_in_state; + unsigned int *freq_table; +#ifdef CONFIG_CPU_FREQ_STAT_DETAILS + unsigned int *trans_table; +#endif +}; + +static DEFINE_PER_CPU(struct cpufreq_stats *, cpufreq_stats_table); + +struct cpufreq_stats_attribute { + struct attribute attr; + ssize_t(*show) (struct cpufreq_stats *, char *); +}; + +static int +cpufreq_stats_update (unsigned int cpu) +{ + struct cpufreq_stats *stat; + unsigned long long cur_time; + + cur_time = get_jiffies_64(); + spin_lock(&cpufreq_stats_lock); + stat = per_cpu(cpufreq_stats_table, cpu); + if (stat->time_in_state) + stat->time_in_state[stat->last_index] = + cputime64_add(stat->time_in_state[stat->last_index], + cputime_sub(cur_time, stat->last_time)); + stat->last_time = cur_time; + spin_unlock(&cpufreq_stats_lock); + return 0; +} + +static ssize_t +show_total_trans(struct cpufreq_policy *policy, char *buf) +{ + struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); + if (!stat) + return 0; + return sprintf(buf, "%d\n", + per_cpu(cpufreq_stats_table, stat->cpu)->total_trans); +} + +static ssize_t +show_time_in_state(struct cpufreq_policy *policy, char *buf) +{ + ssize_t len = 0; + int i; + struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); + if (!stat) + return 0; + 
cpufreq_stats_update(stat->cpu); + for (i = 0; i < stat->state_num; i++) { + len += sprintf(buf + len, "%u %llu\n", stat->freq_table[i], + (unsigned long long)cputime64_to_clock_t(stat->time_in_state[i])); + } + return len; +} + +#ifdef CONFIG_CPU_FREQ_STAT_DETAILS +static ssize_t +show_trans_table(struct cpufreq_policy *policy, char *buf) +{ + ssize_t len = 0; + int i, j; + + struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, policy->cpu); + if (!stat) + return 0; + cpufreq_stats_update(stat->cpu); + len += snprintf(buf + len, PAGE_SIZE - len, " From : To\n"); + len += snprintf(buf + len, PAGE_SIZE - len, " : "); + for (i = 0; i < stat->state_num; i++) { + if (len >= PAGE_SIZE) + break; + len += snprintf(buf + len, PAGE_SIZE - len, "%9u ", + stat->freq_table[i]); + } + if (len >= PAGE_SIZE) + return PAGE_SIZE; + + len += snprintf(buf + len, PAGE_SIZE - len, "\n"); + + for (i = 0; i < stat->state_num; i++) { + if (len >= PAGE_SIZE) + break; + + len += snprintf(buf + len, PAGE_SIZE - len, "%9u: ", + stat->freq_table[i]); + + for (j = 0; j < stat->state_num; j++) { + if (len >= PAGE_SIZE) + break; + len += snprintf(buf + len, PAGE_SIZE - len, "%9u ", + stat->trans_table[i*stat->max_state+j]); + } + if (len >= PAGE_SIZE) + break; + len += snprintf(buf + len, PAGE_SIZE - len, "\n"); + } + if (len >= PAGE_SIZE) + return PAGE_SIZE; + return len; +} +CPUFREQ_STATDEVICE_ATTR(trans_table,0444,show_trans_table); +#endif + +CPUFREQ_STATDEVICE_ATTR(total_trans,0444,show_total_trans); +CPUFREQ_STATDEVICE_ATTR(time_in_state,0444,show_time_in_state); + +static struct attribute *default_attrs[] = { + &_attr_total_trans.attr, + &_attr_time_in_state.attr, +#ifdef CONFIG_CPU_FREQ_STAT_DETAILS + &_attr_trans_table.attr, +#endif + NULL +}; +static struct attribute_group stats_attr_group = { + .attrs = default_attrs, + .name = "stats" +}; + +static int +freq_table_get_index(struct cpufreq_stats *stat, unsigned int freq) +{ + int index; + for (index = 0; index < stat->max_state; 
index++) + if (stat->freq_table[index] == freq) + return index; + return -1; +} + +static void cpufreq_stats_free_table(unsigned int cpu) +{ + struct cpufreq_stats *stat = per_cpu(cpufreq_stats_table, cpu); + struct cpufreq_policy *policy = cpufreq_cpu_get(cpu); + if (policy && policy->cpu == cpu) + sysfs_remove_group(&policy->kobj, &stats_attr_group); + if (stat) { + kfree(stat->time_in_state); + kfree(stat); + } + per_cpu(cpufreq_stats_table, cpu) = NULL; + if (policy) + cpufreq_cpu_put(policy); +} + +static int +cpufreq_stats_create_table (struct cpufreq_policy *policy, + struct cpufreq_frequency_table *table) +{ + unsigned int i, j, count = 0, ret = 0; + struct cpufreq_stats *stat; + struct cpufreq_policy *data; + unsigned int alloc_size; + unsigned int cpu = policy->cpu; + if (per_cpu(cpufreq_stats_table, cpu)) + return -EBUSY; + if ((stat = kzalloc(sizeof(struct cpufreq_stats), GFP_KERNEL)) == NULL) + return -ENOMEM; + + data = cpufreq_cpu_get(cpu); + if (data == NULL) { + ret = -EINVAL; + goto error_get_fail; + } + + if ((ret = sysfs_create_group(&data->kobj, &stats_attr_group))) + goto error_out; + + stat->cpu = cpu; + per_cpu(cpufreq_stats_table, cpu) = stat; + + for (i=0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + unsigned int freq = table[i].frequency; + if (freq == CPUFREQ_ENTRY_INVALID) + continue; + count++; + } + + alloc_size = count * sizeof(int) + count * sizeof(cputime64_t); + +#ifdef CONFIG_CPU_FREQ_STAT_DETAILS + alloc_size += count * count * sizeof(int); +#endif + stat->max_state = count; + stat->time_in_state = kzalloc(alloc_size, GFP_KERNEL); + if (!stat->time_in_state) { + ret = -ENOMEM; + goto error_out; + } + stat->freq_table = (unsigned int *)(stat->time_in_state + count); + +#ifdef CONFIG_CPU_FREQ_STAT_DETAILS + stat->trans_table = stat->freq_table + count; +#endif + j = 0; + for (i = 0; table[i].frequency != CPUFREQ_TABLE_END; i++) { + unsigned int freq = table[i].frequency; + if (freq == CPUFREQ_ENTRY_INVALID) + continue; + if 
(freq_table_get_index(stat, freq) == -1) + stat->freq_table[j++] = freq; + } + stat->state_num = j; + spin_lock(&cpufreq_stats_lock); + stat->last_time = get_jiffies_64(); + stat->last_index = freq_table_get_index(stat, policy->cur); + spin_unlock(&cpufreq_stats_lock); + cpufreq_cpu_put(data); + return 0; +error_out: + cpufreq_cpu_put(data); +error_get_fail: + kfree(stat); + per_cpu(cpufreq_stats_table, cpu) = NULL; + return ret; +} + +static int +cpufreq_stat_notifier_policy (struct notifier_block *nb, unsigned long val, + void *data) +{ + int ret; + struct cpufreq_policy *policy = data; + struct cpufreq_frequency_table *table; + unsigned int cpu = policy->cpu; + if (val != CPUFREQ_NOTIFY) + return 0; + table = cpufreq_frequency_get_table(cpu); + if (!table) + return 0; + if ((ret = cpufreq_stats_create_table(policy, table))) + return ret; + return 0; +} + +static int +cpufreq_stat_notifier_trans (struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + struct cpufreq_stats *stat; + int old_index, new_index; + + if (val != CPUFREQ_POSTCHANGE) + return 0; + + stat = per_cpu(cpufreq_stats_table, freq->cpu); + if (!stat) + return 0; + + old_index = stat->last_index; + new_index = freq_table_get_index(stat, freq->new); + + cpufreq_stats_update(freq->cpu); + if (old_index == new_index) + return 0; + + if (old_index == -1 || new_index == -1) + return 0; + + spin_lock(&cpufreq_stats_lock); + stat->last_index = new_index; +#ifdef CONFIG_CPU_FREQ_STAT_DETAILS + stat->trans_table[old_index * stat->max_state + new_index]++; +#endif + stat->total_trans++; + spin_unlock(&cpufreq_stats_lock); + return 0; +} + +static int __cpuinit cpufreq_stat_cpu_callback(struct notifier_block *nfb, + unsigned long action, + void *hcpu) +{ + unsigned int cpu = (unsigned long)hcpu; + + switch (action) { + case CPU_ONLINE: + case CPU_ONLINE_FROZEN: + cpufreq_update_policy(cpu); + break; + case CPU_DEAD: + case CPU_DEAD_FROZEN: + 
cpufreq_stats_free_table(cpu); + break; + } + return NOTIFY_OK; +} + +static struct notifier_block cpufreq_stat_cpu_notifier __refdata = +{ + .notifier_call = cpufreq_stat_cpu_callback, +}; + +static struct notifier_block notifier_policy_block = { + .notifier_call = cpufreq_stat_notifier_policy +}; + +static struct notifier_block notifier_trans_block = { + .notifier_call = cpufreq_stat_notifier_trans +}; + +static int +__init cpufreq_stats_init(void) +{ + int ret; + unsigned int cpu; + + spin_lock_init(&cpufreq_stats_lock); + if ((ret = cpufreq_register_notifier(&notifier_policy_block, + CPUFREQ_POLICY_NOTIFIER))) + return ret; + + if ((ret = cpufreq_register_notifier(&notifier_trans_block, + CPUFREQ_TRANSITION_NOTIFIER))) { + cpufreq_unregister_notifier(&notifier_policy_block, + CPUFREQ_POLICY_NOTIFIER); + return ret; + } + + register_hotcpu_notifier(&cpufreq_stat_cpu_notifier); + for_each_online_cpu(cpu) { + cpufreq_update_policy(cpu); + } + return 0; +} +static void +__exit cpufreq_stats_exit(void) +{ + unsigned int cpu; + + cpufreq_unregister_notifier(&notifier_policy_block, + CPUFREQ_POLICY_NOTIFIER); + cpufreq_unregister_notifier(&notifier_trans_block, + CPUFREQ_TRANSITION_NOTIFIER); + unregister_hotcpu_notifier(&cpufreq_stat_cpu_notifier); + for_each_online_cpu(cpu) { + cpufreq_stats_free_table(cpu); + } +} + +MODULE_AUTHOR ("Zou Nan hai <nanhai.zou@intel.com>"); +MODULE_DESCRIPTION ("'cpufreq_stats' - A driver to export cpufreq stats " + "through sysfs filesystem"); +MODULE_LICENSE ("GPL"); + +module_init(cpufreq_stats_init); +module_exit(cpufreq_stats_exit); diff --git a/drivers/cpufreq/cpufreq_userspace.c b/drivers/cpufreq/cpufreq_userspace.c new file mode 100644 index 0000000..1442bba --- /dev/null +++ b/drivers/cpufreq/cpufreq_userspace.c @@ -0,0 +1,222 @@ + +/* + * linux/drivers/cpufreq/cpufreq_userspace.c + * + * Copyright (C) 2001 Russell King + * (C) 2002 - 2004 Dominik Brodowski <linux@brodo.de> + * + * This program is free software; you can redistribute it
and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/smp.h> +#include <linux/init.h> +#include <linux/spinlock.h> +#include <linux/interrupt.h> +#include <linux/cpufreq.h> +#include <linux/cpu.h> +#include <linux/types.h> +#include <linux/fs.h> +#include <linux/sysfs.h> +#include <linux/mutex.h> + +#include <asm/uaccess.h> + + +/** + * A few values needed by the userspace governor + */ +static DEFINE_PER_CPU(unsigned int, cpu_max_freq); +static DEFINE_PER_CPU(unsigned int, cpu_min_freq); +static DEFINE_PER_CPU(unsigned int, cpu_cur_freq); /* current CPU freq */ +static DEFINE_PER_CPU(unsigned int, cpu_set_freq); /* CPU freq desired by + userspace */ +static DEFINE_PER_CPU(unsigned int, cpu_is_managed); + +static DEFINE_MUTEX (userspace_mutex); +static int cpus_using_userspace_governor; + +#define dprintk(msg...) \ + cpufreq_debug_printk(CPUFREQ_DEBUG_GOVERNOR, "userspace", msg) + +/* keep track of frequency transitions */ +static int +userspace_cpufreq_notifier(struct notifier_block *nb, unsigned long val, + void *data) +{ + struct cpufreq_freqs *freq = data; + + if (!per_cpu(cpu_is_managed, freq->cpu)) + return 0; + + dprintk("saving cpu_cur_freq of cpu %u to be %u kHz\n", + freq->cpu, freq->new); + per_cpu(cpu_cur_freq, freq->cpu) = freq->new; + + return 0; +} + +static struct notifier_block userspace_cpufreq_notifier_block = { + .notifier_call = userspace_cpufreq_notifier +}; + + +/** + * cpufreq_set - set the CPU frequency + * @policy: pointer to policy struct where freq is being set + * @freq: target frequency in kHz + * + * Sets the CPU frequency to freq. 
+ */ +static int cpufreq_set(struct cpufreq_policy *policy, unsigned int freq) +{ + int ret = -EINVAL; + + dprintk("cpufreq_set for cpu %u, freq %u kHz\n", policy->cpu, freq); + + mutex_lock(&userspace_mutex); + if (!per_cpu(cpu_is_managed, policy->cpu)) + goto err; + + per_cpu(cpu_set_freq, policy->cpu) = freq; + + if (freq < per_cpu(cpu_min_freq, policy->cpu)) + freq = per_cpu(cpu_min_freq, policy->cpu); + if (freq > per_cpu(cpu_max_freq, policy->cpu)) + freq = per_cpu(cpu_max_freq, policy->cpu); + + /* + * We're safe from concurrent calls to ->target() here + * as we hold the userspace_mutex lock. If we were calling + * cpufreq_driver_target, a deadlock situation might occur: + * A: cpufreq_set (lock userspace_mutex) -> cpufreq_driver_target(lock policy->lock) + * B: cpufreq_set_policy(lock policy->lock) -> __cpufreq_governor -> cpufreq_governor_userspace (lock userspace_mutex) + */ + ret = __cpufreq_driver_target(policy, freq, CPUFREQ_RELATION_L); + + err: + mutex_unlock(&userspace_mutex); + return ret; +} + + +static ssize_t show_speed(struct cpufreq_policy *policy, char *buf) +{ + return sprintf(buf, "%u\n", per_cpu(cpu_cur_freq, policy->cpu)); +} + +static int cpufreq_governor_userspace(struct cpufreq_policy *policy, + unsigned int event) +{ + unsigned int cpu = policy->cpu; + int rc = 0; + + switch (event) { + case CPUFREQ_GOV_START: + if (!cpu_online(cpu)) + return -EINVAL; + BUG_ON(!policy->cur); + mutex_lock(&userspace_mutex); + + if (cpus_using_userspace_governor == 0) { + cpufreq_register_notifier( + &userspace_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + cpus_using_userspace_governor++; + + per_cpu(cpu_is_managed, cpu) = 1; + per_cpu(cpu_min_freq, cpu) = policy->min; + per_cpu(cpu_max_freq, cpu) = policy->max; + per_cpu(cpu_cur_freq, cpu) = policy->cur; + per_cpu(cpu_set_freq, cpu) = policy->cur; + dprintk("managing cpu %u started " + "(%u - %u kHz, currently %u kHz)\n", + cpu, + per_cpu(cpu_min_freq, cpu), + per_cpu(cpu_max_freq, 
cpu), + per_cpu(cpu_cur_freq, cpu)); + + mutex_unlock(&userspace_mutex); + break; + case CPUFREQ_GOV_STOP: + mutex_lock(&userspace_mutex); + cpus_using_userspace_governor--; + if (cpus_using_userspace_governor == 0) { + cpufreq_unregister_notifier( + &userspace_cpufreq_notifier_block, + CPUFREQ_TRANSITION_NOTIFIER); + } + + per_cpu(cpu_is_managed, cpu) = 0; + per_cpu(cpu_min_freq, cpu) = 0; + per_cpu(cpu_max_freq, cpu) = 0; + per_cpu(cpu_set_freq, cpu) = 0; + dprintk("managing cpu %u stopped\n", cpu); + mutex_unlock(&userspace_mutex); + break; + case CPUFREQ_GOV_LIMITS: + mutex_lock(&userspace_mutex); + dprintk("limit event for cpu %u: %u - %u kHz, " + "currently %u kHz, last set to %u kHz\n", + cpu, policy->min, policy->max, + per_cpu(cpu_cur_freq, cpu), + per_cpu(cpu_set_freq, cpu)); + if (policy->max < per_cpu(cpu_set_freq, cpu)) { + __cpufreq_driver_target(policy, policy->max, + CPUFREQ_RELATION_H); + } else if (policy->min > per_cpu(cpu_set_freq, cpu)) { + __cpufreq_driver_target(policy, policy->min, + CPUFREQ_RELATION_L); + } else { + __cpufreq_driver_target(policy, + per_cpu(cpu_set_freq, cpu), + CPUFREQ_RELATION_L); + } + per_cpu(cpu_min_freq, cpu) = policy->min; + per_cpu(cpu_max_freq, cpu) = policy->max; + per_cpu(cpu_cur_freq, cpu) = policy->cur; + mutex_unlock(&userspace_mutex); + break; + } + return rc; +} + + +#ifndef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +static +#endif +struct cpufreq_governor cpufreq_gov_userspace = { + .name = "userspace", + .governor = cpufreq_governor_userspace, + .store_setspeed = cpufreq_set, + .show_setspeed = show_speed, + .owner = THIS_MODULE, +}; + +static int __init cpufreq_gov_userspace_init(void) +{ + return cpufreq_register_governor(&cpufreq_gov_userspace); +} + + +static void __exit cpufreq_gov_userspace_exit(void) +{ + cpufreq_unregister_governor(&cpufreq_gov_userspace); +} + + +MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>, Russell King <rmk@arm.linux.org.uk>"); +MODULE_DESCRIPTION ("CPUfreq policy governor 
'userspace'"); +MODULE_LICENSE ("GPL"); + +#ifdef CONFIG_CPU_FREQ_DEFAULT_GOV_USERSPACE +fs_initcall(cpufreq_gov_userspace_init); +#else +module_init(cpufreq_gov_userspace_init); +#endif +module_exit(cpufreq_gov_userspace_exit); diff --git a/drivers/cpufreq/freq_table.c b/drivers/cpufreq/freq_table.c new file mode 100644 index 0000000..9071d80 --- /dev/null +++ b/drivers/cpufreq/freq_table.c @@ -0,0 +1,239 @@ +/* + * linux/drivers/cpufreq/freq_table.c + * + * Copyright (C) 2002 - 2003 Dominik Brodowski + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + */ + +#include <linux/kernel.h> +#include <linux/module.h> +#include <linux/init.h> +#include <linux/cpufreq.h> + +#define dprintk(msg...) \ + cpufreq_debug_printk(CPUFREQ_DEBUG_CORE, "freq-table", msg) + +/********************************************************************* + * FREQUENCY TABLE HELPERS * + *********************************************************************/ + +int cpufreq_frequency_table_cpuinfo(struct cpufreq_policy *policy, + struct cpufreq_frequency_table *table) +{ + unsigned int min_freq = ~0; + unsigned int max_freq = 0; + unsigned int i; + + for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { + unsigned int freq = table[i].frequency; + if (freq == CPUFREQ_ENTRY_INVALID) { + dprintk("table entry %u is invalid, skipping\n", i); + + continue; + } + dprintk("table entry %u: %u kHz, %u index\n", + i, freq, table[i].index); + if (freq < min_freq) + min_freq = freq; + if (freq > max_freq) + max_freq = freq; + } + + policy->min = policy->cpuinfo.min_freq = min_freq; + policy->max = policy->cpuinfo.max_freq = max_freq; + + if (policy->min == ~0) + return -EINVAL; + else + return 0; +} +EXPORT_SYMBOL_GPL(cpufreq_frequency_table_cpuinfo); + + +int cpufreq_frequency_table_verify(struct cpufreq_policy *policy, + struct cpufreq_frequency_table 
*table) +{ + unsigned int next_larger = ~0; + unsigned int i; + unsigned int count = 0; + + dprintk("request for verification of policy (%u - %u kHz) for cpu %u\n", + policy->min, policy->max, policy->cpu); + + if (!cpu_online(policy->cpu)) + return -EINVAL; + + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + + for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { + unsigned int freq = table[i].frequency; + if (freq == CPUFREQ_ENTRY_INVALID) + continue; + if ((freq >= policy->min) && (freq <= policy->max)) + count++; + else if ((next_larger > freq) && (freq > policy->max)) + next_larger = freq; + } + + if (!count) + policy->max = next_larger; + + cpufreq_verify_within_limits(policy, policy->cpuinfo.min_freq, + policy->cpuinfo.max_freq); + + dprintk("verification lead to (%u - %u kHz) for cpu %u\n", + policy->min, policy->max, policy->cpu); + + return 0; +} +EXPORT_SYMBOL_GPL(cpufreq_frequency_table_verify); + + +int cpufreq_frequency_table_target(struct cpufreq_policy *policy, + struct cpufreq_frequency_table *table, + unsigned int target_freq, + unsigned int relation, + unsigned int *index) +{ + struct cpufreq_frequency_table optimal = { + .index = ~0, + .frequency = 0, + }; + struct cpufreq_frequency_table suboptimal = { + .index = ~0, + .frequency = 0, + }; + unsigned int i; + + dprintk("request for target %u kHz (relation: %u) for cpu %u\n", + target_freq, relation, policy->cpu); + + switch (relation) { + case CPUFREQ_RELATION_H: + suboptimal.frequency = ~0; + break; + case CPUFREQ_RELATION_L: + optimal.frequency = ~0; + break; + } + + if (!cpu_online(policy->cpu)) + return -EINVAL; + + for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { + unsigned int freq = table[i].frequency; + if (freq == CPUFREQ_ENTRY_INVALID) + continue; + if ((freq < policy->min) || (freq > policy->max)) + continue; + switch(relation) { + case CPUFREQ_RELATION_H: + if (freq <= target_freq) { + if (freq >= optimal.frequency) { + 
optimal.frequency = freq; + optimal.index = i; + } + } else { + if (freq <= suboptimal.frequency) { + suboptimal.frequency = freq; + suboptimal.index = i; + } + } + break; + case CPUFREQ_RELATION_L: + if (freq >= target_freq) { + if (freq <= optimal.frequency) { + optimal.frequency = freq; + optimal.index = i; + } + } else { + if (freq >= suboptimal.frequency) { + suboptimal.frequency = freq; + suboptimal.index = i; + } + } + break; + } + } + if (optimal.index > i) { + if (suboptimal.index > i) + return -EINVAL; + *index = suboptimal.index; + } else + *index = optimal.index; + + dprintk("target is %u (%u kHz, %u)\n", *index, table[*index].frequency, + table[*index].index); + + return 0; +} +EXPORT_SYMBOL_GPL(cpufreq_frequency_table_target); + +static DEFINE_PER_CPU(struct cpufreq_frequency_table *, show_table); +/** + * show_available_freqs - show available frequencies for the specified CPU + */ +static ssize_t show_available_freqs (struct cpufreq_policy *policy, char *buf) +{ + unsigned int i = 0; + unsigned int cpu = policy->cpu; + ssize_t count = 0; + struct cpufreq_frequency_table *table; + + if (!per_cpu(show_table, cpu)) + return -ENODEV; + + table = per_cpu(show_table, cpu); + + for (i=0; (table[i].frequency != CPUFREQ_TABLE_END); i++) { + if (table[i].frequency == CPUFREQ_ENTRY_INVALID) + continue; + count += sprintf(&buf[count], "%d ", table[i].frequency); + } + count += sprintf(&buf[count], "\n"); + + return count; + +} + +struct freq_attr cpufreq_freq_attr_scaling_available_freqs = { + .attr = { .name = "scaling_available_frequencies", + .mode = 0444, + }, + .show = show_available_freqs, +}; +EXPORT_SYMBOL_GPL(cpufreq_freq_attr_scaling_available_freqs); + +/* + * if you use these, you must assure that the frequency table is valid + * all the time between get_attr and put_attr! 
+ */ +void cpufreq_frequency_table_get_attr(struct cpufreq_frequency_table *table, + unsigned int cpu) +{ + dprintk("setting show_table for cpu %u to %p\n", cpu, table); + per_cpu(show_table, cpu) = table; +} +EXPORT_SYMBOL_GPL(cpufreq_frequency_table_get_attr); + +void cpufreq_frequency_table_put_attr(unsigned int cpu) +{ + dprintk("clearing show_table for cpu %u\n", cpu); + per_cpu(show_table, cpu) = NULL; +} +EXPORT_SYMBOL_GPL(cpufreq_frequency_table_put_attr); + +struct cpufreq_frequency_table *cpufreq_frequency_get_table(unsigned int cpu) +{ + return per_cpu(show_table, cpu); +} +EXPORT_SYMBOL_GPL(cpufreq_frequency_get_table); + +MODULE_AUTHOR ("Dominik Brodowski <linux@brodo.de>"); +MODULE_DESCRIPTION ("CPUfreq frequency table helpers"); +MODULE_LICENSE ("GPL"); |