Diffstat (limited to 'drivers')
-rw-r--r--   drivers/base/power/opp.c        | 74
-rw-r--r--   drivers/cpufreq/intel_pstate.c  | 38
2 files changed, 78 insertions(+), 34 deletions(-)
diff --git a/drivers/base/power/opp.c b/drivers/base/power/opp.c
index 1bbef8e..d24dd614a 100644
--- a/drivers/base/power/opp.c
+++ b/drivers/base/power/opp.c
@@ -84,7 +84,11 @@ struct dev_pm_opp {
  *
  * This is an internal data structure maintaining the link to opps attached to
  * a device. This structure is not meant to be shared to users as it is
- * meant for book keeping and private to OPP library
+ * meant for book keeping and private to OPP library.
+ *
+ * Because the opp structures can be used from both rcu and srcu readers, we
+ * need to wait for the grace period of both of them before freeing any
+ * resources. And so we have used kfree_rcu() from within call_srcu() handlers.
  */
 struct device_opp {
 	struct list_head node;
@@ -382,12 +386,34 @@ struct dev_pm_opp *dev_pm_opp_find_freq_floor(struct device *dev,
 }
 EXPORT_SYMBOL_GPL(dev_pm_opp_find_freq_floor);
 
+static struct device_opp *add_device_opp(struct device *dev)
+{
+	struct device_opp *dev_opp;
+
+	/*
+	 * Allocate a new device OPP table. In the infrequent case where a new
+	 * device is needed to be added, we pay this penalty.
+	 */
+	dev_opp = kzalloc(sizeof(*dev_opp), GFP_KERNEL);
+	if (!dev_opp)
+		return NULL;
+
+	dev_opp->dev = dev;
+	srcu_init_notifier_head(&dev_opp->srcu_head);
+	INIT_LIST_HEAD(&dev_opp->opp_list);
+
+	/* Secure the device list modification */
+	list_add_rcu(&dev_opp->node, &dev_opp_list);
+	return dev_opp;
+}
+
 static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
 				  unsigned long u_volt, bool dynamic)
 {
 	struct device_opp *dev_opp = NULL;
 	struct dev_pm_opp *opp, *new_opp;
 	struct list_head *head;
+	int ret;
 
 	/* allocate new OPP node */
 	new_opp = kzalloc(sizeof(*new_opp), GFP_KERNEL);
@@ -408,27 +434,12 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
 	/* Check for existing list for 'dev' */
 	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
-		/*
-		 * Allocate a new device OPP table. In the infrequent case
-		 * where a new device is needed to be added, we pay this
-		 * penalty.
-		 */
-		dev_opp = kzalloc(sizeof(struct device_opp), GFP_KERNEL);
+		dev_opp = add_device_opp(dev);
 		if (!dev_opp) {
-			mutex_unlock(&dev_opp_list_lock);
-			kfree(new_opp);
-			dev_warn(dev,
-				"%s: Unable to create device OPP structure\n",
-				__func__);
-			return -ENOMEM;
+			ret = -ENOMEM;
+			goto free_opp;
 		}
 
-		dev_opp->dev = dev;
-		srcu_init_notifier_head(&dev_opp->srcu_head);
-		INIT_LIST_HEAD(&dev_opp->opp_list);
-
-		/* Secure the device list modification */
-		list_add_rcu(&dev_opp->node, &dev_opp_list);
 		head = &dev_opp->opp_list;
 		goto list_add;
 	}
@@ -447,15 +458,13 @@ static int dev_pm_opp_add_dynamic(struct device *dev, unsigned long freq,
 
 		/* Duplicate OPPs ? */
 		if (new_opp->rate == opp->rate) {
-			int ret = opp->available && new_opp->u_volt == opp->u_volt ?
+			ret = opp->available && new_opp->u_volt == opp->u_volt ?
 				0 : -EEXIST;
 
 			dev_warn(dev, "%s: duplicate OPPs detected. Existing: freq: %lu, volt: %lu, enabled: %d. New: freq: %lu, volt: %lu, enabled: %d\n",
 				 __func__, opp->rate, opp->u_volt, opp->available,
 				 new_opp->rate, new_opp->u_volt, new_opp->available);
-			mutex_unlock(&dev_opp_list_lock);
-			kfree(new_opp);
-			return ret;
+			goto free_opp;
 		}
 
 list_add:
@@ -469,6 +478,11 @@ list_add:
 	 */
 	srcu_notifier_call_chain(&dev_opp->srcu_head, OPP_EVENT_ADD, new_opp);
 	return 0;
+
+free_opp:
+	mutex_unlock(&dev_opp_list_lock);
+	kfree(new_opp);
+	return ret;
 }
 
 /**
@@ -511,10 +525,11 @@ static void kfree_device_rcu(struct rcu_head *head)
 {
 	struct device_opp *device_opp = container_of(head, struct device_opp, rcu_head);
 
-	kfree(device_opp);
+	kfree_rcu(device_opp, rcu_head);
 }
 
-void __dev_pm_opp_remove(struct device_opp *dev_opp, struct dev_pm_opp *opp)
+static void __dev_pm_opp_remove(struct device_opp *dev_opp,
+				struct dev_pm_opp *opp)
 {
 	/*
 	 * Notify the changes in the availability of the operable
@@ -592,7 +607,7 @@ EXPORT_SYMBOL_GPL(dev_pm_opp_remove);
 static int opp_set_availability(struct device *dev, unsigned long freq,
 				bool availability_req)
 {
-	struct device_opp *tmp_dev_opp, *dev_opp = ERR_PTR(-ENODEV);
+	struct device_opp *dev_opp;
 	struct dev_pm_opp *new_opp, *tmp_opp, *opp = ERR_PTR(-ENODEV);
 	int r = 0;
 
@@ -606,12 +621,7 @@ static int opp_set_availability(struct device *dev, unsigned long freq,
 	mutex_lock(&dev_opp_list_lock);
 
 	/* Find the device_opp */
-	list_for_each_entry(tmp_dev_opp, &dev_opp_list, node) {
-		if (dev == tmp_dev_opp->dev) {
-			dev_opp = tmp_dev_opp;
-			break;
-		}
-	}
+	dev_opp = find_device_opp(dev);
 	if (IS_ERR(dev_opp)) {
 		r = PTR_ERR(dev_opp);
 		dev_warn(dev, "%s: Device OPP not found (%d)\n", __func__, r);
diff --git a/drivers/cpufreq/intel_pstate.c b/drivers/cpufreq/intel_pstate.c
index 1405b39..742eefb 100644
--- a/drivers/cpufreq/intel_pstate.c
+++ b/drivers/cpufreq/intel_pstate.c
@@ -199,7 +199,14 @@ static signed int pid_calc(struct _pid *pid, int32_t busy)
 
 	pid->integral += fp_error;
 
-	/* limit the integral term */
+	/*
+	 * We limit the integral here so that it will never
+	 * get higher than 30. This prevents it from becoming
+	 * too large an input over long periods of time and allows
+	 * it to get factored out sooner.
+	 *
+	 * The value of 30 was chosen through experimentation.
+	 */
 	integral_limit = int_tofp(30);
 	if (pid->integral > integral_limit)
 		pid->integral = integral_limit;
@@ -616,6 +623,11 @@ static void intel_pstate_get_min_max(struct cpudata *cpu, int *min, int *max)
 	if (limits.no_turbo || limits.turbo_disabled)
 		max_perf = cpu->pstate.max_pstate;
 
+	/*
+	 * performance can be limited by user through sysfs, by cpufreq
+	 * policy, or by cpu specific default values determined through
+	 * experimentation.
+	 */
 	max_perf_adj = fp_toint(mul_fp(int_tofp(max_perf), limits.max_perf));
 	*max = clamp_t(int, max_perf_adj,
 			cpu->pstate.min_pstate, cpu->pstate.turbo_pstate);
@@ -717,11 +729,29 @@ static inline int32_t intel_pstate_get_scaled_busy(struct cpudata *cpu)
 	u32 duration_us;
 	u32 sample_time;
 
+	/*
+	 * core_busy is the ratio of actual performance to max
+	 * max_pstate is the max non turbo pstate available
+	 * current_pstate was the pstate that was requested during
+	 *	the last sample period.
+	 *
+	 * We normalize core_busy, which was our actual percent
+	 * performance to what we requested during the last sample
+	 * period. The result will be a percentage of busy at a
+	 * specified pstate.
+	 */
 	core_busy = cpu->sample.core_pct_busy;
 	max_pstate = int_tofp(cpu->pstate.max_pstate);
 	current_pstate = int_tofp(cpu->pstate.current_pstate);
 	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));
 
+	/*
+	 * Since we have a deferred timer, it will not fire unless
+	 * we are in C0. So, determine if the actual elapsed time
+	 * is significantly greater (3x) than our sample interval. If it
+	 * is, then we were idle for a long enough period of time
+	 * to adjust our busyness.
+	 */
 	sample_time = pid_params.sample_rate_ms * USEC_PER_MSEC;
 	duration_us = (u32) ktime_us_delta(cpu->sample.time,
 					   cpu->last_sample_time);
@@ -948,6 +978,7 @@ static struct cpufreq_driver intel_pstate_driver = {
 
 static int __initdata no_load;
 static int __initdata no_hwp;
+static unsigned int force_load;
 
 static int intel_pstate_msrs_not_valid(void)
 {
@@ -1094,7 +1125,8 @@ static bool intel_pstate_platform_pwr_mgmt_exists(void)
 		case PSS:
 			return intel_pstate_no_acpi_pss();
 		case PPC:
-			return intel_pstate_has_acpi_ppc();
+			return intel_pstate_has_acpi_ppc() &&
+				(!force_load);
 		}
 	}
 
@@ -1175,6 +1207,8 @@ static int __init intel_pstate_setup(char *str)
 		no_load = 1;
 	if (!strcmp(str, "no_hwp"))
 		no_hwp = 1;
+	if (!strcmp(str, "force"))
+		force_load = 1;
 	return 0;
 }
 early_param("intel_pstate", intel_pstate_setup);
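
For readers coming at this from the consumer side: the dev_pm_opp_add_dynamic() path reworked above backs the public dev_pm_opp_add() API that platform drivers use to register operating points. The sketch below is a hypothetical illustration, not code from this patch; the device, frequencies and voltages are made-up example values, and the rcu_read_lock() around the lookup follows the locking rule the OPP library documents for its find helpers.

#include <linux/device.h>
#include <linux/err.h>
#include <linux/pm_opp.h>
#include <linux/rcupdate.h>

/* Hypothetical helper called from a driver's probe path. */
static int example_register_opps(struct device *dev)
{
	struct dev_pm_opp *opp;
	unsigned long freq = 600000000;		/* 600 MHz, example value */
	int ret;

	/* Each call lands in the dev_pm_opp_add_dynamic() path above. */
	ret = dev_pm_opp_add(dev, 400000000, 900000);	/* 400 MHz @ 0.90 V */
	if (ret)
		return ret;
	ret = dev_pm_opp_add(dev, 800000000, 1100000);	/* 800 MHz @ 1.10 V */
	if (ret)
		return ret;

	/* Lookups must run under rcu_read_lock(), per the OPP library docs. */
	rcu_read_lock();
	opp = dev_pm_opp_find_freq_ceil(dev, &freq);
	if (IS_ERR(opp)) {
		rcu_read_unlock();
		return PTR_ERR(opp);
	}
	dev_info(dev, "ceil OPP: %lu Hz at %lu uV\n", freq,
		 dev_pm_opp_get_voltage(opp));
	rcu_read_unlock();

	return 0;
}

A duplicate dev_pm_opp_add() call for an already-registered frequency now takes the new free_opp error path: it returns 0 when the existing, enabled entry has the same voltage and -EEXIST otherwise, as the rewritten duplicate check shows.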
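
The core_busy normalization commented in intel_pstate_get_scaled_busy() above is easier to see with concrete numbers. The following stand-alone sketch models the same arithmetic outside the kernel; it re-implements simplified versions of the int_tofp()/fp_toint()/mul_fp()/div_fp() helpers and assumes 8 fractional bits purely for illustration (the driver defines its own FRAC_BITS).

#include <stdint.h>
#include <stdio.h>

#define FRAC_BITS 8	/* assumed here for illustration only */

static int32_t int_tofp(int32_t x) { return x << FRAC_BITS; }
static int32_t fp_toint(int32_t x) { return x >> FRAC_BITS; }
static int32_t mul_fp(int32_t a, int32_t b)
{
	return ((int64_t)a * b) >> FRAC_BITS;
}
static int32_t div_fp(int32_t a, int32_t b)
{
	return ((int64_t)a << FRAC_BITS) / b;
}

int main(void)
{
	/* Example: the core ran at 45% of its maximum performance... */
	int32_t core_busy = int_tofp(45);
	/* ...while the last requested pstate was 16 out of a max of 32. */
	int32_t max_pstate = int_tofp(32);
	int32_t current_pstate = int_tofp(16);

	/* Same expression as the driver uses to normalize busyness. */
	core_busy = mul_fp(core_busy, div_fp(max_pstate, current_pstate));

	/* Prints 90: 45% of max performance is 90% of the requested pstate. */
	printf("scaled busy: %d%%\n", fp_toint(core_busy));
	return 0;
}

That 90% figure, not the raw 45%, is what the PID controller is fed, so a core that looks half-idle relative to its maximum still reads as nearly saturated at the low pstate it was actually asked to run at.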