diff options
29 files changed, 3359 insertions, 137 deletions
diff --git a/Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt b/Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt new file mode 100644 index 0000000..d48fc52 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/hisilicon-thermal.txt @@ -0,0 +1,23 @@ +* Temperature Sensor on hisilicon SoCs + +** Required properties : + +- compatible: "hisilicon,tsensor". +- reg: physical base address of thermal sensor and length of memory mapped + region. +- interrupts: The interrupt number to the cpu. Defines the interrupt used + by /SOCTHERM/tsensor. +- clock-names: Input clock name, should be 'thermal_clk'. +- clocks: phandles for clock specified in "clock-names" property. +- #thermal-sensor-cells: Should be 1. See ./thermal.txt for a description. + +Example : + + tsensor: tsensor@0,f7030700 { + compatible = "hisilicon,tsensor"; + reg = <0x0 0xf7030700 0x0 0x1000>; + interrupts = <0 7 0x4>; + clocks = <&sys_ctrl HI6220_TSENSOR_CLK>; + clock-names = "thermal_clk"; + #thermal-sensor-cells = <1>; + }; diff --git a/Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt b/Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt new file mode 100644 index 0000000..290ec06 --- /dev/null +++ b/Documentation/devicetree/bindings/thermal/qcom-spmi-temp-alarm.txt @@ -0,0 +1,57 @@ +Qualcomm QPNP PMIC Temperature Alarm + +QPNP temperature alarm peripherals are found inside of Qualcomm PMIC chips +that utilize the Qualcomm SPMI implementation. These peripherals provide an +interrupt signal and status register to identify high PMIC die temperature. + +Required properties: +- compatible: Should contain "qcom,spmi-temp-alarm". +- reg: Specifies the SPMI address and length of the controller's + registers. +- interrupts: PMIC temperature alarm interrupt. +- #thermal-sensor-cells: Should be 0. See thermal.txt for a description. 
+ +Optional properties: +- io-channels: Should contain IIO channel specifier for the ADC channel, + which reports chip die temperature. +- io-channel-names: Should contain "thermal". + +Example: + + pm8941_temp: thermal-alarm@2400 { + compatible = "qcom,spmi-temp-alarm"; + reg = <0x2400 0x100>; + interrupts = <0 0x24 0 IRQ_TYPE_EDGE_RISING>; + #thermal-sensor-cells = <0>; + + io-channels = <&pm8941_vadc VADC_DIE_TEMP>; + io-channel-names = "thermal"; + }; + + thermal-zones { + pm8941 { + polling-delay-passive = <250>; + polling-delay = <1000>; + + thermal-sensors = <&pm8941_temp>; + + trips { + passive { + temperature = <105000>; + hysteresis = <2000>; + type = "passive"; + }; + alert { + temperature = <125000>; + hysteresis = <2000>; + type = "hot"; + }; + crit { + temperature = <145000>; + hysteresis = <2000>; + type = "critical"; + }; + }; + }; + }; + diff --git a/Documentation/devicetree/bindings/thermal/thermal.txt b/Documentation/devicetree/bindings/thermal/thermal.txt index 29fe0bf..8a49362 100644 --- a/Documentation/devicetree/bindings/thermal/thermal.txt +++ b/Documentation/devicetree/bindings/thermal/thermal.txt @@ -167,6 +167,13 @@ Optional property: by means of sensor ID. Additional coefficients are interpreted as constant offset. +- sustainable-power: An estimate of the sustainable power (in mW) that the + Type: unsigned thermal zone can dissipate at the desired + Size: one cell control temperature. For reference, the + sustainable power of a 4'' phone is typically + 2000mW, while on a 10'' tablet is around + 4500mW. 
+ Note: The delay properties are bound to the maximum dT/dt (temperature derivative over time) in two situations for a thermal zone: (i) - when passive cooling is activated (polling-delay-passive); and @@ -546,6 +553,8 @@ thermal-zones { */ coefficients = <1200 -345 890>; + sustainable-power = <2500>; + trips { /* Trips are based on resulting linear equation */ cpu_trip: cpu-trip { diff --git a/Documentation/thermal/cpu-cooling-api.txt b/Documentation/thermal/cpu-cooling-api.txt index 753e47c..7165358 100644 --- a/Documentation/thermal/cpu-cooling-api.txt +++ b/Documentation/thermal/cpu-cooling-api.txt @@ -36,8 +36,162 @@ the user. The registration APIs returns the cooling device pointer. np: pointer to the cooling device device tree node clip_cpus: cpumask of cpus where the frequency constraints will happen. -1.1.3 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) +1.1.3 struct thermal_cooling_device *cpufreq_power_cooling_register( + const struct cpumask *clip_cpus, u32 capacitance, + get_static_t plat_static_func) + +Similar to cpufreq_cooling_register, this function registers a cpufreq +cooling device. Using this function, the cooling device will +implement the power extensions by using a simple cpu power model. The +cpus must have registered their OPPs using the OPP library. + +The additional parameters are needed for the power model (See 2. Power +models). "capacitance" is the dynamic power coefficient (See 2.1 +Dynamic power). "plat_static_func" is a function to calculate the +static power consumed by these cpus (See 2.2 Static power). + +1.1.4 struct thermal_cooling_device *of_cpufreq_power_cooling_register( + struct device_node *np, const struct cpumask *clip_cpus, u32 capacitance, + get_static_t plat_static_func) + +Similar to cpufreq_power_cooling_register, this function register a +cpufreq cooling device with power extensions using the device tree +information supplied by the np parameter. 
+ +1.1.5 void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) This interface function unregisters the "thermal-cpufreq-%x" cooling device. cdev: Cooling device pointer which has to be unregistered. + +2. Power models + +The power API registration functions provide a simple power model for +CPUs. The current power is calculated as dynamic + (optionally) +static power. This power model requires that the operating-points of +the CPUs are registered using the kernel's opp library and the +`cpufreq_frequency_table` is assigned to the `struct device` of the +cpu. If you are using CONFIG_CPUFREQ_DT then the +`cpufreq_frequency_table` should already be assigned to the cpu +device. + +The `plat_static_func` parameter of `cpufreq_power_cooling_register()` +and `of_cpufreq_power_cooling_register()` is optional. If you don't +provide it, only dynamic power will be considered. + +2.1 Dynamic power + +The dynamic power consumption of a processor depends on many factors. +For a given processor implementation the primary factors are: + +- The time the processor spends running, consuming dynamic power, as + compared to the time in idle states where dynamic consumption is + negligible. Herein we refer to this as 'utilisation'. +- The voltage and frequency levels as a result of DVFS. The DVFS + level is a dominant factor governing power consumption. +- In running time the 'execution' behaviour (instruction types, memory + access patterns and so forth) causes, in most cases, a second order + variation. In pathological cases this variation can be significant, + but typically it is of a much lesser impact than the factors above. + +A high level dynamic power consumption model may then be represented as: + +Pdyn = f(run) * Voltage^2 * Frequency * Utilisation + +f(run) here represents the described execution behaviour and its +result has units of Watts/Hz/Volt^2 (this is often expressed in +mW/MHz/uVolt^2) + +The detailed behaviour for f(run) could be modelled on-line. 
However, +in practice, such an on-line model has dependencies on a number of +implementation specific processor support and characterisation +factors. Therefore, in initial implementation that contribution is +represented as a constant coefficient. This is a simplification +consistent with the relative contribution to overall power variation. + +In this simplified representation our model becomes: + +Pdyn = Capacitance * Voltage^2 * Frequency * Utilisation + +Where `capacitance` is a constant that represents an indicative +running time dynamic power coefficient in fundamental units of +mW/MHz/uVolt^2. Typical values for mobile CPUs might lie in range +from 100 to 500. For reference, the approximate values for the SoC in +ARM's Juno Development Platform are 530 for the Cortex-A57 cluster and +140 for the Cortex-A53 cluster. + + +2.2 Static power + +Static leakage power consumption depends on a number of factors. For a +given circuit implementation the primary factors are: + +- Time the circuit spends in each 'power state' +- Temperature +- Operating voltage +- Process grade + +The time the circuit spends in each 'power state' for a given +evaluation period at first order means OFF or ON. However, +'retention' states can also be supported that reduce power during +inactive periods without loss of context. + +Note: The visibility of state entries to the OS can vary, according to +platform specifics, and this can then impact the accuracy of a model +based on OS state information alone. It might be possible in some +cases to extract more accurate information from system resources. + +The temperature, operating voltage and process 'grade' (slow to fast) +of the circuit are all significant factors in static leakage power +consumption. All of these have complex relationships to static power. 
+ +Circuit implementation specific factors include the chosen silicon +process as well as the type, number and size of transistors in both +the logic gates and any RAM elements included. + +The static power consumption modelling must take into account the +power managed regions that are implemented. Taking the example of an +ARM processor cluster, the modelling would take into account whether +each CPU can be powered OFF separately or if only a single power +region is implemented for the complete cluster. + +In one view, there are others, a static power consumption model can +then start from a set of reference values for each power managed +region (e.g. CPU, Cluster/L2) in each state (e.g. ON, OFF) at an +arbitrary process grade, voltage and temperature point. These values +are then scaled for all of the following: the time in each state, the +process grade, the current temperature and the operating voltage. +However, since both implementation specific and complex relationships +dominate the estimate, the appropriate interface to the model from the +cpu cooling device is to provide a function callback that calculates +the static power in this platform. When registering the cpu cooling +device pass a function pointer that follows the `get_static_t` +prototype: + + int plat_get_static(cpumask_t *cpumask, int interval, + unsigned long voltage, u32 *power); + +`cpumask` is the cpumask of the cpus involved in the calculation. +`voltage` is the voltage at which they are operating. The function +should calculate the average static power for the last `interval` +milliseconds. It returns 0 on success, -E* on error. If it +succeeds, it should store the static power in `power`. Reading the +temperature of the cpus described by `cpumask` is left for +plat_get_static() to do as the platform knows best which thermal +sensor is closest to the cpu. + +If `plat_static_func` is NULL, static power is considered to be +negligible for this platform and only dynamic power is considered. 
+ +The platform specific callback can then use any combination of tables +and/or equations to compute the estimated value. Process grade +information is not passed to the model since access to such data, from +on-chip measurement capability or manufacture time data, is platform +specific. + +Note: the significance of static power for CPUs in comparison to +dynamic power is highly dependent on implementation. Given the +potential complexity in implementation, the importance and accuracy of +its inclusion when using cpu cooling devices should be assessed on a +case by case basis. + diff --git a/Documentation/thermal/power_allocator.txt b/Documentation/thermal/power_allocator.txt new file mode 100644 index 0000000..c3797b5 --- /dev/null +++ b/Documentation/thermal/power_allocator.txt @@ -0,0 +1,247 @@ +Power allocator governor tunables +================================= + +Trip points +----------- + +The governor requires the following two passive trip points: + +1. "switch on" trip point: temperature above which the governor + control loop starts operating. This is the first passive trip + point of the thermal zone. + +2. "desired temperature" trip point: it should be higher than the + "switch on" trip point. This is the target temperature the governor + is controlling for. This is the last passive trip point of the + thermal zone. 
+ +PID Controller +-------------- + +The power allocator governor implements a +Proportional-Integral-Derivative controller (PID controller) with +temperature as the control input and power as the controlled output: + + P_max = k_p * e + k_i * err_integral + k_d * diff_err + sustainable_power + +where + e = desired_temperature - current_temperature + err_integral is the sum of previous errors + diff_err = e - previous_error + +It is similar to the one depicted below: + + k_d + | +current_temp | + | v + | +----------+ +---+ + | +----->| diff_err |-->| X |------+ + | | +----------+ +---+ | + | | | tdp actor + | | k_i | | get_requested_power() + | | | | | | | + | | | | | | | ... + v | v v v v v + +---+ | +-------+ +---+ +---+ +---+ +----------+ + | S |-------+----->| sum e |----->| X |--->| S |-->| S |-->|power | + +---+ | +-------+ +---+ +---+ +---+ |allocation| + ^ | ^ +----------+ + | | | | | + | | +---+ | | | + | +------->| X |-------------------+ v v + | +---+ granted performance +desired_temperature ^ + | + | + k_po/k_pu + +Sustainable power +----------------- + +An estimate of the sustainable dissipatable power (in mW) should be +provided while registering the thermal zone. This estimates the +sustained power that can be dissipated at the desired control +temperature. This is the maximum sustained power for allocation at +the desired maximum temperature. The actual sustained power can vary +for a number of reasons. The closed loop controller will take care of +variations such as environmental conditions, and some factors related +to the speed-grade of the silicon. `sustainable_power` is therefore +simply an estimate, and may be tuned to affect the aggressiveness of +the thermal ramp. For reference, the sustainable power of a 4" phone +is typically 2000mW, while on a 10" tablet is around 4500mW (may vary +depending on screen size). + +If you are using device tree, do add it as a property of the +thermal-zone. 
For example: + + thermal-zones { + soc_thermal { + polling-delay = <1000>; + polling-delay-passive = <100>; + sustainable-power = <2500>; + ... + +Instead, if the thermal zone is registered from the platform code, pass a +`thermal_zone_params` that has a `sustainable_power`. If no +`thermal_zone_params` were being passed, then something like below +will suffice: + + static const struct thermal_zone_params tz_params = { + .sustainable_power = 3500, + }; + +and then pass `tz_params` as the 5th parameter to +`thermal_zone_device_register()` + +k_po and k_pu +------------- + +The implementation of the PID controller in the power allocator +thermal governor allows the configuration of two proportional term +constants: `k_po` and `k_pu`. `k_po` is the proportional term +constant during temperature overshoot periods (current temperature is +above "desired temperature" trip point). Conversely, `k_pu` is the +proportional term constant during temperature undershoot periods +(current temperature below "desired temperature" trip point). + +These controls are intended as the primary mechanism for configuring +the permitted thermal "ramp" of the system. For instance, a lower +`k_pu` value will provide a slower ramp, at the cost of capping +available capacity at a low temperature. On the other hand, a high +value of `k_pu` will result in the governor granting very high power +whilst temperature is low, and may lead to temperature overshooting. + +The default value for `k_pu` is: + + 2 * sustainable_power / (desired_temperature - switch_on_temp) + +This means that at `switch_on_temp` the output of the controller's +proportional term will be 2 * `sustainable_power`. 
The default value +for `k_po` is: + + sustainable_power / (desired_temperature - switch_on_temp) + +Focusing on the proportional and feed forward values of the PID +controller equation we have: + + P_max = k_p * e + sustainable_power + +The proportional term is proportional to the difference between the +desired temperature and the current one. When the current temperature +is the desired one, then the proportional component is zero and +`P_max` = `sustainable_power`. That is, the system should operate in +thermal equilibrium under constant load. `sustainable_power` is only +an estimate, which is the reason for closed-loop control such as this. + +Expanding `k_pu` we get: + P_max = 2 * sustainable_power * (T_set - T) / (T_set - T_on) + + sustainable_power + +where + T_set is the desired temperature + T is the current temperature + T_on is the switch on temperature + +When the current temperature is the switch_on temperature, the above +formula becomes: + + P_max = 2 * sustainable_power * (T_set - T_on) / (T_set - T_on) + + sustainable_power = 2 * sustainable_power + sustainable_power = + 3 * sustainable_power + +Therefore, the proportional term alone linearly decreases power from +3 * `sustainable_power` to `sustainable_power` as the temperature +rises from the switch on temperature to the desired temperature. + +k_i and integral_cutoff +----------------------- + +`k_i` configures the PID loop's integral term constant. This term +allows the PID controller to compensate for long term drift and for +the quantized nature of the output control: cooling devices can't set +the exact power that the governor requests. When the temperature +error is below `integral_cutoff`, errors are accumulated in the +integral term. This term is then multiplied by `k_i` and the result +added to the output of the controller. Typically `k_i` is set low (1 +or 2) and `integral_cutoff` is 0. + +k_d +--- + +`k_d` configures the PID loop's derivative term constant. 
It's +recommended to leave it as the default: 0. + +Cooling device power API +======================== + +Cooling devices controlled by this governor must supply the additional +"power" API in their `cooling_device_ops`. It consists of three ops: + +1. int get_requested_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, u32 *power); +@cdev: The `struct thermal_cooling_device` pointer +@tz: thermal zone in which we are currently operating +@power: pointer in which to store the calculated power + +`get_requested_power()` calculates the power requested by the device +in milliwatts and stores it in @power. It should return 0 on +success, -E* on failure. This is currently used by the power +allocator governor to calculate how much power to give to each cooling +device. + +2. int state2power(struct thermal_cooling_device *cdev, struct + thermal_zone_device *tz, unsigned long state, u32 *power); +@cdev: The `struct thermal_cooling_device` pointer +@tz: thermal zone in which we are currently operating +@state: A cooling device state +@power: pointer in which to store the equivalent power + +Convert cooling device state @state into power consumption in +milliwatts and store it in @power. It should return 0 on success, -E* +on failure. This is currently used by thermal core to calculate the +maximum power that an actor can consume. + +3. int power2state(struct thermal_cooling_device *cdev, u32 power, + unsigned long *state); +@cdev: The `struct thermal_cooling_device` pointer +@power: power in milliwatts +@state: pointer in which to store the resulting state + +Calculate a cooling device state that would make the device consume at +most @power mW and store it in @state. It should return 0 on success, +-E* on failure. This is currently used by the thermal core to convert +a given power set by the power allocator governor to a state that the +cooling device can set. 
It is a function because this conversion may +depend on external factors that may change so this function should do the +best conversion given "current circumstances". + +Cooling device weights +---------------------- + +Weights are a mechanism to bias the allocation among cooling +devices. They express the relative power efficiency of different +cooling devices. Higher weight can be used to express higher power +efficiency. Weighting is relative such that if each cooling device +has a weight of one they are considered equal. This is particularly +useful in heterogeneous systems where two cooling devices may perform +the same kind of compute, but with different efficiency. For example, +a system with two different types of processors. + +If the thermal zone is registered using +`thermal_zone_device_register()` (i.e., platform code), then weights +are passed as part of the thermal zone's `thermal_bind_parameters`. +If the platform is registered using device tree, then they are passed +as the `contribution` property of each map in the `cooling-maps` node. + +Limitations of the power allocator governor +=========================================== + +The power allocator governor's PID controller works best if there is a +periodic tick. If you have a driver that calls +`thermal_zone_device_update()` (or anything that ends up calling the +governor's `throttle()` function) repetitively, the governor response +won't be very good. Note that this is not particular to this +governor, step-wise will also misbehave if you call its throttle() +faster than the normal thermal framework tick (due to interrupts for +example) as it will overreact. diff --git a/Documentation/thermal/sysfs-api.txt b/Documentation/thermal/sysfs-api.txt index 87519cb..c1f6864 100644 --- a/Documentation/thermal/sysfs-api.txt +++ b/Documentation/thermal/sysfs-api.txt @@ -95,7 +95,7 @@ temperature) and throttle appropriate devices. 
1.3 interface for binding a thermal zone device with a thermal cooling device 1.3.1 int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, int trip, struct thermal_cooling_device *cdev, - unsigned long upper, unsigned long lower); + unsigned long upper, unsigned long lower, unsigned int weight); This interface function bind a thermal cooling device to the certain trip point of a thermal zone device. @@ -110,6 +110,8 @@ temperature) and throttle appropriate devices. lower:the Minimum cooling state can be used for this trip point. THERMAL_NO_LIMIT means no lower limit, and the cooling device can be in cooling state 0. + weight: the influence of this cooling device in this thermal + zone. See 1.4.1 below for more information. 1.3.2 int thermal_zone_unbind_cooling_device(struct thermal_zone_device *tz, int trip, struct thermal_cooling_device *cdev); @@ -127,9 +129,15 @@ temperature) and throttle appropriate devices. This structure defines the following parameters that are used to bind a zone with a cooling device for a particular trip point. .cdev: The cooling device pointer - .weight: The 'influence' of a particular cooling device on this zone. - This is on a percentage scale. The sum of all these weights - (for a particular zone) cannot exceed 100. + .weight: The 'influence' of a particular cooling device on this + zone. This is relative to the rest of the cooling + devices. For example, if all cooling devices have a + weight of 1, then they all contribute the same. You can + use percentages if you want, but it's not mandatory. A + weight of 0 means that this cooling device doesn't + contribute to the cooling of this zone unless all cooling + devices have a weight of 0. If all weights are 0, then + they all contribute the same. .trip_mask:This is a bit mask that gives the binding relation between this thermal zone and cdev, for a particular trip point. 
If nth bit is set, then the cdev and thermal zone are bound @@ -176,6 +184,14 @@ Thermal zone device sys I/F, created once it's registered: |---trip_point_[0-*]_type: Trip point type |---trip_point_[0-*]_hyst: Hysteresis value for this trip point |---emul_temp: Emulated temperature set node + |---sustainable_power: Sustainable dissipatable power + |---k_po: Proportional term during temperature overshoot + |---k_pu: Proportional term during temperature undershoot + |---k_i: PID's integral term in the power allocator gov + |---k_d: PID's derivative term in the power allocator + |---integral_cutoff: Offset above which errors are accumulated + |---slope: Slope constant applied as linear extrapolation + |---offset: Offset constant applied as linear extrapolation Thermal cooling device sys I/F, created once it's registered: /sys/class/thermal/cooling_device[0-*]: @@ -192,6 +208,8 @@ thermal_zone_bind_cooling_device/thermal_zone_unbind_cooling_device. /sys/class/thermal/thermal_zone[0-*]: |---cdev[0-*]: [0-*]th cooling device in current thermal zone |---cdev[0-*]_trip_point: Trip point that cdev[0-*] is associated with + |---cdev[0-*]_weight: Influence of the cooling device in + this thermal zone Besides the thermal zone device sysfs I/F and cooling device sysfs I/F, the generic thermal driver also creates a hwmon sysfs I/F for each _type_ @@ -265,6 +283,14 @@ cdev[0-*]_trip_point point. RO, Optional +cdev[0-*]_weight + The influence of cdev[0-*] in this thermal zone. This value + is relative to the rest of cooling devices in the thermal + zone. For example, if a cooling device has a weight double + than that of other, it's twice as effective in cooling the + thermal zone. + RW, Optional + passive Attribute is only present for zones in which the passive cooling policy is not supported by native thermal driver. 
Default is zero @@ -289,6 +315,66 @@ emul_temp because userland can easily disable the thermal policy by simply flooding this sysfs node with low temperature values. +sustainable_power + An estimate of the sustained power that can be dissipated by + the thermal zone. Used by the power allocator governor. For + more information see Documentation/thermal/power_allocator.txt + Unit: milliwatts + RW, Optional + +k_po + The proportional term of the power allocator governor's PID + controller during temperature overshoot. Temperature overshoot + is when the current temperature is above the "desired + temperature" trip point. For more information see + Documentation/thermal/power_allocator.txt + RW, Optional + +k_pu + The proportional term of the power allocator governor's PID + controller during temperature undershoot. Temperature undershoot + is when the current temperature is below the "desired + temperature" trip point. For more information see + Documentation/thermal/power_allocator.txt + RW, Optional + +k_i + The integral term of the power allocator governor's PID + controller. This term allows the PID controller to compensate + for long term drift. For more information see + Documentation/thermal/power_allocator.txt + RW, Optional + +k_d + The derivative term of the power allocator governor's PID + controller. For more information see + Documentation/thermal/power_allocator.txt + RW, Optional + +integral_cutoff + Temperature offset from the desired temperature trip point + above which the integral term of the power allocator + governor's PID controller starts accumulating errors. For + example, if integral_cutoff is 0, then the integral term only + accumulates error when temperature is above the desired + temperature trip point. For more information see + Documentation/thermal/power_allocator.txt + RW, Optional + +slope + The slope constant used in a linear extrapolation model + to determine a hotspot temperature based off the sensor's + raw readings. 
It is up to the device driver to determine + the usage of these values. + RW, Optional + +offset + The offset constant used in a linear extrapolation model + to determine a hotspot temperature based off the sensor's + raw readings. It is up to the device driver to determine + the usage of these values. + RW, Optional + ***************************** * Cooling device attributes * ***************************** @@ -318,7 +404,8 @@ passive, active. If an ACPI thermal zone supports critical, passive, active[0] and active[1] at the same time, it may register itself as a thermal_zone_device (thermal_zone1) with 4 trip points in all. It has one processor and one fan, which are both registered as -thermal_cooling_device. +thermal_cooling_device. Both are considered to have the same +effectiveness in cooling the thermal zone. If the processor is listed in _PSL method, and the fan is listed in _AL0 method, the sys I/F structure will be built like this: @@ -340,8 +427,10 @@ method, the sys I/F structure will be built like this: |---trip_point_3_type: active1 |---cdev0: --->/sys/class/thermal/cooling_device0 |---cdev0_trip_point: 1 /* cdev0 can be used for passive */ + |---cdev0_weight: 1024 |---cdev1: --->/sys/class/thermal/cooling_device3 |---cdev1_trip_point: 2 /* cdev1 can be used for active[0]*/ + |---cdev1_weight: 1024 |cooling_device0: |---type: Processor diff --git a/drivers/acpi/thermal.c b/drivers/acpi/thermal.c index d24fa19..6d4e44e 100644 --- a/drivers/acpi/thermal.c +++ b/drivers/acpi/thermal.c @@ -800,7 +800,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal, result = thermal_zone_bind_cooling_device (thermal, trip, cdev, - THERMAL_NO_LIMIT, THERMAL_NO_LIMIT); + THERMAL_NO_LIMIT, THERMAL_NO_LIMIT, + THERMAL_WEIGHT_DEFAULT); else result = thermal_zone_unbind_cooling_device @@ -824,7 +825,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal, if (bind) result = thermal_zone_bind_cooling_device (thermal, 
trip, cdev, - THERMAL_NO_LIMIT, THERMAL_NO_LIMIT); + THERMAL_NO_LIMIT, THERMAL_NO_LIMIT, + THERMAL_WEIGHT_DEFAULT); else result = thermal_zone_unbind_cooling_device (thermal, trip, cdev); @@ -841,7 +843,8 @@ static int acpi_thermal_cooling_device_cb(struct thermal_zone_device *thermal, result = thermal_zone_bind_cooling_device (thermal, THERMAL_TRIPS_NONE, cdev, THERMAL_NO_LIMIT, - THERMAL_NO_LIMIT); + THERMAL_NO_LIMIT, + THERMAL_WEIGHT_DEFAULT); else result = thermal_zone_unbind_cooling_device (thermal, THERMAL_TRIPS_NONE, diff --git a/drivers/platform/x86/acerhdf.c b/drivers/platform/x86/acerhdf.c index 594c918..1ef02da 100644 --- a/drivers/platform/x86/acerhdf.c +++ b/drivers/platform/x86/acerhdf.c @@ -372,7 +372,8 @@ static int acerhdf_bind(struct thermal_zone_device *thermal, return 0; if (thermal_zone_bind_cooling_device(thermal, 0, cdev, - THERMAL_NO_LIMIT, THERMAL_NO_LIMIT)) { + THERMAL_NO_LIMIT, THERMAL_NO_LIMIT, + THERMAL_WEIGHT_DEFAULT)) { pr_err("error binding cooling dev\n"); return -EINVAL; } diff --git a/drivers/thermal/Kconfig b/drivers/thermal/Kconfig index af40db0..80e9c45 100644 --- a/drivers/thermal/Kconfig +++ b/drivers/thermal/Kconfig @@ -42,6 +42,17 @@ config THERMAL_OF Say 'Y' here if you need to build thermal infrastructure based on device tree. +config THERMAL_WRITABLE_TRIPS + bool "Enable writable trip points" + help + This option allows the system integrator to choose whether + trip temperatures can be changed from userspace. The + writable trips need to be specified when setting up the + thermal zone but the choice here takes precedence. + + Say 'Y' here if you would like to allow userspace tools to + change trip temperatures. + choice prompt "Default Thermal governor" default THERMAL_DEFAULT_GOV_STEP_WISE @@ -71,6 +82,14 @@ config THERMAL_DEFAULT_GOV_USER_SPACE Select this if you want to let the user space manage the platform thermals. 
+config THERMAL_DEFAULT_GOV_POWER_ALLOCATOR + bool "power_allocator" + select THERMAL_GOV_POWER_ALLOCATOR + help + Select this if you want to control temperature based on + system and device power allocation. This governor can only + operate on cooling devices that implement the power API. + endchoice config THERMAL_GOV_FAIR_SHARE @@ -99,6 +118,12 @@ config THERMAL_GOV_USER_SPACE help Enable this to let the user space manage the platform thermals. +config THERMAL_GOV_POWER_ALLOCATOR + bool "Power allocator thermal governor" + help + Enable this to manage platform thermals by dynamically + allocating and limiting power to devices. + config CPU_THERMAL bool "generic cpu cooling support" depends on CPU_FREQ @@ -136,6 +161,14 @@ config THERMAL_EMULATION because userland can easily disable the thermal policy by simply flooding this sysfs node with low temperature values. +config HISI_THERMAL + tristate "Hisilicon thermal driver" + depends on ARCH_HISI && CPU_THERMAL && OF + help + Enable this to plug hisilicon's thermal sensor driver into the Linux + thermal framework. cpufreq is used as the cooling device to throttle + CPUs when the passive trip is crossed. + config IMX_THERMAL tristate "Temperature sensor driver for Freescale i.MX SoCs" depends on CPU_THERMAL @@ -299,4 +332,15 @@ depends on ARCH_STI && OF source "drivers/thermal/st/Kconfig" endmenu +config QCOM_SPMI_TEMP_ALARM + tristate "Qualcomm SPMI PMIC Temperature Alarm" + depends on OF && SPMI && IIO + select REGMAP_SPMI + help + This enables a thermal sysfs driver for Qualcomm plug-and-play (QPNP) + PMIC devices. It shows up in sysfs as a thermal sensor with multiple + trip points. The temperature reported by the thermal sensor reflects the + real time die temperature if an ADC is present or an estimate of the + temperature based upon the over temperature stage value. 
+ endif diff --git a/drivers/thermal/Makefile b/drivers/thermal/Makefile index fa0dc48..ff6422e 100644 --- a/drivers/thermal/Makefile +++ b/drivers/thermal/Makefile @@ -14,6 +14,7 @@ thermal_sys-$(CONFIG_THERMAL_GOV_FAIR_SHARE) += fair_share.o thermal_sys-$(CONFIG_THERMAL_GOV_BANG_BANG) += gov_bang_bang.o thermal_sys-$(CONFIG_THERMAL_GOV_STEP_WISE) += step_wise.o thermal_sys-$(CONFIG_THERMAL_GOV_USER_SPACE) += user_space.o +thermal_sys-$(CONFIG_THERMAL_GOV_POWER_ALLOCATOR) += power_allocator.o # cpufreq cooling thermal_sys-$(CONFIG_CPU_THERMAL) += cpu_cooling.o @@ -22,6 +23,7 @@ thermal_sys-$(CONFIG_CPU_THERMAL) += cpu_cooling.o thermal_sys-$(CONFIG_CLOCK_THERMAL) += clock_cooling.o # platform thermal drivers +obj-$(CONFIG_QCOM_SPMI_TEMP_ALARM) += qcom-spmi-temp-alarm.o obj-$(CONFIG_SPEAR_THERMAL) += spear_thermal.o obj-$(CONFIG_ROCKCHIP_THERMAL) += rockchip_thermal.o obj-$(CONFIG_RCAR_THERMAL) += rcar_thermal.o @@ -39,3 +41,4 @@ obj-$(CONFIG_TI_SOC_THERMAL) += ti-soc-thermal/ obj-$(CONFIG_INT340X_THERMAL) += int340x_thermal/ obj-$(CONFIG_ST_THERMAL) += st/ obj-$(CONFIG_TEGRA_SOCTHERM) += tegra_soctherm.o +obj-$(CONFIG_HISI_THERMAL) += hisi_thermal.o diff --git a/drivers/thermal/cpu_cooling.c b/drivers/thermal/cpu_cooling.c index f65f0d1..6509c61 100644 --- a/drivers/thermal/cpu_cooling.c +++ b/drivers/thermal/cpu_cooling.c @@ -26,10 +26,13 @@ #include <linux/thermal.h> #include <linux/cpufreq.h> #include <linux/err.h> +#include <linux/pm_opp.h> #include <linux/slab.h> #include <linux/cpu.h> #include <linux/cpu_cooling.h> +#include <trace/events/thermal.h> + /* * Cooling state <-> CPUFreq frequency * @@ -45,6 +48,19 @@ */ /** + * struct power_table - frequency to power conversion + * @frequency: frequency in KHz + * @power: power in mW + * + * This structure is built when the cooling device registers and helps + * in translating frequency to power and vice versa. 
+ */ +struct power_table { + u32 frequency; + u32 power; +}; + +/** * struct cpufreq_cooling_device - data for cooling device with cpufreq * @id: unique integer value corresponding to each cpufreq_cooling_device * registered. @@ -58,6 +74,15 @@ * cpufreq frequencies. * @allowed_cpus: all the cpus involved for this cpufreq_cooling_device. * @node: list_head to link all cpufreq_cooling_device together. + * @last_load: load measured by the latest call to cpufreq_get_requested_power() + * @time_in_idle: previous reading of the absolute time that this cpu was idle + * @time_in_idle_timestamp: wall time of the last invocation of + * get_cpu_idle_time_us() + * @dyn_power_table: array of struct power_table for frequency to power + * conversion, sorted in ascending order. + * @dyn_power_table_entries: number of entries in the @dyn_power_table array + * @cpu_dev: the first cpu_device from @allowed_cpus that has OPPs registered + * @plat_get_static_power: callback to calculate the static power * * This structure is required for keeping information of each registered * cpufreq_cooling_device. 
@@ -71,6 +96,13 @@ struct cpufreq_cooling_device { unsigned int *freq_table; /* In descending order */ struct cpumask allowed_cpus; struct list_head node; + u32 last_load; + u64 *time_in_idle; + u64 *time_in_idle_timestamp; + struct power_table *dyn_power_table; + int dyn_power_table_entries; + struct device *cpu_dev; + get_static_t plat_get_static_power; }; static DEFINE_IDR(cpufreq_idr); static DEFINE_MUTEX(cooling_cpufreq_lock); @@ -186,23 +218,237 @@ static int cpufreq_thermal_notifier(struct notifier_block *nb, unsigned long max_freq = 0; struct cpufreq_cooling_device *cpufreq_dev; - if (event != CPUFREQ_ADJUST) - return 0; + switch (event) { - mutex_lock(&cooling_cpufreq_lock); - list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { - if (!cpumask_test_cpu(policy->cpu, - &cpufreq_dev->allowed_cpus)) + case CPUFREQ_ADJUST: + mutex_lock(&cooling_cpufreq_lock); + list_for_each_entry(cpufreq_dev, &cpufreq_dev_list, node) { + if (!cpumask_test_cpu(policy->cpu, + &cpufreq_dev->allowed_cpus)) + continue; + + max_freq = cpufreq_dev->cpufreq_val; + + if (policy->max != max_freq) + cpufreq_verify_within_limits(policy, 0, + max_freq); + } + mutex_unlock(&cooling_cpufreq_lock); + break; + default: + return NOTIFY_DONE; + } + + return NOTIFY_OK; +} + +/** + * build_dyn_power_table() - create a dynamic power to frequency table + * @cpufreq_device: the cpufreq cooling device in which to store the table + * @capacitance: dynamic power coefficient for these cpus + * + * Build a dynamic power to frequency table for this cpu and store it + * in @cpufreq_device. This table will be used in cpu_power_to_freq() and + * cpu_freq_to_power() to convert between power and frequency + * efficiently. Power is stored in mW, frequency in KHz. The + * resulting table is in ascending order. + * + * Return: 0 on success, -E* on error. 
+ */ +static int build_dyn_power_table(struct cpufreq_cooling_device *cpufreq_device, + u32 capacitance) +{ + struct power_table *power_table; + struct dev_pm_opp *opp; + struct device *dev = NULL; + int num_opps = 0, cpu, i, ret = 0; + unsigned long freq; + + rcu_read_lock(); + + for_each_cpu(cpu, &cpufreq_device->allowed_cpus) { + dev = get_cpu_device(cpu); + if (!dev) { + dev_warn(&cpufreq_device->cool_dev->device, + "No cpu device for cpu %d\n", cpu); continue; + } + + num_opps = dev_pm_opp_get_opp_count(dev); + if (num_opps > 0) { + break; + } else if (num_opps < 0) { + ret = num_opps; + goto unlock; + } + } - max_freq = cpufreq_dev->cpufreq_val; + if (num_opps == 0) { + ret = -EINVAL; + goto unlock; + } - if (policy->max != max_freq) - cpufreq_verify_within_limits(policy, 0, max_freq); + power_table = kcalloc(num_opps, sizeof(*power_table), GFP_KERNEL); + if (!power_table) { + ret = -ENOMEM; + goto unlock; } - mutex_unlock(&cooling_cpufreq_lock); - return 0; + for (freq = 0, i = 0; + opp = dev_pm_opp_find_freq_ceil(dev, &freq), !IS_ERR(opp); + freq++, i++) { + u32 freq_mhz, voltage_mv; + u64 power; + + freq_mhz = freq / 1000000; + voltage_mv = dev_pm_opp_get_voltage(opp) / 1000; + + /* + * Do the multiplication with MHz and millivolt so as + * to not overflow. 
+ */ + power = (u64)capacitance * freq_mhz * voltage_mv * voltage_mv; + do_div(power, 1000000000); + + /* frequency is stored in power_table in KHz */ + power_table[i].frequency = freq / 1000; + + /* power is stored in mW */ + power_table[i].power = power; + } + + if (i == 0) { + ret = PTR_ERR(opp); + goto unlock; + } + + cpufreq_device->cpu_dev = dev; + cpufreq_device->dyn_power_table = power_table; + cpufreq_device->dyn_power_table_entries = i; + +unlock: + rcu_read_unlock(); + return ret; +} + +static u32 cpu_freq_to_power(struct cpufreq_cooling_device *cpufreq_device, + u32 freq) +{ + int i; + struct power_table *pt = cpufreq_device->dyn_power_table; + + for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++) + if (freq < pt[i].frequency) + break; + + return pt[i - 1].power; +} + +static u32 cpu_power_to_freq(struct cpufreq_cooling_device *cpufreq_device, + u32 power) +{ + int i; + struct power_table *pt = cpufreq_device->dyn_power_table; + + for (i = 1; i < cpufreq_device->dyn_power_table_entries; i++) + if (power < pt[i].power) + break; + + return pt[i - 1].frequency; +} + +/** + * get_load() - get load for a cpu since last updated + * @cpufreq_device: &struct cpufreq_cooling_device for this cpu + * @cpu: cpu number + * + * Return: The average load of cpu @cpu in percentage since this + * function was last called. 
+ */ +static u32 get_load(struct cpufreq_cooling_device *cpufreq_device, int cpu) +{ + u32 load; + u64 now, now_idle, delta_time, delta_idle; + + now_idle = get_cpu_idle_time(cpu, &now, 0); + delta_idle = now_idle - cpufreq_device->time_in_idle[cpu]; + delta_time = now - cpufreq_device->time_in_idle_timestamp[cpu]; + + if (delta_time <= delta_idle) + load = 0; + else + load = div64_u64(100 * (delta_time - delta_idle), delta_time); + + cpufreq_device->time_in_idle[cpu] = now_idle; + cpufreq_device->time_in_idle_timestamp[cpu] = now; + + return load; +} + +/** + * get_static_power() - calculate the static power consumed by the cpus + * @cpufreq_device: struct &cpufreq_cooling_device for this cpu cdev + * @tz: thermal zone device in which we're operating + * @freq: frequency in KHz + * @power: pointer in which to store the calculated static power + * + * Calculate the static power consumed by the cpus described by + * @cpufreq_device running at frequency @freq. This function relies on a + * platform specific function that should have been provided when the + * actor was registered. If it wasn't, the static power is assumed to + * be negligible. The calculated static power is stored in @power. + * + * Return: 0 on success, -E* on failure. + */ +static int get_static_power(struct cpufreq_cooling_device *cpufreq_device, + struct thermal_zone_device *tz, unsigned long freq, + u32 *power) +{ + struct dev_pm_opp *opp; + unsigned long voltage; + struct cpumask *cpumask = &cpufreq_device->allowed_cpus; + unsigned long freq_hz = freq * 1000; + + if (!cpufreq_device->plat_get_static_power || + !cpufreq_device->cpu_dev) { + *power = 0; + return 0; + } + + rcu_read_lock(); + + opp = dev_pm_opp_find_freq_exact(cpufreq_device->cpu_dev, freq_hz, + true); + voltage = dev_pm_opp_get_voltage(opp); + + rcu_read_unlock(); + + if (voltage == 0) { + dev_warn_ratelimited(cpufreq_device->cpu_dev, + "Failed to get voltage for frequency %lu: %ld\n", + freq_hz, IS_ERR(opp) ? 
PTR_ERR(opp) : 0); + return -EINVAL; + } + + return cpufreq_device->plat_get_static_power(cpumask, tz->passive_delay, + voltage, power); +} + +/** + * get_dynamic_power() - calculate the dynamic power + * @cpufreq_device: &cpufreq_cooling_device for this cdev + * @freq: current frequency + * + * Return: the dynamic power consumed by the cpus described by + * @cpufreq_device. + */ +static u32 get_dynamic_power(struct cpufreq_cooling_device *cpufreq_device, + unsigned long freq) +{ + u32 raw_cpu_power; + + raw_cpu_power = cpu_freq_to_power(cpufreq_device, freq); + return (raw_cpu_power * cpufreq_device->last_load) / 100; } /* cpufreq cooling device callback functions are defined below */ @@ -280,8 +526,205 @@ static int cpufreq_set_cur_state(struct thermal_cooling_device *cdev, return 0; } +/** + * cpufreq_get_requested_power() - get the current power + * @cdev: &thermal_cooling_device pointer + * @tz: a valid thermal zone device pointer + * @power: pointer in which to store the resulting power + * + * Calculate the current power consumption of the cpus in milliwatts + * and store it in @power. This function should actually calculate + * the requested power, but it's hard to get the frequency that + * cpufreq would have assigned if there were no thermal limits. + * Instead, we calculate the current power on the assumption that the + * immediate future will look like the immediate past. + * + * We use the current frequency and the average load since this + * function was last called. In reality, there could have been + * multiple opps since this function was last called and that affects + * the load calculation. While it's not perfectly accurate, this + * simplification is good enough and works. REVISIT this, as more + * complex code may be needed if experiments show that it's not + * accurate enough. + * + * Return: 0 on success, -E* if getting the static power failed. 
+ */ +static int cpufreq_get_requested_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, + u32 *power) +{ + unsigned long freq; + int i = 0, cpu, ret; + u32 static_power, dynamic_power, total_load = 0; + struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; + u32 *load_cpu = NULL; + + cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask); + + /* + * All the CPUs are offline, thus the requested power by + * the cdev is 0 + */ + if (cpu >= nr_cpu_ids) { + *power = 0; + return 0; + } + + freq = cpufreq_quick_get(cpu); + + if (trace_thermal_power_cpu_get_power_enabled()) { + u32 ncpus = cpumask_weight(&cpufreq_device->allowed_cpus); + + load_cpu = devm_kcalloc(&cdev->device, ncpus, sizeof(*load_cpu), + GFP_KERNEL); + } + + for_each_cpu(cpu, &cpufreq_device->allowed_cpus) { + u32 load; + + if (cpu_online(cpu)) + load = get_load(cpufreq_device, cpu); + else + load = 0; + + total_load += load; + if (trace_thermal_power_cpu_limit_enabled() && load_cpu) + load_cpu[i] = load; + + i++; + } + + cpufreq_device->last_load = total_load; + + dynamic_power = get_dynamic_power(cpufreq_device, freq); + ret = get_static_power(cpufreq_device, tz, freq, &static_power); + if (ret) { + if (load_cpu) + devm_kfree(&cdev->device, load_cpu); + return ret; + } + + if (load_cpu) { + trace_thermal_power_cpu_get_power( + &cpufreq_device->allowed_cpus, + freq, load_cpu, i, dynamic_power, static_power); + + devm_kfree(&cdev->device, load_cpu); + } + + *power = static_power + dynamic_power; + return 0; +} + +/** + * cpufreq_state2power() - convert a cpu cdev state to power consumed + * @cdev: &thermal_cooling_device pointer + * @tz: a valid thermal zone device pointer + * @state: cooling device state to be converted + * @power: pointer in which to store the resulting power + * + * Convert cooling device state @state into power consumption in + * milliwatts assuming 100% load. Store the calculated power in + * @power. 
+ * + * Return: 0 on success, -EINVAL if the cooling device state could not + * be converted into a frequency or other -E* if there was an error + * when calculating the static power. + */ +static int cpufreq_state2power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, + unsigned long state, u32 *power) +{ + unsigned int freq, num_cpus; + cpumask_t cpumask; + u32 static_power, dynamic_power; + int ret; + struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; + + cpumask_and(&cpumask, &cpufreq_device->allowed_cpus, cpu_online_mask); + num_cpus = cpumask_weight(&cpumask); + + /* None of our cpus are online, so no power */ + if (num_cpus == 0) { + *power = 0; + return 0; + } + + freq = cpufreq_device->freq_table[state]; + if (!freq) + return -EINVAL; + + dynamic_power = cpu_freq_to_power(cpufreq_device, freq) * num_cpus; + ret = get_static_power(cpufreq_device, tz, freq, &static_power); + if (ret) + return ret; + + *power = static_power + dynamic_power; + return 0; +} + +/** + * cpufreq_power2state() - convert power to a cooling device state + * @cdev: &thermal_cooling_device pointer + * @tz: a valid thermal zone device pointer + * @power: power in milliwatts to be converted + * @state: pointer in which to store the resulting state + * + * Calculate a cooling device state for the cpus described by @cdev + * that would allow them to consume at most @power mW and store it in + * @state. Note that this calculation depends on external factors + * such as the cpu load or the current static power. Calling this + * function with the same power as input can yield different cooling + * device states depending on those external factors. + * + * Return: 0 on success, -ENODEV if no cpus are online or -EINVAL if + * the calculated frequency could not be converted to a valid state. + * The latter should not happen unless the frequencies available to + * cpufreq have changed since the initialization of the cpu cooling + * device. 
+ */ +static int cpufreq_power2state(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, u32 power, + unsigned long *state) +{ + unsigned int cpu, cur_freq, target_freq; + int ret; + s32 dyn_power; + u32 last_load, normalised_power, static_power; + struct cpufreq_cooling_device *cpufreq_device = cdev->devdata; + + cpu = cpumask_any_and(&cpufreq_device->allowed_cpus, cpu_online_mask); + + /* None of our cpus are online */ + if (cpu >= nr_cpu_ids) + return -ENODEV; + + cur_freq = cpufreq_quick_get(cpu); + ret = get_static_power(cpufreq_device, tz, cur_freq, &static_power); + if (ret) + return ret; + + dyn_power = power - static_power; + dyn_power = dyn_power > 0 ? dyn_power : 0; + last_load = cpufreq_device->last_load ?: 1; + normalised_power = (dyn_power * 100) / last_load; + target_freq = cpu_power_to_freq(cpufreq_device, normalised_power); + + *state = cpufreq_cooling_get_level(cpu, target_freq); + if (*state == THERMAL_CSTATE_INVALID) { + dev_warn_ratelimited(&cdev->device, + "Failed to convert %dKHz for cpu %d into a cdev state\n", + target_freq, cpu); + return -EINVAL; + } + + trace_thermal_power_cpu_limit(&cpufreq_device->allowed_cpus, + target_freq, *state, power); + return 0; +} + /* Bind cpufreq callbacks to thermal cooling device ops */ -static struct thermal_cooling_device_ops const cpufreq_cooling_ops = { +static struct thermal_cooling_device_ops cpufreq_cooling_ops = { .get_max_state = cpufreq_get_max_state, .get_cur_state = cpufreq_get_cur_state, .set_cur_state = cpufreq_set_cur_state, @@ -311,6 +754,9 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table, * @np: a valid struct device_node to the cooling device device tree node * @clip_cpus: cpumask of cpus where the frequency constraints will happen. * Normally this should be same as cpufreq policy->related_cpus. 
+ * @capacitance: dynamic power coefficient for these cpus + * @plat_static_func: function to calculate the static power consumed by these + * cpus (optional) * * This interface function registers the cpufreq cooling device with the name * "thermal-cpufreq-%x". This api can support multiple instances of cpufreq @@ -322,13 +768,14 @@ static unsigned int find_next_max(struct cpufreq_frequency_table *table, */ static struct thermal_cooling_device * __cpufreq_cooling_register(struct device_node *np, - const struct cpumask *clip_cpus) + const struct cpumask *clip_cpus, u32 capacitance, + get_static_t plat_static_func) { struct thermal_cooling_device *cool_dev; struct cpufreq_cooling_device *cpufreq_dev; char dev_name[THERMAL_NAME_LENGTH]; struct cpufreq_frequency_table *pos, *table; - unsigned int freq, i; + unsigned int freq, i, num_cpus; int ret; table = cpufreq_frequency_get_table(cpumask_first(clip_cpus)); @@ -341,6 +788,23 @@ __cpufreq_cooling_register(struct device_node *np, if (!cpufreq_dev) return ERR_PTR(-ENOMEM); + num_cpus = cpumask_weight(clip_cpus); + cpufreq_dev->time_in_idle = kcalloc(num_cpus, + sizeof(*cpufreq_dev->time_in_idle), + GFP_KERNEL); + if (!cpufreq_dev->time_in_idle) { + cool_dev = ERR_PTR(-ENOMEM); + goto free_cdev; + } + + cpufreq_dev->time_in_idle_timestamp = + kcalloc(num_cpus, sizeof(*cpufreq_dev->time_in_idle_timestamp), + GFP_KERNEL); + if (!cpufreq_dev->time_in_idle_timestamp) { + cool_dev = ERR_PTR(-ENOMEM); + goto free_time_in_idle; + } + /* Find max levels */ cpufreq_for_each_valid_entry(pos, table) cpufreq_dev->max_level++; @@ -349,7 +813,7 @@ __cpufreq_cooling_register(struct device_node *np, cpufreq_dev->max_level, GFP_KERNEL); if (!cpufreq_dev->freq_table) { cool_dev = ERR_PTR(-ENOMEM); - goto free_cdev; + goto free_time_in_idle_timestamp; } /* max_level is an index, not a counter */ @@ -357,6 +821,20 @@ __cpufreq_cooling_register(struct device_node *np, cpumask_copy(&cpufreq_dev->allowed_cpus, clip_cpus); + if (capacitance) { 
+ cpufreq_cooling_ops.get_requested_power = + cpufreq_get_requested_power; + cpufreq_cooling_ops.state2power = cpufreq_state2power; + cpufreq_cooling_ops.power2state = cpufreq_power2state; + cpufreq_dev->plat_get_static_power = plat_static_func; + + ret = build_dyn_power_table(cpufreq_dev, capacitance); + if (ret) { + cool_dev = ERR_PTR(ret); + goto free_table; + } + } + ret = get_idr(&cpufreq_idr, &cpufreq_dev->id); if (ret) { cool_dev = ERR_PTR(ret); @@ -402,6 +880,10 @@ remove_idr: release_idr(&cpufreq_idr, cpufreq_dev->id); free_table: kfree(cpufreq_dev->freq_table); +free_time_in_idle_timestamp: + kfree(cpufreq_dev->time_in_idle_timestamp); +free_time_in_idle: + kfree(cpufreq_dev->time_in_idle); free_cdev: kfree(cpufreq_dev); @@ -422,7 +904,7 @@ free_cdev: struct thermal_cooling_device * cpufreq_cooling_register(const struct cpumask *clip_cpus) { - return __cpufreq_cooling_register(NULL, clip_cpus); + return __cpufreq_cooling_register(NULL, clip_cpus, 0, NULL); } EXPORT_SYMBOL_GPL(cpufreq_cooling_register); @@ -446,11 +928,78 @@ of_cpufreq_cooling_register(struct device_node *np, if (!np) return ERR_PTR(-EINVAL); - return __cpufreq_cooling_register(np, clip_cpus); + return __cpufreq_cooling_register(np, clip_cpus, 0, NULL); } EXPORT_SYMBOL_GPL(of_cpufreq_cooling_register); /** + * cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions + * @clip_cpus: cpumask of cpus where the frequency constraints will happen + * @capacitance: dynamic power coefficient for these cpus + * @plat_static_func: function to calculate the static power consumed by these + * cpus (optional) + * + * This interface function registers the cpufreq cooling device with + * the name "thermal-cpufreq-%x". This api can support multiple + * instances of cpufreq cooling devices. Using this function, the + * cooling device will implement the power extensions by using a + * simple cpu power model. The cpus must have registered their OPPs + * using the OPP library. 
+ * + * An optional @plat_static_func may be provided to calculate the + * static power consumed by these cpus. If the platform's static + * power consumption is unknown or negligible, make it NULL. + * + * Return: a valid struct thermal_cooling_device pointer on success, + * on failure, it returns a corresponding ERR_PTR(). + */ +struct thermal_cooling_device * +cpufreq_power_cooling_register(const struct cpumask *clip_cpus, u32 capacitance, + get_static_t plat_static_func) +{ + return __cpufreq_cooling_register(NULL, clip_cpus, capacitance, + plat_static_func); +} +EXPORT_SYMBOL(cpufreq_power_cooling_register); + +/** + * of_cpufreq_power_cooling_register() - create cpufreq cooling device with power extensions + * @np: a valid struct device_node to the cooling device device tree node + * @clip_cpus: cpumask of cpus where the frequency constraints will happen + * @capacitance: dynamic power coefficient for these cpus + * @plat_static_func: function to calculate the static power consumed by these + * cpus (optional) + * + * This interface function registers the cpufreq cooling device with + * the name "thermal-cpufreq-%x". This api can support multiple + * instances of cpufreq cooling devices. Using this API, the cpufreq + * cooling device will be linked to the device tree node provided. + * Using this function, the cooling device will implement the power + * extensions by using a simple cpu power model. The cpus must have + * registered their OPPs using the OPP library. + * + * An optional @plat_static_func may be provided to calculate the + * static power consumed by these cpus. If the platform's static + * power consumption is unknown or negligible, make it NULL. + * + * Return: a valid struct thermal_cooling_device pointer on success, + * on failure, it returns a corresponding ERR_PTR(). 
+ */ +struct thermal_cooling_device * +of_cpufreq_power_cooling_register(struct device_node *np, + const struct cpumask *clip_cpus, + u32 capacitance, + get_static_t plat_static_func) +{ + if (!np) + return ERR_PTR(-EINVAL); + + return __cpufreq_cooling_register(np, clip_cpus, capacitance, + plat_static_func); +} +EXPORT_SYMBOL(of_cpufreq_power_cooling_register); + +/** * cpufreq_cooling_unregister - function to remove cpufreq cooling device. * @cdev: thermal cooling device pointer. * @@ -475,6 +1024,8 @@ void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) thermal_cooling_device_unregister(cpufreq_dev->cool_dev); release_idr(&cpufreq_idr, cpufreq_dev->id); + kfree(cpufreq_dev->time_in_idle_timestamp); + kfree(cpufreq_dev->time_in_idle); kfree(cpufreq_dev->freq_table); kfree(cpufreq_dev); } diff --git a/drivers/thermal/db8500_thermal.c b/drivers/thermal/db8500_thermal.c index 20adfbe..2fb273c 100644 --- a/drivers/thermal/db8500_thermal.c +++ b/drivers/thermal/db8500_thermal.c @@ -76,7 +76,7 @@ static int db8500_cdev_bind(struct thermal_zone_device *thermal, upper = lower = i > max_state ? max_state : i; ret = thermal_zone_bind_cooling_device(thermal, i, cdev, - upper, lower); + upper, lower, THERMAL_WEIGHT_DEFAULT); dev_info(&cdev->device, "%s bind to %d: %d-%s\n", cdev->type, i, ret, ret ? 
"fail" : "succeed"); diff --git a/drivers/thermal/fair_share.c b/drivers/thermal/fair_share.c index 6e0a3fb..c2c10bb 100644 --- a/drivers/thermal/fair_share.c +++ b/drivers/thermal/fair_share.c @@ -59,17 +59,17 @@ static int get_trip_level(struct thermal_zone_device *tz) } static long get_target_state(struct thermal_zone_device *tz, - struct thermal_cooling_device *cdev, int weight, int level) + struct thermal_cooling_device *cdev, int percentage, int level) { unsigned long max_state; cdev->ops->get_max_state(cdev, &max_state); - return (long)(weight * level * max_state) / (100 * tz->trips); + return (long)(percentage * level * max_state) / (100 * tz->trips); } /** - * fair_share_throttle - throttles devices asscciated with the given zone + * fair_share_throttle - throttles devices associated with the given zone * @tz - thermal_zone_device * * Throttling Logic: This uses three parameters to calculate the new @@ -77,7 +77,7 @@ static long get_target_state(struct thermal_zone_device *tz, * * Parameters used for Throttling: * P1. max_state: Maximum throttle state exposed by the cooling device. - * P2. weight[i]/100: + * P2. percentage[i]/100: * How 'effective' the 'i'th device is, in cooling the given zone. * P3. cur_trip_level/max_no_of_trips: * This describes the extent to which the devices should be throttled. 
@@ -88,28 +88,33 @@ static long get_target_state(struct thermal_zone_device *tz, */ static int fair_share_throttle(struct thermal_zone_device *tz, int trip) { - const struct thermal_zone_params *tzp; - struct thermal_cooling_device *cdev; struct thermal_instance *instance; - int i; + int total_weight = 0; + int total_instance = 0; int cur_trip_level = get_trip_level(tz); - if (!tz->tzp || !tz->tzp->tbp) - return -EINVAL; + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if (instance->trip != trip) + continue; + + total_weight += instance->weight; + total_instance++; + } - tzp = tz->tzp; + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + int percentage; + struct thermal_cooling_device *cdev = instance->cdev; - for (i = 0; i < tzp->num_tbps; i++) { - if (!tzp->tbp[i].cdev) + if (instance->trip != trip) continue; - cdev = tzp->tbp[i].cdev; - instance = get_thermal_instance(tz, cdev, trip); - if (!instance) - continue; + if (!total_weight) + percentage = 100 / total_instance; + else + percentage = (instance->weight * 100) / total_weight; - instance->target = get_target_state(tz, cdev, - tzp->tbp[i].weight, cur_trip_level); + instance->target = get_target_state(tz, cdev, percentage, + cur_trip_level); instance->cdev->updated = false; thermal_cdev_update(cdev); diff --git a/drivers/thermal/hisi_thermal.c b/drivers/thermal/hisi_thermal.c new file mode 100644 index 0000000..d5dd357 --- /dev/null +++ b/drivers/thermal/hisi_thermal.c @@ -0,0 +1,421 @@ +/* + * Hisilicon thermal sensor driver + * + * Copyright (c) 2014-2015 Hisilicon Limited. + * Copyright (c) 2014-2015 Linaro Limited. + * + * Xinwei Kong <kong.kongxinwei@hisilicon.com> + * Leo Yan <leo.yan@linaro.org> + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. 
+ * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/cpufreq.h> +#include <linux/delay.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/platform_device.h> +#include <linux/io.h> + +#include "thermal_core.h" + +#define TEMP0_TH (0x4) +#define TEMP0_RST_TH (0x8) +#define TEMP0_CFG (0xC) +#define TEMP0_EN (0x10) +#define TEMP0_INT_EN (0x14) +#define TEMP0_INT_CLR (0x18) +#define TEMP0_RST_MSK (0x1C) +#define TEMP0_VALUE (0x28) + +#define HISI_TEMP_BASE (-60) +#define HISI_TEMP_RESET (100000) + +#define HISI_MAX_SENSORS 4 + +struct hisi_thermal_sensor { + struct hisi_thermal_data *thermal; + struct thermal_zone_device *tzd; + + long sensor_temp; + uint32_t id; + uint32_t thres_temp; +}; + +struct hisi_thermal_data { + struct mutex thermal_lock; /* protects register data */ + struct platform_device *pdev; + struct clk *clk; + struct hisi_thermal_sensor sensors[HISI_MAX_SENSORS]; + + int irq, irq_bind_sensor; + bool irq_enabled; + + void __iomem *regs; +}; + +/* in millicelsius */ +static inline int _step_to_temp(int step) +{ + /* + * Every step equals (1 * 200) / 255 celsius, and finally + * need convert to millicelsius. 
+ */ + return (HISI_TEMP_BASE + (step * 200 / 255)) * 1000; +} + +static inline long _temp_to_step(long temp) +{ + return ((temp / 1000 - HISI_TEMP_BASE) * 255 / 200); +} + +static long hisi_thermal_get_sensor_temp(struct hisi_thermal_data *data, + struct hisi_thermal_sensor *sensor) +{ + long val; + + mutex_lock(&data->thermal_lock); + + /* disable interrupt */ + writel(0x0, data->regs + TEMP0_INT_EN); + writel(0x1, data->regs + TEMP0_INT_CLR); + + /* disable module firstly */ + writel(0x0, data->regs + TEMP0_EN); + + /* select sensor id */ + writel((sensor->id << 12), data->regs + TEMP0_CFG); + + /* enable module */ + writel(0x1, data->regs + TEMP0_EN); + + usleep_range(3000, 5000); + + val = readl(data->regs + TEMP0_VALUE); + val = _step_to_temp(val); + + mutex_unlock(&data->thermal_lock); + + return val; +} + +static void hisi_thermal_enable_bind_irq_sensor + (struct hisi_thermal_data *data) +{ + struct hisi_thermal_sensor *sensor; + + mutex_lock(&data->thermal_lock); + + sensor = &data->sensors[data->irq_bind_sensor]; + + /* setting the hdak time */ + writel(0x0, data->regs + TEMP0_CFG); + + /* disable module firstly */ + writel(0x0, data->regs + TEMP0_RST_MSK); + writel(0x0, data->regs + TEMP0_EN); + + /* select sensor id */ + writel((sensor->id << 12), data->regs + TEMP0_CFG); + + /* enable for interrupt */ + writel(_temp_to_step(sensor->thres_temp) | 0x0FFFFFF00, + data->regs + TEMP0_TH); + + writel(_temp_to_step(HISI_TEMP_RESET), data->regs + TEMP0_RST_TH); + + /* enable module */ + writel(0x1, data->regs + TEMP0_RST_MSK); + writel(0x1, data->regs + TEMP0_EN); + + writel(0x0, data->regs + TEMP0_INT_CLR); + writel(0x1, data->regs + TEMP0_INT_EN); + + usleep_range(3000, 5000); + + mutex_unlock(&data->thermal_lock); +} + +static void hisi_thermal_disable_sensor(struct hisi_thermal_data *data) +{ + mutex_lock(&data->thermal_lock); + + /* disable sensor module */ + writel(0x0, data->regs + TEMP0_INT_EN); + writel(0x0, data->regs + TEMP0_RST_MSK); + writel(0x0, 
data->regs + TEMP0_EN); + + mutex_unlock(&data->thermal_lock); +} + +static int hisi_thermal_get_temp(void *_sensor, long *temp) +{ + struct hisi_thermal_sensor *sensor = _sensor; + struct hisi_thermal_data *data = sensor->thermal; + + int sensor_id = 0, i; + long max_temp = 0; + + *temp = hisi_thermal_get_sensor_temp(data, sensor); + + sensor->sensor_temp = *temp; + + for (i = 0; i < HISI_MAX_SENSORS; i++) { + if (data->sensors[i].sensor_temp >= max_temp) { + max_temp = data->sensors[i].sensor_temp; + sensor_id = i; + } + } + + mutex_lock(&data->thermal_lock); + data->irq_bind_sensor = sensor_id; + mutex_unlock(&data->thermal_lock); + + dev_dbg(&data->pdev->dev, "id=%d, irq=%d, temp=%ld, thres=%d\n", + sensor->id, data->irq_enabled, *temp, sensor->thres_temp); + /* + * Bind irq to sensor for two cases: + * Reenable alarm IRQ if temperature below threshold; + * if irq has been enabled, always set it; + */ + if (data->irq_enabled) { + hisi_thermal_enable_bind_irq_sensor(data); + return 0; + } + + if (max_temp < sensor->thres_temp) { + data->irq_enabled = true; + hisi_thermal_enable_bind_irq_sensor(data); + enable_irq(data->irq); + } + + return 0; +} + +static struct thermal_zone_of_device_ops hisi_of_thermal_ops = { + .get_temp = hisi_thermal_get_temp, +}; + +static irqreturn_t hisi_thermal_alarm_irq(int irq, void *dev) +{ + struct hisi_thermal_data *data = dev; + + disable_irq_nosync(irq); + data->irq_enabled = false; + + return IRQ_WAKE_THREAD; +} + +static irqreturn_t hisi_thermal_alarm_irq_thread(int irq, void *dev) +{ + struct hisi_thermal_data *data = dev; + struct hisi_thermal_sensor *sensor; + int i; + + mutex_lock(&data->thermal_lock); + sensor = &data->sensors[data->irq_bind_sensor]; + + dev_crit(&data->pdev->dev, "THERMAL ALARM: T > %d\n", + sensor->thres_temp / 1000); + mutex_unlock(&data->thermal_lock); + + for (i = 0; i < HISI_MAX_SENSORS; i++) + thermal_zone_device_update(data->sensors[i].tzd); + + return IRQ_HANDLED; +} + +static int 
hisi_thermal_register_sensor(struct platform_device *pdev, + struct hisi_thermal_data *data, + struct hisi_thermal_sensor *sensor, + int index) +{ + int ret, i; + const struct thermal_trip *trip; + + sensor->id = index; + sensor->thermal = data; + + sensor->tzd = thermal_zone_of_sensor_register(&pdev->dev, sensor->id, + sensor, &hisi_of_thermal_ops); + if (IS_ERR(sensor->tzd)) { + ret = PTR_ERR(sensor->tzd); + dev_err(&pdev->dev, "failed to register sensor id %d: %d\n", + sensor->id, ret); + return ret; + } + + trip = of_thermal_get_trip_points(sensor->tzd); + + for (i = 0; i < of_thermal_get_ntrips(sensor->tzd); i++) { + if (trip[i].type == THERMAL_TRIP_PASSIVE) { + sensor->thres_temp = trip[i].temperature; + break; + } + } + + return 0; +} + +static const struct of_device_id of_hisi_thermal_match[] = { + { .compatible = "hisilicon,tsensor" }, + { /* end */ } +}; +MODULE_DEVICE_TABLE(of, of_hisi_thermal_match); + +static void hisi_thermal_toggle_sensor(struct hisi_thermal_sensor *sensor, + bool on) +{ + struct thermal_zone_device *tzd = sensor->tzd; + + tzd->ops->set_mode(tzd, + on ? 
THERMAL_DEVICE_ENABLED : THERMAL_DEVICE_DISABLED); +} + +static int hisi_thermal_probe(struct platform_device *pdev) +{ + struct hisi_thermal_data *data; + struct resource *res; + int i; + int ret; + + data = devm_kzalloc(&pdev->dev, sizeof(*data), GFP_KERNEL); + if (!data) + return -ENOMEM; + + mutex_init(&data->thermal_lock); + data->pdev = pdev; + + res = platform_get_resource(pdev, IORESOURCE_MEM, 0); + data->regs = devm_ioremap_resource(&pdev->dev, res); + if (IS_ERR(data->regs)) { + dev_err(&pdev->dev, "failed to get io address\n"); + return PTR_ERR(data->regs); + } + + data->irq = platform_get_irq(pdev, 0); + if (data->irq < 0) + return data->irq; + + ret = devm_request_threaded_irq(&pdev->dev, data->irq, + hisi_thermal_alarm_irq, + hisi_thermal_alarm_irq_thread, + 0, "hisi_thermal", data); + if (ret < 0) { + dev_err(&pdev->dev, "failed to request alarm irq: %d\n", ret); + return ret; + } + + platform_set_drvdata(pdev, data); + + data->clk = devm_clk_get(&pdev->dev, "thermal_clk"); + if (IS_ERR(data->clk)) { + ret = PTR_ERR(data->clk); + if (ret != -EPROBE_DEFER) + dev_err(&pdev->dev, + "failed to get thermal clk: %d\n", ret); + return ret; + } + + /* enable clock for thermal */ + ret = clk_prepare_enable(data->clk); + if (ret) { + dev_err(&pdev->dev, "failed to enable thermal clk: %d\n", ret); + return ret; + } + + for (i = 0; i < HISI_MAX_SENSORS; ++i) { + ret = hisi_thermal_register_sensor(pdev, data, + &data->sensors[i], i); + if (ret) { + dev_err(&pdev->dev, + "failed to register thermal sensor: %d\n", ret); + goto err_get_sensor_data; + } + } + + hisi_thermal_enable_bind_irq_sensor(data); + data->irq_enabled = true; + + for (i = 0; i < HISI_MAX_SENSORS; i++) + hisi_thermal_toggle_sensor(&data->sensors[i], true); + + return 0; + +err_get_sensor_data: + clk_disable_unprepare(data->clk); + + return ret; +} + +static int hisi_thermal_remove(struct platform_device *pdev) +{ + struct hisi_thermal_data *data = platform_get_drvdata(pdev); + int i; + + for (i = 
0; i < HISI_MAX_SENSORS; i++) { + struct hisi_thermal_sensor *sensor = &data->sensors[i]; + + hisi_thermal_toggle_sensor(sensor, false); + thermal_zone_of_sensor_unregister(&pdev->dev, sensor->tzd); + } + + hisi_thermal_disable_sensor(data); + clk_disable_unprepare(data->clk); + + return 0; +} + +#ifdef CONFIG_PM_SLEEP +static int hisi_thermal_suspend(struct device *dev) +{ + struct hisi_thermal_data *data = dev_get_drvdata(dev); + + hisi_thermal_disable_sensor(data); + data->irq_enabled = false; + + clk_disable_unprepare(data->clk); + + return 0; +} + +static int hisi_thermal_resume(struct device *dev) +{ + struct hisi_thermal_data *data = dev_get_drvdata(dev); + + clk_prepare_enable(data->clk); + + data->irq_enabled = true; + hisi_thermal_enable_bind_irq_sensor(data); + + return 0; +} +#endif + +static SIMPLE_DEV_PM_OPS(hisi_thermal_pm_ops, + hisi_thermal_suspend, hisi_thermal_resume); + +static struct platform_driver hisi_thermal_driver = { + .driver = { + .name = "hisi_thermal", + .owner = THIS_MODULE, + .pm = &hisi_thermal_pm_ops, + .of_match_table = of_hisi_thermal_match, + }, + .probe = hisi_thermal_probe, + .remove = hisi_thermal_remove, +}; + +module_platform_driver(hisi_thermal_driver); + +MODULE_AUTHOR("Xinwei Kong <kong.kongxinwei@hisilicon.com>"); +MODULE_AUTHOR("Leo Yan <leo.yan@linaro.org>"); +MODULE_DESCRIPTION("Hisilicon thermal driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/thermal/imx_thermal.c b/drivers/thermal/imx_thermal.c index 2ccbc07..fde4c28 100644 --- a/drivers/thermal/imx_thermal.c +++ b/drivers/thermal/imx_thermal.c @@ -306,7 +306,8 @@ static int imx_bind(struct thermal_zone_device *tz, ret = thermal_zone_bind_cooling_device(tz, IMX_TRIP_PASSIVE, cdev, THERMAL_NO_LIMIT, - THERMAL_NO_LIMIT); + THERMAL_NO_LIMIT, + THERMAL_WEIGHT_DEFAULT); if (ret) { dev_err(&tz->device, "binding zone %s with cdev %s failed:%d\n", diff --git a/drivers/thermal/of-thermal.c b/drivers/thermal/of-thermal.c index 668fb1b..b295b2b 100644 --- 
a/drivers/thermal/of-thermal.c +++ b/drivers/thermal/of-thermal.c @@ -58,6 +58,8 @@ struct __thermal_bind_params { * @mode: current thermal zone device mode (enabled/disabled) * @passive_delay: polling interval while passive cooling is activated * @polling_delay: zone polling interval + * @slope: slope of the temperature adjustment curve + * @offset: offset of the temperature adjustment curve * @ntrips: number of trip points * @trips: an array of trip points (0..ntrips - 1) * @num_tbps: number of thermal bind params @@ -70,6 +72,8 @@ struct __thermal_zone { enum thermal_device_mode mode; int passive_delay; int polling_delay; + int slope; + int offset; /* trip data */ int ntrips; @@ -227,7 +231,8 @@ static int of_thermal_bind(struct thermal_zone_device *thermal, ret = thermal_zone_bind_cooling_device(thermal, tbp->trip_id, cdev, tbp->max, - tbp->min); + tbp->min, + tbp->usage); if (ret) return ret; } @@ -581,7 +586,7 @@ static int thermal_of_populate_bind_params(struct device_node *np, u32 prop; /* Default weight. Usage is optional */ - __tbp->usage = 0; + __tbp->usage = THERMAL_WEIGHT_DEFAULT; ret = of_property_read_u32(np, "contribution", &prop); if (ret == 0) __tbp->usage = prop; @@ -715,7 +720,7 @@ static int thermal_of_populate_trip(struct device_node *np, * @np parameter and fills the read data into a __thermal_zone data structure * and return this pointer. * - * TODO: Missing properties to parse: thermal-sensor-names and coefficients + * TODO: Missing properties to parse: thermal-sensor-names * * Return: On success returns a valid struct __thermal_zone, * otherwise, it returns a corresponding ERR_PTR(). 
Caller must @@ -727,7 +732,7 @@ thermal_of_build_thermal_zone(struct device_node *np) struct device_node *child = NULL, *gchild; struct __thermal_zone *tz; int ret, i; - u32 prop; + u32 prop, coef[2]; if (!np) { pr_err("no thermal zone np\n"); @@ -752,6 +757,20 @@ thermal_of_build_thermal_zone(struct device_node *np) } tz->polling_delay = prop; + /* + * REVIST: for now, the thermal framework supports only + * one sensor per thermal zone. Thus, we are considering + * only the first two values as slope and offset. + */ + ret = of_property_read_u32_array(np, "coefficients", coef, 2); + if (ret == 0) { + tz->slope = coef[0]; + tz->offset = coef[1]; + } else { + tz->slope = 1; + tz->offset = 0; + } + /* trips */ child = of_get_child_by_name(np, "trips"); @@ -865,6 +884,8 @@ int __init of_parse_thermal_zones(void) for_each_child_of_node(np, child) { struct thermal_zone_device *zone; struct thermal_zone_params *tzp; + int i, mask = 0; + u32 prop; /* Check whether child is enabled or not */ if (!of_device_is_available(child)) @@ -891,8 +912,18 @@ int __init of_parse_thermal_zones(void) /* No hwmon because there might be hwmon drivers registering */ tzp->no_hwmon = true; + if (!of_property_read_u32(child, "sustainable-power", &prop)) + tzp->sustainable_power = prop; + + for (i = 0; i < tz->ntrips; i++) + mask |= 1 << i; + + /* these two are left for temperature drivers to use */ + tzp->slope = tz->slope; + tzp->offset = tz->offset; + zone = thermal_zone_device_register(child->name, tz->ntrips, - 0, tz, + mask, tz, ops, tzp, tz->passive_delay, tz->polling_delay); diff --git a/drivers/thermal/power_allocator.c b/drivers/thermal/power_allocator.c new file mode 100644 index 0000000..4672250 --- /dev/null +++ b/drivers/thermal/power_allocator.c @@ -0,0 +1,539 @@ +/* + * A power allocator to manage temperature + * + * Copyright (C) 2014 ARM Ltd. 
+ * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 as + * published by the Free Software Foundation. + * + * This program is distributed "as is" WITHOUT ANY WARRANTY of any + * kind, whether express or implied; without even the implied warranty + * of MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#define pr_fmt(fmt) "Power allocator: " fmt + +#include <linux/rculist.h> +#include <linux/slab.h> +#include <linux/thermal.h> + +#define CREATE_TRACE_POINTS +#include <trace/events/thermal_power_allocator.h> + +#include "thermal_core.h" + +#define FRAC_BITS 10 +#define int_to_frac(x) ((x) << FRAC_BITS) +#define frac_to_int(x) ((x) >> FRAC_BITS) + +/** + * mul_frac() - multiply two fixed-point numbers + * @x: first multiplicand + * @y: second multiplicand + * + * Return: the result of multiplying two fixed-point numbers. The + * result is also a fixed-point number. + */ +static inline s64 mul_frac(s64 x, s64 y) +{ + return (x * y) >> FRAC_BITS; +} + +/** + * div_frac() - divide two fixed-point numbers + * @x: the dividend + * @y: the divisor + * + * Return: the result of dividing two fixed-point numbers. The + * result is also a fixed-point number. + */ +static inline s64 div_frac(s64 x, s64 y) +{ + return div_s64(x << FRAC_BITS, y); +} + +/** + * struct power_allocator_params - parameters for the power allocator governor + * @err_integral: accumulated error in the PID controller. + * @prev_err: error in the previous iteration of the PID controller. + * Used to calculate the derivative term. + * @trip_switch_on: first passive trip point of the thermal zone. The + * governor switches on when this trip point is crossed. + * @trip_max_desired_temperature: last passive trip point of the thermal + * zone. The temperature we are + * controlling for. 
+ */ +struct power_allocator_params { + s64 err_integral; + s32 prev_err; + int trip_switch_on; + int trip_max_desired_temperature; +}; + +/** + * pid_controller() - PID controller + * @tz: thermal zone we are operating in + * @current_temp: the current temperature in millicelsius + * @control_temp: the target temperature in millicelsius + * @max_allocatable_power: maximum allocatable power for this thermal zone + * + * This PID controller increases the available power budget so that the + * temperature of the thermal zone gets as close as possible to + * @control_temp and limits the power if it exceeds it. k_po is the + * proportional term when we are overshooting, k_pu is the + * proportional term when we are undershooting. integral_cutoff is a + * threshold below which we stop accumulating the error. The + * accumulated error is only valid if the requested power will make + * the system warmer. If the system is mostly idle, there's no point + * in accumulating positive error. + * + * Return: The power budget for the next period. + */ +static u32 pid_controller(struct thermal_zone_device *tz, + unsigned long current_temp, + unsigned long control_temp, + u32 max_allocatable_power) +{ + s64 p, i, d, power_range; + s32 err, max_power_frac; + struct power_allocator_params *params = tz->governor_data; + + max_power_frac = int_to_frac(max_allocatable_power); + + err = ((s32)control_temp - (s32)current_temp); + err = int_to_frac(err); + + /* Calculate the proportional term */ + p = mul_frac(err < 0 ? 
tz->tzp->k_po : tz->tzp->k_pu, err); + + /* + * Calculate the integral term + * + * if the error is less than cut off allow integration (but + * the integral is limited to max power) + */ + i = mul_frac(tz->tzp->k_i, params->err_integral); + + if (err < int_to_frac(tz->tzp->integral_cutoff)) { + s64 i_next = i + mul_frac(tz->tzp->k_i, err); + + if (abs64(i_next) < max_power_frac) { + i = i_next; + params->err_integral += err; + } + } + + /* + * Calculate the derivative term + * + * We do err - prev_err, so with a positive k_d, a decreasing + * error (i.e. driving closer to the line) results in less + * power being applied, slowing down the controller) + */ + d = mul_frac(tz->tzp->k_d, err - params->prev_err); + d = div_frac(d, tz->passive_delay); + params->prev_err = err; + + power_range = p + i + d; + + /* feed-forward the known sustainable dissipatable power */ + power_range = tz->tzp->sustainable_power + frac_to_int(power_range); + + power_range = clamp(power_range, (s64)0, (s64)max_allocatable_power); + + trace_thermal_power_allocator_pid(tz, frac_to_int(err), + frac_to_int(params->err_integral), + frac_to_int(p), frac_to_int(i), + frac_to_int(d), power_range); + + return power_range; +} + +/** + * divvy_up_power() - divvy the allocated power between the actors + * @req_power: each actor's requested power + * @max_power: each actor's maximum available power + * @num_actors: size of the @req_power, @max_power and @granted_power's array + * @total_req_power: sum of @req_power + * @power_range: total allocated power + * @granted_power: output array: each actor's granted power + * @extra_actor_power: an appropriately sized array to be used in the + * function as temporary storage of the extra power given + * to the actors + * + * This function divides the total allocated power (@power_range) + * fairly between the actors. 
It first tries to give each actor a + * share of the @power_range according to how much power it requested + * compared to the rest of the actors. For example, if only one actor + * requests power, then it receives all the @power_range. If + * three actors each requests 1mW, each receives a third of the + * @power_range. + * + * If any actor received more than their maximum power, then that + * surplus is re-divvied among the actors based on how far they are + * from their respective maximums. + * + * Granted power for each actor is written to @granted_power, which + * should've been allocated by the calling function. + */ +static void divvy_up_power(u32 *req_power, u32 *max_power, int num_actors, + u32 total_req_power, u32 power_range, + u32 *granted_power, u32 *extra_actor_power) +{ + u32 extra_power, capped_extra_power; + int i; + + /* + * Prevent division by 0 if none of the actors request power. + */ + if (!total_req_power) + total_req_power = 1; + + capped_extra_power = 0; + extra_power = 0; + for (i = 0; i < num_actors; i++) { + u64 req_range = req_power[i] * power_range; + + granted_power[i] = DIV_ROUND_CLOSEST_ULL(req_range, + total_req_power); + + if (granted_power[i] > max_power[i]) { + extra_power += granted_power[i] - max_power[i]; + granted_power[i] = max_power[i]; + } + + extra_actor_power[i] = max_power[i] - granted_power[i]; + capped_extra_power += extra_actor_power[i]; + } + + if (!extra_power) + return; + + /* + * Re-divvy the reclaimed extra among actors based on + * how far they are from the max + */ + extra_power = min(extra_power, capped_extra_power); + if (capped_extra_power > 0) + for (i = 0; i < num_actors; i++) + granted_power[i] += (extra_actor_power[i] * + extra_power) / capped_extra_power; +} + +static int allocate_power(struct thermal_zone_device *tz, + unsigned long current_temp, + unsigned long control_temp) +{ + struct thermal_instance *instance; + struct power_allocator_params *params = tz->governor_data; + u32 *req_power, 
*max_power, *granted_power, *extra_actor_power; + u32 total_req_power, max_allocatable_power; + u32 total_granted_power, power_range; + int i, num_actors, total_weight, ret = 0; + int trip_max_desired_temperature = params->trip_max_desired_temperature; + + mutex_lock(&tz->lock); + + num_actors = 0; + total_weight = 0; + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if ((instance->trip == trip_max_desired_temperature) && + cdev_is_power_actor(instance->cdev)) { + num_actors++; + total_weight += instance->weight; + } + } + + /* + * We need to allocate three arrays of the same size: + * req_power, max_power and granted_power. They are going to + * be needed until this function returns. Allocate them all + * in one go to simplify the allocation and deallocation + * logic. + */ + BUILD_BUG_ON(sizeof(*req_power) != sizeof(*max_power)); + BUILD_BUG_ON(sizeof(*req_power) != sizeof(*granted_power)); + BUILD_BUG_ON(sizeof(*req_power) != sizeof(*extra_actor_power)); + req_power = devm_kcalloc(&tz->device, num_actors * 4, + sizeof(*req_power), GFP_KERNEL); + if (!req_power) { + ret = -ENOMEM; + goto unlock; + } + + max_power = &req_power[num_actors]; + granted_power = &req_power[2 * num_actors]; + extra_actor_power = &req_power[3 * num_actors]; + + i = 0; + total_req_power = 0; + max_allocatable_power = 0; + + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + int weight; + struct thermal_cooling_device *cdev = instance->cdev; + + if (instance->trip != trip_max_desired_temperature) + continue; + + if (!cdev_is_power_actor(cdev)) + continue; + + if (cdev->ops->get_requested_power(cdev, tz, &req_power[i])) + continue; + + if (!total_weight) + weight = 1 << FRAC_BITS; + else + weight = instance->weight; + + req_power[i] = frac_to_int(weight * req_power[i]); + + if (power_actor_get_max_power(cdev, tz, &max_power[i])) + continue; + + total_req_power += req_power[i]; + max_allocatable_power += max_power[i]; + + i++; + } + + power_range = 
pid_controller(tz, current_temp, control_temp, + max_allocatable_power); + + divvy_up_power(req_power, max_power, num_actors, total_req_power, + power_range, granted_power, extra_actor_power); + + total_granted_power = 0; + i = 0; + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if (instance->trip != trip_max_desired_temperature) + continue; + + if (!cdev_is_power_actor(instance->cdev)) + continue; + + power_actor_set_power(instance->cdev, instance, + granted_power[i]); + total_granted_power += granted_power[i]; + + i++; + } + + trace_thermal_power_allocator(tz, req_power, total_req_power, + granted_power, total_granted_power, + num_actors, power_range, + max_allocatable_power, current_temp, + (s32)control_temp - (s32)current_temp); + + devm_kfree(&tz->device, req_power); +unlock: + mutex_unlock(&tz->lock); + + return ret; +} + +static int get_governor_trips(struct thermal_zone_device *tz, + struct power_allocator_params *params) +{ + int i, ret, last_passive; + bool found_first_passive; + + found_first_passive = false; + last_passive = -1; + ret = -EINVAL; + + for (i = 0; i < tz->trips; i++) { + enum thermal_trip_type type; + + ret = tz->ops->get_trip_type(tz, i, &type); + if (ret) + return ret; + + if (!found_first_passive) { + if (type == THERMAL_TRIP_PASSIVE) { + params->trip_switch_on = i; + found_first_passive = true; + } + } else if (type == THERMAL_TRIP_PASSIVE) { + last_passive = i; + } else { + break; + } + } + + if (last_passive != -1) { + params->trip_max_desired_temperature = last_passive; + ret = 0; + } else { + ret = -EINVAL; + } + + return ret; +} + +static void reset_pid_controller(struct power_allocator_params *params) +{ + params->err_integral = 0; + params->prev_err = 0; +} + +static void allow_maximum_power(struct thermal_zone_device *tz) +{ + struct thermal_instance *instance; + struct power_allocator_params *params = tz->governor_data; + + list_for_each_entry(instance, &tz->thermal_instances, tz_node) { + if 
((instance->trip != params->trip_max_desired_temperature) || + (!cdev_is_power_actor(instance->cdev))) + continue; + + instance->target = 0; + instance->cdev->updated = false; + thermal_cdev_update(instance->cdev); + } +} + +/** + * power_allocator_bind() - bind the power_allocator governor to a thermal zone + * @tz: thermal zone to bind it to + * + * Check that the thermal zone is valid for this governor, that is, it + * has two thermal trips. If so, initialize the PID controller + * parameters and bind it to the thermal zone. + * + * Return: 0 on success, -EINVAL if the trips were invalid or -ENOMEM + * if we ran out of memory. + */ +static int power_allocator_bind(struct thermal_zone_device *tz) +{ + int ret; + struct power_allocator_params *params; + unsigned long switch_on_temp, control_temp; + u32 temperature_threshold; + + if (!tz->tzp || !tz->tzp->sustainable_power) { + dev_err(&tz->device, + "power_allocator: missing sustainable_power\n"); + return -EINVAL; + } + + params = devm_kzalloc(&tz->device, sizeof(*params), GFP_KERNEL); + if (!params) + return -ENOMEM; + + ret = get_governor_trips(tz, params); + if (ret) { + dev_err(&tz->device, + "thermal zone %s has wrong trip setup for power allocator\n", + tz->type); + goto free; + } + + ret = tz->ops->get_trip_temp(tz, params->trip_switch_on, + &switch_on_temp); + if (ret) + goto free; + + ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature, + &control_temp); + if (ret) + goto free; + + temperature_threshold = control_temp - switch_on_temp; + + tz->tzp->k_po = tz->tzp->k_po ?: + int_to_frac(tz->tzp->sustainable_power) / temperature_threshold; + tz->tzp->k_pu = tz->tzp->k_pu ?: + int_to_frac(2 * tz->tzp->sustainable_power) / + temperature_threshold; + tz->tzp->k_i = tz->tzp->k_i ?: int_to_frac(10) / 1000; + /* + * The default for k_d and integral_cutoff is 0, so we can + * leave them as they are. 
+ */ + + reset_pid_controller(params); + + tz->governor_data = params; + + return 0; + +free: + devm_kfree(&tz->device, params); + return ret; +} + +static void power_allocator_unbind(struct thermal_zone_device *tz) +{ + dev_dbg(&tz->device, "Unbinding from thermal zone %d\n", tz->id); + devm_kfree(&tz->device, tz->governor_data); + tz->governor_data = NULL; +} + +static int power_allocator_throttle(struct thermal_zone_device *tz, int trip) +{ + int ret; + unsigned long switch_on_temp, control_temp, current_temp; + struct power_allocator_params *params = tz->governor_data; + + /* + * We get called for every trip point but we only need to do + * our calculations once + */ + if (trip != params->trip_max_desired_temperature) + return 0; + + ret = thermal_zone_get_temp(tz, &current_temp); + if (ret) { + dev_warn(&tz->device, "Failed to get temperature: %d\n", ret); + return ret; + } + + ret = tz->ops->get_trip_temp(tz, params->trip_switch_on, + &switch_on_temp); + if (ret) { + dev_warn(&tz->device, + "Failed to get switch on temperature: %d\n", ret); + return ret; + } + + if (current_temp < switch_on_temp) { + tz->passive = 0; + reset_pid_controller(params); + allow_maximum_power(tz); + return 0; + } + + tz->passive = 1; + + ret = tz->ops->get_trip_temp(tz, params->trip_max_desired_temperature, + &control_temp); + if (ret) { + dev_warn(&tz->device, + "Failed to get the maximum desired temperature: %d\n", + ret); + return ret; + } + + return allocate_power(tz, current_temp, control_temp); +} + +static struct thermal_governor thermal_gov_power_allocator = { + .name = "power_allocator", + .bind_to_tz = power_allocator_bind, + .unbind_from_tz = power_allocator_unbind, + .throttle = power_allocator_throttle, +}; + +int thermal_gov_power_allocator_register(void) +{ + return thermal_register_governor(&thermal_gov_power_allocator); +} + +void thermal_gov_power_allocator_unregister(void) +{ + thermal_unregister_governor(&thermal_gov_power_allocator); +} diff --git 
a/drivers/thermal/qcom-spmi-temp-alarm.c b/drivers/thermal/qcom-spmi-temp-alarm.c new file mode 100644 index 0000000..c8d27b8 --- /dev/null +++ b/drivers/thermal/qcom-spmi-temp-alarm.c @@ -0,0 +1,309 @@ +/* + * Copyright (c) 2011-2015, The Linux Foundation. All rights reserved. + * + * This program is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License version 2 and + * only version 2 as published by the Free Software Foundation. + * + * This program is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + */ + +#include <linux/delay.h> +#include <linux/err.h> +#include <linux/iio/consumer.h> +#include <linux/interrupt.h> +#include <linux/module.h> +#include <linux/of.h> +#include <linux/of_device.h> +#include <linux/platform_device.h> +#include <linux/regmap.h> +#include <linux/thermal.h> + +#define QPNP_TM_REG_TYPE 0x04 +#define QPNP_TM_REG_SUBTYPE 0x05 +#define QPNP_TM_REG_STATUS 0x08 +#define QPNP_TM_REG_SHUTDOWN_CTRL1 0x40 +#define QPNP_TM_REG_ALARM_CTRL 0x46 + +#define QPNP_TM_TYPE 0x09 +#define QPNP_TM_SUBTYPE 0x08 + +#define STATUS_STAGE_MASK 0x03 + +#define SHUTDOWN_CTRL1_THRESHOLD_MASK 0x03 + +#define ALARM_CTRL_FORCE_ENABLE 0x80 + +/* + * Trip point values based on threshold control + * 0 = {105 C, 125 C, 145 C} + * 1 = {110 C, 130 C, 150 C} + * 2 = {115 C, 135 C, 155 C} + * 3 = {120 C, 140 C, 160 C} +*/ +#define TEMP_STAGE_STEP 20000 /* Stage step: 20.000 C */ +#define TEMP_STAGE_HYSTERESIS 2000 + +#define TEMP_THRESH_MIN 105000 /* Threshold Min: 105 C */ +#define TEMP_THRESH_STEP 5000 /* Threshold step: 5 C */ + +#define THRESH_MIN 0 + +/* Temperature in Milli Celsius reported during stage 0 if no ADC is present */ +#define DEFAULT_TEMP 37000 + +struct qpnp_tm_chip { + struct regmap *map; + struct thermal_zone_device 
*tz_dev; + long temp; + unsigned int thresh; + unsigned int stage; + unsigned int prev_stage; + unsigned int base; + struct iio_channel *adc; +}; + +static int qpnp_tm_read(struct qpnp_tm_chip *chip, u16 addr, u8 *data) +{ + unsigned int val; + int ret; + + ret = regmap_read(chip->map, chip->base + addr, &val); + if (ret < 0) + return ret; + + *data = val; + return 0; +} + +static int qpnp_tm_write(struct qpnp_tm_chip *chip, u16 addr, u8 data) +{ + return regmap_write(chip->map, chip->base + addr, data); +} + +/* + * This function updates the internal temp value based on the + * current thermal stage and threshold as well as the previous stage + */ +static int qpnp_tm_update_temp_no_adc(struct qpnp_tm_chip *chip) +{ + unsigned int stage; + int ret; + u8 reg = 0; + + ret = qpnp_tm_read(chip, QPNP_TM_REG_STATUS, &reg); + if (ret < 0) + return ret; + + stage = reg & STATUS_STAGE_MASK; + + if (stage > chip->stage) { + /* increasing stage, use lower bound */ + chip->temp = (stage - 1) * TEMP_STAGE_STEP + + chip->thresh * TEMP_THRESH_STEP + + TEMP_STAGE_HYSTERESIS + TEMP_THRESH_MIN; + } else if (stage < chip->stage) { + /* decreasing stage, use upper bound */ + chip->temp = stage * TEMP_STAGE_STEP + + chip->thresh * TEMP_THRESH_STEP - + TEMP_STAGE_HYSTERESIS + TEMP_THRESH_MIN; + } + + chip->stage = stage; + + return 0; +} + +static int qpnp_tm_get_temp(void *data, long *temp) +{ + struct qpnp_tm_chip *chip = data; + int ret, mili_celsius; + + if (!temp) + return -EINVAL; + + if (IS_ERR(chip->adc)) { + ret = qpnp_tm_update_temp_no_adc(chip); + if (ret < 0) + return ret; + } else { + ret = iio_read_channel_processed(chip->adc, &mili_celsius); + if (ret < 0) + return ret; + + chip->temp = mili_celsius; + } + + *temp = chip->temp < 0 ? 
0 : chip->temp; + + return 0; +} + +static const struct thermal_zone_of_device_ops qpnp_tm_sensor_ops = { + .get_temp = qpnp_tm_get_temp, +}; + +static irqreturn_t qpnp_tm_isr(int irq, void *data) +{ + struct qpnp_tm_chip *chip = data; + + thermal_zone_device_update(chip->tz_dev); + + return IRQ_HANDLED; +} + +/* + * This function initializes the internal temp value based on only the + * current thermal stage and threshold. Setup threshold control and + * disable shutdown override. + */ +static int qpnp_tm_init(struct qpnp_tm_chip *chip) +{ + int ret; + u8 reg; + + chip->thresh = THRESH_MIN; + chip->temp = DEFAULT_TEMP; + + ret = qpnp_tm_read(chip, QPNP_TM_REG_STATUS, &reg); + if (ret < 0) + return ret; + + chip->stage = reg & STATUS_STAGE_MASK; + + if (chip->stage) + chip->temp = chip->thresh * TEMP_THRESH_STEP + + (chip->stage - 1) * TEMP_STAGE_STEP + + TEMP_THRESH_MIN; + + /* + * Set threshold and disable software override of stage 2 and 3 + * shutdowns. + */ + reg = chip->thresh & SHUTDOWN_CTRL1_THRESHOLD_MASK; + ret = qpnp_tm_write(chip, QPNP_TM_REG_SHUTDOWN_CTRL1, reg); + if (ret < 0) + return ret; + + /* Enable the thermal alarm PMIC module in always-on mode. 
*/ + reg = ALARM_CTRL_FORCE_ENABLE; + ret = qpnp_tm_write(chip, QPNP_TM_REG_ALARM_CTRL, reg); + + return ret; +} + +static int qpnp_tm_probe(struct platform_device *pdev) +{ + struct qpnp_tm_chip *chip; + struct device_node *node; + u8 type, subtype; + u32 res[2]; + int ret, irq; + + node = pdev->dev.of_node; + + chip = devm_kzalloc(&pdev->dev, sizeof(*chip), GFP_KERNEL); + if (!chip) + return -ENOMEM; + + dev_set_drvdata(&pdev->dev, chip); + + chip->map = dev_get_regmap(pdev->dev.parent, NULL); + if (!chip->map) + return -ENXIO; + + ret = of_property_read_u32_array(node, "reg", res, 2); + if (ret < 0) + return ret; + + irq = platform_get_irq(pdev, 0); + if (irq < 0) + return irq; + + /* ADC based measurements are optional */ + chip->adc = iio_channel_get(&pdev->dev, "thermal"); + if (PTR_ERR(chip->adc) == -EPROBE_DEFER) + return PTR_ERR(chip->adc); + + chip->base = res[0]; + + ret = qpnp_tm_read(chip, QPNP_TM_REG_TYPE, &type); + if (ret < 0) { + dev_err(&pdev->dev, "could not read type\n"); + goto fail; + } + + ret = qpnp_tm_read(chip, QPNP_TM_REG_SUBTYPE, &subtype); + if (ret < 0) { + dev_err(&pdev->dev, "could not read subtype\n"); + goto fail; + } + + if (type != QPNP_TM_TYPE || subtype != QPNP_TM_SUBTYPE) { + dev_err(&pdev->dev, "invalid type 0x%02x or subtype 0x%02x\n", + type, subtype); + ret = -ENODEV; + goto fail; + } + + ret = qpnp_tm_init(chip); + if (ret < 0) { + dev_err(&pdev->dev, "init failed\n"); + goto fail; + } + + ret = devm_request_threaded_irq(&pdev->dev, irq, NULL, qpnp_tm_isr, + IRQF_ONESHOT, node->name, chip); + if (ret < 0) + goto fail; + + chip->tz_dev = thermal_zone_of_sensor_register(&pdev->dev, 0, chip, + &qpnp_tm_sensor_ops); + if (IS_ERR(chip->tz_dev)) { + dev_err(&pdev->dev, "failed to register sensor\n"); + ret = PTR_ERR(chip->tz_dev); + goto fail; + } + + return 0; + +fail: + if (!IS_ERR(chip->adc)) + iio_channel_release(chip->adc); + + return ret; +} + +static int qpnp_tm_remove(struct platform_device *pdev) +{ + struct 
qpnp_tm_chip *chip = dev_get_drvdata(&pdev->dev); + + thermal_zone_of_sensor_unregister(&pdev->dev, chip->tz_dev); + if (!IS_ERR(chip->adc)) + iio_channel_release(chip->adc); + + return 0; +} + +static const struct of_device_id qpnp_tm_match_table[] = { + { .compatible = "qcom,spmi-temp-alarm" }, + { } +}; +MODULE_DEVICE_TABLE(of, qpnp_tm_match_table); + +static struct platform_driver qpnp_tm_driver = { + .driver = { + .name = "spmi-temp-alarm", + .of_match_table = qpnp_tm_match_table, + }, + .probe = qpnp_tm_probe, + .remove = qpnp_tm_remove, +}; +module_platform_driver(qpnp_tm_driver); + +MODULE_ALIAS("platform:spmi-temp-alarm"); +MODULE_DESCRIPTION("QPNP PMIC Temperature Alarm driver"); +MODULE_LICENSE("GPL v2"); diff --git a/drivers/thermal/samsung/exynos_tmu.c b/drivers/thermal/samsung/exynos_tmu.c index 1d30b09..531f4b17 100644 --- a/drivers/thermal/samsung/exynos_tmu.c +++ b/drivers/thermal/samsung/exynos_tmu.c @@ -97,6 +97,32 @@ #define EXYNOS4412_MUX_ADDR_VALUE 6 #define EXYNOS4412_MUX_ADDR_SHIFT 20 +/* Exynos5433 specific registers */ +#define EXYNOS5433_TMU_REG_CONTROL1 0x024 +#define EXYNOS5433_TMU_SAMPLING_INTERVAL 0x02c +#define EXYNOS5433_TMU_COUNTER_VALUE0 0x030 +#define EXYNOS5433_TMU_COUNTER_VALUE1 0x034 +#define EXYNOS5433_TMU_REG_CURRENT_TEMP1 0x044 +#define EXYNOS5433_THD_TEMP_RISE3_0 0x050 +#define EXYNOS5433_THD_TEMP_RISE7_4 0x054 +#define EXYNOS5433_THD_TEMP_FALL3_0 0x060 +#define EXYNOS5433_THD_TEMP_FALL7_4 0x064 +#define EXYNOS5433_TMU_REG_INTEN 0x0c0 +#define EXYNOS5433_TMU_REG_INTPEND 0x0c8 +#define EXYNOS5433_TMU_EMUL_CON 0x110 +#define EXYNOS5433_TMU_PD_DET_EN 0x130 + +#define EXYNOS5433_TRIMINFO_SENSOR_ID_SHIFT 16 +#define EXYNOS5433_TRIMINFO_CALIB_SEL_SHIFT 23 +#define EXYNOS5433_TRIMINFO_SENSOR_ID_MASK \ + (0xf << EXYNOS5433_TRIMINFO_SENSOR_ID_SHIFT) +#define EXYNOS5433_TRIMINFO_CALIB_SEL_MASK BIT(23) + +#define EXYNOS5433_TRIMINFO_ONE_POINT_TRIMMING 0 +#define EXYNOS5433_TRIMINFO_TWO_POINT_TRIMMING 1 + +#define EXYNOS5433_PD_DET_EN 
1 + /*exynos5440 specific registers*/ #define EXYNOS5440_TMU_S0_7_TRIM 0x000 #define EXYNOS5440_TMU_S0_7_CTRL 0x020 @@ -484,6 +510,101 @@ out: return ret; } +static int exynos5433_tmu_initialize(struct platform_device *pdev) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct exynos_tmu_platform_data *pdata = data->pdata; + struct thermal_zone_device *tz = data->tzd; + unsigned int status, trim_info; + unsigned int rising_threshold = 0, falling_threshold = 0; + unsigned long temp, temp_hist; + int ret = 0, threshold_code, i, sensor_id, cal_type; + + status = readb(data->base + EXYNOS_TMU_REG_STATUS); + if (!status) { + ret = -EBUSY; + goto out; + } + + trim_info = readl(data->base + EXYNOS_TMU_REG_TRIMINFO); + sanitize_temp_error(data, trim_info); + + /* Read the temperature sensor id */ + sensor_id = (trim_info & EXYNOS5433_TRIMINFO_SENSOR_ID_MASK) + >> EXYNOS5433_TRIMINFO_SENSOR_ID_SHIFT; + dev_info(&pdev->dev, "Temperature sensor ID: 0x%x\n", sensor_id); + + /* Read the calibration mode */ + writel(trim_info, data->base + EXYNOS_TMU_REG_TRIMINFO); + cal_type = (trim_info & EXYNOS5433_TRIMINFO_CALIB_SEL_MASK) + >> EXYNOS5433_TRIMINFO_CALIB_SEL_SHIFT; + + switch (cal_type) { + case EXYNOS5433_TRIMINFO_ONE_POINT_TRIMMING: + pdata->cal_type = TYPE_ONE_POINT_TRIMMING; + break; + case EXYNOS5433_TRIMINFO_TWO_POINT_TRIMMING: + pdata->cal_type = TYPE_TWO_POINT_TRIMMING; + break; + default: + pdata->cal_type = TYPE_ONE_POINT_TRIMMING; + break; + }; + + dev_info(&pdev->dev, "Calibration type is %d-point calibration\n", + cal_type ? 
2 : 1); + + /* Write temperature code for rising and falling threshold */ + for (i = 0; i < of_thermal_get_ntrips(tz); i++) { + int rising_reg_offset, falling_reg_offset; + int j = 0; + + switch (i) { + case 0: + case 1: + case 2: + case 3: + rising_reg_offset = EXYNOS5433_THD_TEMP_RISE3_0; + falling_reg_offset = EXYNOS5433_THD_TEMP_FALL3_0; + j = i; + break; + case 4: + case 5: + case 6: + case 7: + rising_reg_offset = EXYNOS5433_THD_TEMP_RISE7_4; + falling_reg_offset = EXYNOS5433_THD_TEMP_FALL7_4; + j = i - 4; + break; + default: + continue; + } + + /* Write temperature code for rising threshold */ + tz->ops->get_trip_temp(tz, i, &temp); + temp /= MCELSIUS; + threshold_code = temp_to_code(data, temp); + + rising_threshold = readl(data->base + rising_reg_offset); + rising_threshold |= (threshold_code << j * 8); + writel(rising_threshold, data->base + rising_reg_offset); + + /* Write temperature code for falling threshold */ + tz->ops->get_trip_hyst(tz, i, &temp_hist); + temp_hist = temp - (temp_hist / MCELSIUS); + threshold_code = temp_to_code(data, temp_hist); + + falling_threshold = readl(data->base + falling_reg_offset); + falling_threshold &= ~(0xff << j * 8); + falling_threshold |= (threshold_code << j * 8); + writel(falling_threshold, data->base + falling_reg_offset); + } + + data->tmu_clear_irqs(data); +out: + return ret; +} + static int exynos5440_tmu_initialize(struct platform_device *pdev) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); @@ -643,6 +764,48 @@ static void exynos4210_tmu_control(struct platform_device *pdev, bool on) writel(con, data->base + EXYNOS_TMU_REG_CONTROL); } +static void exynos5433_tmu_control(struct platform_device *pdev, bool on) +{ + struct exynos_tmu_data *data = platform_get_drvdata(pdev); + struct thermal_zone_device *tz = data->tzd; + unsigned int con, interrupt_en, pd_det_en; + + con = get_con_reg(data, readl(data->base + EXYNOS_TMU_REG_CONTROL)); + + if (on) { + con |= (1 << EXYNOS_TMU_CORE_EN_SHIFT); + 
interrupt_en = + (of_thermal_is_trip_valid(tz, 7) + << EXYNOS7_TMU_INTEN_RISE7_SHIFT) | + (of_thermal_is_trip_valid(tz, 6) + << EXYNOS7_TMU_INTEN_RISE6_SHIFT) | + (of_thermal_is_trip_valid(tz, 5) + << EXYNOS7_TMU_INTEN_RISE5_SHIFT) | + (of_thermal_is_trip_valid(tz, 4) + << EXYNOS7_TMU_INTEN_RISE4_SHIFT) | + (of_thermal_is_trip_valid(tz, 3) + << EXYNOS7_TMU_INTEN_RISE3_SHIFT) | + (of_thermal_is_trip_valid(tz, 2) + << EXYNOS7_TMU_INTEN_RISE2_SHIFT) | + (of_thermal_is_trip_valid(tz, 1) + << EXYNOS7_TMU_INTEN_RISE1_SHIFT) | + (of_thermal_is_trip_valid(tz, 0) + << EXYNOS7_TMU_INTEN_RISE0_SHIFT); + + interrupt_en |= + interrupt_en << EXYNOS_TMU_INTEN_FALL0_SHIFT; + } else { + con &= ~(1 << EXYNOS_TMU_CORE_EN_SHIFT); + interrupt_en = 0; /* Disable all interrupts */ + } + + pd_det_en = on ? EXYNOS5433_PD_DET_EN : 0; + + writel(pd_det_en, data->base + EXYNOS5433_TMU_PD_DET_EN); + writel(interrupt_en, data->base + EXYNOS5433_TMU_REG_INTEN); + writel(con, data->base + EXYNOS_TMU_REG_CONTROL); +} + static void exynos5440_tmu_control(struct platform_device *pdev, bool on) { struct exynos_tmu_data *data = platform_get_drvdata(pdev); @@ -770,6 +933,8 @@ static void exynos4412_tmu_set_emulation(struct exynos_tmu_data *data, if (data->soc == SOC_ARCH_EXYNOS5260) emul_con = EXYNOS5260_EMUL_CON; + else if (data->soc == SOC_ARCH_EXYNOS5433) + emul_con = EXYNOS5433_TMU_EMUL_CON; else if (data->soc == SOC_ARCH_EXYNOS7) emul_con = EXYNOS7_TMU_REG_EMUL_CON; else @@ -882,6 +1047,9 @@ static void exynos4210_tmu_clear_irqs(struct exynos_tmu_data *data) } else if (data->soc == SOC_ARCH_EXYNOS7) { tmu_intstat = EXYNOS7_TMU_REG_INTPEND; tmu_intclear = EXYNOS7_TMU_REG_INTPEND; + } else if (data->soc == SOC_ARCH_EXYNOS5433) { + tmu_intstat = EXYNOS5433_TMU_REG_INTPEND; + tmu_intclear = EXYNOS5433_TMU_REG_INTPEND; } else { tmu_intstat = EXYNOS_TMU_REG_INTSTAT; tmu_intclear = EXYNOS_TMU_REG_INTCLEAR; @@ -926,6 +1094,7 @@ static const struct of_device_id exynos_tmu_match[] = { { .compatible = 
"samsung,exynos5260-tmu", }, { .compatible = "samsung,exynos5420-tmu", }, { .compatible = "samsung,exynos5420-tmu-ext-triminfo", }, + { .compatible = "samsung,exynos5433-tmu", }, { .compatible = "samsung,exynos5440-tmu", }, { .compatible = "samsung,exynos7-tmu", }, { /* sentinel */ }, @@ -949,6 +1118,8 @@ static int exynos_of_get_soc_type(struct device_node *np) else if (of_device_is_compatible(np, "samsung,exynos5420-tmu-ext-triminfo")) return SOC_ARCH_EXYNOS5420_TRIMINFO; + else if (of_device_is_compatible(np, "samsung,exynos5433-tmu")) + return SOC_ARCH_EXYNOS5433; else if (of_device_is_compatible(np, "samsung,exynos5440-tmu")) return SOC_ARCH_EXYNOS5440; else if (of_device_is_compatible(np, "samsung,exynos7-tmu")) @@ -1069,6 +1240,13 @@ static int exynos_map_dt_data(struct platform_device *pdev) data->tmu_set_emulation = exynos4412_tmu_set_emulation; data->tmu_clear_irqs = exynos4210_tmu_clear_irqs; break; + case SOC_ARCH_EXYNOS5433: + data->tmu_initialize = exynos5433_tmu_initialize; + data->tmu_control = exynos5433_tmu_control; + data->tmu_read = exynos4412_tmu_read; + data->tmu_set_emulation = exynos4412_tmu_set_emulation; + data->tmu_clear_irqs = exynos4210_tmu_clear_irqs; + break; case SOC_ARCH_EXYNOS5440: data->tmu_initialize = exynos5440_tmu_initialize; data->tmu_control = exynos5440_tmu_control; @@ -1172,7 +1350,9 @@ static int exynos_tmu_probe(struct platform_device *pdev) goto err_clk_sec; } - if (data->soc == SOC_ARCH_EXYNOS7) { + switch (data->soc) { + case SOC_ARCH_EXYNOS5433: + case SOC_ARCH_EXYNOS7: data->sclk = devm_clk_get(&pdev->dev, "tmu_sclk"); if (IS_ERR(data->sclk)) { dev_err(&pdev->dev, "Failed to get sclk\n"); @@ -1184,7 +1364,10 @@ static int exynos_tmu_probe(struct platform_device *pdev) goto err_clk; } } - } + break; + default: + break; + }; ret = exynos_tmu_initialize(pdev); if (ret) { diff --git a/drivers/thermal/samsung/exynos_tmu.h b/drivers/thermal/samsung/exynos_tmu.h index 4d71ec6..440c714 100644 --- 
a/drivers/thermal/samsung/exynos_tmu.h +++ b/drivers/thermal/samsung/exynos_tmu.h @@ -33,6 +33,7 @@ enum soc_type { SOC_ARCH_EXYNOS5260, SOC_ARCH_EXYNOS5420, SOC_ARCH_EXYNOS5420_TRIMINFO, + SOC_ARCH_EXYNOS5433, SOC_ARCH_EXYNOS5440, SOC_ARCH_EXYNOS7, }; diff --git a/drivers/thermal/thermal_core.c b/drivers/thermal/thermal_core.c index 4108db7..04659bf 100644 --- a/drivers/thermal/thermal_core.c +++ b/drivers/thermal/thermal_core.c @@ -75,6 +75,58 @@ static struct thermal_governor *__find_governor(const char *name) return NULL; } +/** + * bind_previous_governor() - bind the previous governor of the thermal zone + * @tz: a valid pointer to a struct thermal_zone_device + * @failed_gov_name: the name of the governor that failed to register + * + * Register the previous governor of the thermal zone after a new + * governor has failed to be bound. + */ +static void bind_previous_governor(struct thermal_zone_device *tz, + const char *failed_gov_name) +{ + if (tz->governor && tz->governor->bind_to_tz) { + if (tz->governor->bind_to_tz(tz)) { + dev_err(&tz->device, + "governor %s failed to bind and the previous one (%s) failed to bind again, thermal zone %s has no governor\n", + failed_gov_name, tz->governor->name, tz->type); + tz->governor = NULL; + } + } +} + +/** + * thermal_set_governor() - Switch to another governor + * @tz: a valid pointer to a struct thermal_zone_device + * @new_gov: pointer to the new governor + * + * Change the governor of thermal zone @tz. + * + * Return: 0 on success, an error if the new governor's bind_to_tz() failed. 
+ */ +static int thermal_set_governor(struct thermal_zone_device *tz, + struct thermal_governor *new_gov) +{ + int ret = 0; + + if (tz->governor && tz->governor->unbind_from_tz) + tz->governor->unbind_from_tz(tz); + + if (new_gov && new_gov->bind_to_tz) { + ret = new_gov->bind_to_tz(tz); + if (ret) { + bind_previous_governor(tz, new_gov->name); + + return ret; + } + } + + tz->governor = new_gov; + + return ret; +} + int thermal_register_governor(struct thermal_governor *governor) { int err; @@ -107,8 +159,15 @@ int thermal_register_governor(struct thermal_governor *governor) name = pos->tzp->governor_name; - if (!strncasecmp(name, governor->name, THERMAL_NAME_LENGTH)) - pos->governor = governor; + if (!strncasecmp(name, governor->name, THERMAL_NAME_LENGTH)) { + int ret; + + ret = thermal_set_governor(pos, governor); + if (ret) + dev_err(&pos->device, + "Failed to set governor %s for thermal zone %s: %d\n", + governor->name, pos->type, ret); + } } mutex_unlock(&thermal_list_lock); @@ -134,7 +193,7 @@ void thermal_unregister_governor(struct thermal_governor *governor) list_for_each_entry(pos, &thermal_tz_list, node) { if (!strncasecmp(pos->governor->name, governor->name, THERMAL_NAME_LENGTH)) - pos->governor = NULL; + thermal_set_governor(pos, NULL); } mutex_unlock(&thermal_list_lock); @@ -218,7 +277,8 @@ static void print_bind_err_msg(struct thermal_zone_device *tz, static void __bind(struct thermal_zone_device *tz, int mask, struct thermal_cooling_device *cdev, - unsigned long *limits) + unsigned long *limits, + unsigned int weight) { int i, ret; @@ -233,7 +293,8 @@ static void __bind(struct thermal_zone_device *tz, int mask, upper = limits[i * 2 + 1]; } ret = thermal_zone_bind_cooling_device(tz, i, cdev, - upper, lower); + upper, lower, + weight); if (ret) print_bind_err_msg(tz, cdev, ret); } @@ -280,7 +341,8 @@ static void bind_cdev(struct thermal_cooling_device *cdev) continue; tzp->tbp[i].cdev = cdev; __bind(pos, tzp->tbp[i].trip_mask, cdev, - 
tzp->tbp[i].binding_limits); + tzp->tbp[i].binding_limits, + tzp->tbp[i].weight); } } @@ -319,7 +381,8 @@ static void bind_tz(struct thermal_zone_device *tz) continue; tzp->tbp[i].cdev = pos; __bind(tz, tzp->tbp[i].trip_mask, pos, - tzp->tbp[i].binding_limits); + tzp->tbp[i].binding_limits, + tzp->tbp[i].weight); } } exit: @@ -713,7 +776,8 @@ passive_store(struct device *dev, struct device_attribute *attr, thermal_zone_bind_cooling_device(tz, THERMAL_TRIPS_NONE, cdev, THERMAL_NO_LIMIT, - THERMAL_NO_LIMIT); + THERMAL_NO_LIMIT, + THERMAL_WEIGHT_DEFAULT); } mutex_unlock(&thermal_list_lock); if (!tz->passive_delay) @@ -765,8 +829,9 @@ policy_store(struct device *dev, struct device_attribute *attr, if (!gov) goto exit; - tz->governor = gov; - ret = count; + ret = thermal_set_governor(tz, gov); + if (!ret) + ret = count; exit: mutex_unlock(&tz->lock); @@ -810,6 +875,158 @@ emul_temp_store(struct device *dev, struct device_attribute *attr, static DEVICE_ATTR(emul_temp, S_IWUSR, NULL, emul_temp_store); #endif/*CONFIG_THERMAL_EMULATION*/ +static ssize_t +sustainable_power_show(struct device *dev, struct device_attribute *devattr, + char *buf) +{ + struct thermal_zone_device *tz = to_thermal_zone(dev); + + if (tz->tzp) + return sprintf(buf, "%u\n", tz->tzp->sustainable_power); + else + return -EIO; +} + +static ssize_t +sustainable_power_store(struct device *dev, struct device_attribute *devattr, + const char *buf, size_t count) +{ + struct thermal_zone_device *tz = to_thermal_zone(dev); + u32 sustainable_power; + + if (!tz->tzp) + return -EIO; + + if (kstrtou32(buf, 10, &sustainable_power)) + return -EINVAL; + + tz->tzp->sustainable_power = sustainable_power; + + return count; +} +static DEVICE_ATTR(sustainable_power, S_IWUSR | S_IRUGO, sustainable_power_show, + sustainable_power_store); + +#define create_s32_tzp_attr(name) \ + static ssize_t \ + name##_show(struct device *dev, struct device_attribute *devattr, \ + char *buf) \ + { \ + struct thermal_zone_device *tz = 
to_thermal_zone(dev); \ + \ + if (tz->tzp) \ + return sprintf(buf, "%d\n", tz->tzp->name); \ + else \ + return -EIO; \ + } \ + \ + static ssize_t \ + name##_store(struct device *dev, struct device_attribute *devattr, \ + const char *buf, size_t count) \ + { \ + struct thermal_zone_device *tz = to_thermal_zone(dev); \ + s32 value; \ + \ + if (!tz->tzp) \ + return -EIO; \ + \ + if (kstrtos32(buf, 10, &value)) \ + return -EINVAL; \ + \ + tz->tzp->name = value; \ + \ + return count; \ + } \ + static DEVICE_ATTR(name, S_IWUSR | S_IRUGO, name##_show, name##_store) + +create_s32_tzp_attr(k_po); +create_s32_tzp_attr(k_pu); +create_s32_tzp_attr(k_i); +create_s32_tzp_attr(k_d); +create_s32_tzp_attr(integral_cutoff); +create_s32_tzp_attr(slope); +create_s32_tzp_attr(offset); +#undef create_s32_tzp_attr + +static struct device_attribute *dev_tzp_attrs[] = { + &dev_attr_sustainable_power, + &dev_attr_k_po, + &dev_attr_k_pu, + &dev_attr_k_i, + &dev_attr_k_d, + &dev_attr_integral_cutoff, + &dev_attr_slope, + &dev_attr_offset, +}; + +static int create_tzp_attrs(struct device *dev) +{ + int i; + + for (i = 0; i < ARRAY_SIZE(dev_tzp_attrs); i++) { + int ret; + struct device_attribute *dev_attr = dev_tzp_attrs[i]; + + ret = device_create_file(dev, dev_attr); + if (ret) + return ret; + } + + return 0; +} + +/** + * power_actor_get_max_power() - get the maximum power that a cdev can consume + * @cdev: pointer to &thermal_cooling_device + * @tz: a valid thermal zone device pointer + * @max_power: pointer in which to store the maximum power + * + * Calculate the maximum power consumption in milliwatts that the + * cooling device can currently consume and store it in @max_power. + * + * Return: 0 on success, -EINVAL if @cdev doesn't support the + * power_actor API or -E* on other error. 
+ */ +int power_actor_get_max_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, u32 *max_power) +{ + if (!cdev_is_power_actor(cdev)) + return -EINVAL; + + return cdev->ops->state2power(cdev, tz, 0, max_power); +} + +/** + * power_actor_set_power() - limit the maximum power that a cooling device can consume + * @cdev: pointer to &thermal_cooling_device + * @instance: thermal instance to update + * @power: the power in milliwatts + * + * Set the cooling device to consume at most @power milliwatts. + * + * Return: 0 on success, -EINVAL if the cooling device does not + * implement the power actor API or -E* for other failures. + */ +int power_actor_set_power(struct thermal_cooling_device *cdev, + struct thermal_instance *instance, u32 power) +{ + unsigned long state; + int ret; + + if (!cdev_is_power_actor(cdev)) + return -EINVAL; + + ret = cdev->ops->power2state(cdev, instance->tz, power, &state); + if (ret) + return ret; + + instance->target = state; + cdev->updated = false; + thermal_cdev_update(cdev); + + return 0; +} + static DEVICE_ATTR(type, 0444, type_show, NULL); static DEVICE_ATTR(temp, 0444, temp_show, NULL); static DEVICE_ATTR(mode, 0644, mode_show, mode_store); @@ -917,6 +1134,34 @@ static const struct attribute_group *cooling_device_attr_groups[] = { NULL, }; +static ssize_t +thermal_cooling_device_weight_show(struct device *dev, + struct device_attribute *attr, char *buf) +{ + struct thermal_instance *instance; + + instance = container_of(attr, struct thermal_instance, weight_attr); + + return sprintf(buf, "%d\n", instance->weight); +} + +static ssize_t +thermal_cooling_device_weight_store(struct device *dev, + struct device_attribute *attr, + const char *buf, size_t count) +{ + struct thermal_instance *instance; + int ret, weight; + + ret = kstrtoint(buf, 0, &weight); + if (ret) + return ret; + + instance = container_of(attr, struct thermal_instance, weight_attr); + instance->weight = weight; + + return count; +} /* Device 
management */ /** @@ -931,6 +1176,9 @@ static const struct attribute_group *cooling_device_attr_groups[] = { * @lower: the Minimum cooling state can be used for this trip point. * THERMAL_NO_LIMIT means no lower limit, * and the cooling device can be in cooling state 0. + * @weight: The weight of the cooling device to be bound to the + * thermal zone. Use THERMAL_WEIGHT_DEFAULT for the + * default value * * This interface function bind a thermal cooling device to the certain trip * point of a thermal zone device. @@ -941,7 +1189,8 @@ static const struct attribute_group *cooling_device_attr_groups[] = { int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, int trip, struct thermal_cooling_device *cdev, - unsigned long upper, unsigned long lower) + unsigned long upper, unsigned long lower, + unsigned int weight) { struct thermal_instance *dev; struct thermal_instance *pos; @@ -986,6 +1235,7 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, dev->upper = upper; dev->lower = lower; dev->target = THERMAL_NO_TARGET; + dev->weight = weight; result = get_idr(&tz->idr, &tz->lock, &dev->id); if (result) @@ -1006,6 +1256,16 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, if (result) goto remove_symbol_link; + sprintf(dev->weight_attr_name, "cdev%d_weight", dev->id); + sysfs_attr_init(&dev->weight_attr.attr); + dev->weight_attr.attr.name = dev->weight_attr_name; + dev->weight_attr.attr.mode = S_IWUSR | S_IRUGO; + dev->weight_attr.show = thermal_cooling_device_weight_show; + dev->weight_attr.store = thermal_cooling_device_weight_store; + result = device_create_file(&tz->device, &dev->weight_attr); + if (result) + goto remove_trip_file; + mutex_lock(&tz->lock); mutex_lock(&cdev->lock); list_for_each_entry(pos, &tz->thermal_instances, tz_node) @@ -1023,6 +1283,8 @@ int thermal_zone_bind_cooling_device(struct thermal_zone_device *tz, if (!result) return 0; + device_remove_file(&tz->device, &dev->weight_attr); 
+remove_trip_file: device_remove_file(&tz->device, &dev->attr); remove_symbol_link: sysfs_remove_link(&tz->device.kobj, dev->name); @@ -1377,7 +1639,8 @@ static int create_trip_attrs(struct thermal_zone_device *tz, int mask) tz->trip_temp_attrs[indx].name; tz->trip_temp_attrs[indx].attr.attr.mode = S_IRUGO; tz->trip_temp_attrs[indx].attr.show = trip_point_temp_show; - if (mask & (1 << indx)) { + if (IS_ENABLED(CONFIG_THERMAL_WRITABLE_TRIPS) && + mask & (1 << indx)) { tz->trip_temp_attrs[indx].attr.attr.mode |= S_IWUSR; tz->trip_temp_attrs[indx].attr.store = trip_point_temp_store; @@ -1454,7 +1717,7 @@ static void remove_trip_attrs(struct thermal_zone_device *tz) struct thermal_zone_device *thermal_zone_device_register(const char *type, int trips, int mask, void *devdata, struct thermal_zone_device_ops *ops, - const struct thermal_zone_params *tzp, + struct thermal_zone_params *tzp, int passive_delay, int polling_delay) { struct thermal_zone_device *tz; @@ -1462,6 +1725,7 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, int result; int count; int passive = 0; + struct thermal_governor *governor; if (type && strlen(type) >= THERMAL_NAME_LENGTH) return ERR_PTR(-EINVAL); @@ -1548,13 +1812,24 @@ struct thermal_zone_device *thermal_zone_device_register(const char *type, if (result) goto unregister; + /* Add thermal zone params */ + result = create_tzp_attrs(&tz->device); + if (result) + goto unregister; + /* Update 'this' zone's governor information */ mutex_lock(&thermal_governor_lock); if (tz->tzp) - tz->governor = __find_governor(tz->tzp->governor_name); + governor = __find_governor(tz->tzp->governor_name); else - tz->governor = def_governor; + governor = def_governor; + + result = thermal_set_governor(tz, governor); + if (result) { + mutex_unlock(&thermal_governor_lock); + goto unregister; + } mutex_unlock(&thermal_governor_lock); @@ -1643,7 +1918,7 @@ void thermal_zone_device_unregister(struct thermal_zone_device *tz) 
device_remove_file(&tz->device, &dev_attr_mode); device_remove_file(&tz->device, &dev_attr_policy); remove_trip_attrs(tz); - tz->governor = NULL; + thermal_set_governor(tz, NULL); thermal_remove_hwmon_sysfs(tz); release_idr(&thermal_tz_idr, &thermal_idr_lock, tz->id); @@ -1799,7 +2074,11 @@ static int __init thermal_register_governors(void) if (result) return result; - return thermal_gov_user_space_register(); + result = thermal_gov_user_space_register(); + if (result) + return result; + + return thermal_gov_power_allocator_register(); } static void thermal_unregister_governors(void) @@ -1808,6 +2087,7 @@ static void thermal_unregister_governors(void) thermal_gov_fair_share_unregister(); thermal_gov_bang_bang_unregister(); thermal_gov_user_space_unregister(); + thermal_gov_power_allocator_unregister(); } static int __init thermal_init(void) diff --git a/drivers/thermal/thermal_core.h b/drivers/thermal/thermal_core.h index 8e39181..d7ac1fc 100644 --- a/drivers/thermal/thermal_core.h +++ b/drivers/thermal/thermal_core.h @@ -46,8 +46,11 @@ struct thermal_instance { unsigned long target; /* expected cooling state */ char attr_name[THERMAL_NAME_LENGTH]; struct device_attribute attr; + char weight_attr_name[THERMAL_NAME_LENGTH]; + struct device_attribute weight_attr; struct list_head tz_node; /* node in tz->thermal_instances */ struct list_head cdev_node; /* node in cdev->thermal_instances */ + unsigned int weight; /* The weight of the cooling device */ }; int thermal_register_governor(struct thermal_governor *); @@ -85,6 +88,14 @@ static inline int thermal_gov_user_space_register(void) { return 0; } static inline void thermal_gov_user_space_unregister(void) {} #endif /* CONFIG_THERMAL_GOV_USER_SPACE */ +#ifdef CONFIG_THERMAL_GOV_POWER_ALLOCATOR +int thermal_gov_power_allocator_register(void); +void thermal_gov_power_allocator_unregister(void); +#else +static inline int thermal_gov_power_allocator_register(void) { return 0; } +static inline void 
thermal_gov_power_allocator_unregister(void) {} +#endif /* CONFIG_THERMAL_GOV_POWER_ALLOCATOR */ + /* device tree support */ #ifdef CONFIG_THERMAL_OF int of_parse_thermal_zones(void); diff --git a/drivers/thermal/ti-soc-thermal/ti-bandgap.c b/drivers/thermal/ti-soc-thermal/ti-bandgap.c index 62a5d44..63e2f5c 100644 --- a/drivers/thermal/ti-soc-thermal/ti-bandgap.c +++ b/drivers/thermal/ti-soc-thermal/ti-bandgap.c @@ -43,6 +43,8 @@ #include "ti-bandgap.h" +static int ti_bandgap_force_single_read(struct ti_bandgap *bgp, int id); + /*** Helper functions to access registers and their bitfields ***/ /** @@ -103,19 +105,15 @@ do { \ */ static int ti_bandgap_power(struct ti_bandgap *bgp, bool on) { - int i, ret = 0; + int i; - if (!TI_BANDGAP_HAS(bgp, POWER_SWITCH)) { - ret = -ENOTSUPP; - goto exit; - } + if (!TI_BANDGAP_HAS(bgp, POWER_SWITCH)) + return -ENOTSUPP; for (i = 0; i < bgp->conf->sensor_count; i++) /* active on 0 */ RMW_BITS(bgp, i, temp_sensor_ctrl, bgap_tempsoff_mask, !on); - -exit: - return ret; + return 0; } /** @@ -263,18 +261,13 @@ static int ti_bandgap_adc_to_mcelsius(struct ti_bandgap *bgp, int adc_val, int *t) { const struct ti_bandgap_data *conf = bgp->conf; - int ret = 0; /* look up for temperature in the table and return the temperature */ - if (adc_val < conf->adc_start_val || adc_val > conf->adc_end_val) { - ret = -ERANGE; - goto exit; - } + if (adc_val < conf->adc_start_val || adc_val > conf->adc_end_val) + return -ERANGE; *t = bgp->conf->conv_table[adc_val - conf->adc_start_val]; - -exit: - return ret; + return 0; } /** @@ -295,16 +288,14 @@ int ti_bandgap_mcelsius_to_adc(struct ti_bandgap *bgp, long temp, int *adc) { const struct ti_bandgap_data *conf = bgp->conf; const int *conv_table = bgp->conf->conv_table; - int high, low, mid, ret = 0; + int high, low, mid; low = 0; high = conf->adc_end_val - conf->adc_start_val; mid = (high + low) / 2; - if (temp < conv_table[low] || temp > conv_table[high]) { - ret = -ERANGE; - goto exit; - } + if (temp 
< conv_table[low] || temp > conv_table[high]) + return -ERANGE; while (low < high) { if (temp < conv_table[mid]) @@ -315,9 +306,7 @@ int ti_bandgap_mcelsius_to_adc(struct ti_bandgap *bgp, long temp, int *adc) } *adc = conf->adc_start_val + low; - -exit: - return ret; + return 0; } /** @@ -343,13 +332,11 @@ int ti_bandgap_add_hyst(struct ti_bandgap *bgp, int adc_val, int hyst_val, */ ret = ti_bandgap_adc_to_mcelsius(bgp, adc_val, &temp); if (ret < 0) - goto exit; + return ret; temp += hyst_val; ret = ti_bandgap_mcelsius_to_adc(bgp, temp, sum); - -exit: return ret; } @@ -468,22 +455,18 @@ exit: */ static inline int ti_bandgap_validate(struct ti_bandgap *bgp, int id) { - int ret = 0; - if (!bgp || IS_ERR(bgp)) { pr_err("%s: invalid bandgap pointer\n", __func__); - ret = -EINVAL; - goto exit; + return -EINVAL; } if ((id < 0) || (id >= bgp->conf->sensor_count)) { dev_err(bgp->dev, "%s: sensor id out of range (%d)\n", __func__, id); - ret = -ERANGE; + return -ERANGE; } -exit: - return ret; + return 0; } /** @@ -511,12 +494,10 @@ static int _ti_bandgap_write_threshold(struct ti_bandgap *bgp, int id, int val, ret = ti_bandgap_validate(bgp, id); if (ret) - goto exit; + return ret; - if (!TI_BANDGAP_HAS(bgp, TALERT)) { - ret = -ENOTSUPP; - goto exit; - } + if (!TI_BANDGAP_HAS(bgp, TALERT)) + return -ENOTSUPP; ts_data = bgp->conf->sensors[id].ts_data; tsr = bgp->conf->sensors[id].registers; @@ -529,17 +510,15 @@ static int _ti_bandgap_write_threshold(struct ti_bandgap *bgp, int id, int val, } if (ret) - goto exit; + return ret; ret = ti_bandgap_mcelsius_to_adc(bgp, val, &adc_val); if (ret < 0) - goto exit; + return ret; spin_lock(&bgp->lock); ret = ti_bandgap_update_alert_threshold(bgp, id, adc_val, hot); spin_unlock(&bgp->lock); - -exit: return ret; } @@ -582,7 +561,7 @@ static int _ti_bandgap_read_threshold(struct ti_bandgap *bgp, int id, temp = ti_bandgap_readl(bgp, tsr->bgap_threshold); temp = (temp & mask) >> __ffs(mask); - ret |= ti_bandgap_adc_to_mcelsius(bgp, temp, 
&temp); + ret = ti_bandgap_adc_to_mcelsius(bgp, temp, &temp); if (ret) { dev_err(bgp->dev, "failed to read thot\n"); ret = -EIO; @@ -852,11 +831,17 @@ int ti_bandgap_read_temperature(struct ti_bandgap *bgp, int id, if (ret) return ret; + if (!TI_BANDGAP_HAS(bgp, MODE_CONFIG)) { + ret = ti_bandgap_force_single_read(bgp, id); + if (ret) + return ret; + } + spin_lock(&bgp->lock); temp = ti_bandgap_read_temp(bgp, id); spin_unlock(&bgp->lock); - ret |= ti_bandgap_adc_to_mcelsius(bgp, temp, &temp); + ret = ti_bandgap_adc_to_mcelsius(bgp, temp, &temp); if (ret) return -EIO; @@ -917,7 +902,8 @@ void *ti_bandgap_get_sensor_data(struct ti_bandgap *bgp, int id) static int ti_bandgap_force_single_read(struct ti_bandgap *bgp, int id) { - u32 temp = 0, counter = 1000; + u32 counter = 1000; + struct temp_sensor_registers *tsr; /* Select single conversion mode */ if (TI_BANDGAP_HAS(bgp, MODE_CONFIG)) @@ -925,16 +911,27 @@ ti_bandgap_force_single_read(struct ti_bandgap *bgp, int id) /* Start of Conversion = 1 */ RMW_BITS(bgp, id, temp_sensor_ctrl, bgap_soc_mask, 1); - /* Wait until DTEMP is updated */ - temp = ti_bandgap_read_temp(bgp, id); - while ((temp == 0) && --counter) - temp = ti_bandgap_read_temp(bgp, id); - /* REVISIT: Check correct condition for end of conversion */ + /* Wait for EOCZ going up */ + tsr = bgp->conf->sensors[id].registers; + + while (--counter) { + if (ti_bandgap_readl(bgp, tsr->temp_sensor_ctrl) & + tsr->bgap_eocz_mask) + break; + } /* Start of Conversion = 0 */ RMW_BITS(bgp, id, temp_sensor_ctrl, bgap_soc_mask, 0); + /* Wait for EOCZ going down */ + counter = 1000; + while (--counter) { + if (!(ti_bandgap_readl(bgp, tsr->temp_sensor_ctrl) & + tsr->bgap_eocz_mask)) + break; + } + return 0; } @@ -1220,11 +1217,10 @@ int ti_bandgap_probe(struct platform_device *pdev) goto free_irqs; } - bgp->div_clk = clk_get(NULL, bgp->conf->div_ck_name); + bgp->div_clk = clk_get(NULL, bgp->conf->div_ck_name); ret = IS_ERR(bgp->div_clk); if (ret) { - dev_err(&pdev->dev, - 
"failed to request div_ts_ck clock ref\n"); + dev_err(&pdev->dev, "failed to request div_ts_ck clock ref\n"); ret = PTR_ERR(bgp->div_clk); goto free_irqs; } diff --git a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c index a38c175..c7c5b37 100644 --- a/drivers/thermal/ti-soc-thermal/ti-thermal-common.c +++ b/drivers/thermal/ti-soc-thermal/ti-thermal-common.c @@ -75,7 +75,7 @@ static inline int ti_thermal_hotspot_temperature(int t, int s, int c) } /* thermal zone ops */ -/* Get temperature callback function for thermal zone*/ +/* Get temperature callback function for thermal zone */ static inline int __ti_thermal_get_temp(void *devdata, long *temp) { struct thermal_zone_device *pcb_tz = NULL; @@ -146,7 +146,8 @@ static int ti_thermal_bind(struct thermal_zone_device *thermal, return thermal_zone_bind_cooling_device(thermal, 0, cdev, /* bind with min and max states defined by cpu_cooling */ THERMAL_NO_LIMIT, - THERMAL_NO_LIMIT); + THERMAL_NO_LIMIT, + THERMAL_WEIGHT_DEFAULT); } /* Unbind callback functions for thermal zone */ diff --git a/drivers/thermal/x86_pkg_temp_thermal.c b/drivers/thermal/x86_pkg_temp_thermal.c index 9ea3d9d..50d1d2c 100644 --- a/drivers/thermal/x86_pkg_temp_thermal.c +++ b/drivers/thermal/x86_pkg_temp_thermal.c @@ -68,7 +68,7 @@ struct phy_dev_entry { struct thermal_zone_device *tzone; }; -static const struct thermal_zone_params pkg_temp_tz_params = { +static struct thermal_zone_params pkg_temp_tz_params = { .no_hwmon = true, }; diff --git a/include/linux/cpu_cooling.h b/include/linux/cpu_cooling.h index bd95527..c156f50 100644 --- a/include/linux/cpu_cooling.h +++ b/include/linux/cpu_cooling.h @@ -28,6 +28,9 @@ #include <linux/thermal.h> #include <linux/cpumask.h> +typedef int (*get_static_t)(cpumask_t *cpumask, int interval, + unsigned long voltage, u32 *power); + #ifdef CONFIG_CPU_THERMAL /** * cpufreq_cooling_register - function to create cpufreq cooling device. 
@@ -36,6 +39,10 @@ struct thermal_cooling_device * cpufreq_cooling_register(const struct cpumask *clip_cpus); +struct thermal_cooling_device * +cpufreq_power_cooling_register(const struct cpumask *clip_cpus, + u32 capacitance, get_static_t plat_static_func); + /** * of_cpufreq_cooling_register - create cpufreq cooling device based on DT. * @np: a valid struct device_node to the cooling device device tree node. @@ -45,6 +52,12 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus); struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, const struct cpumask *clip_cpus); + +struct thermal_cooling_device * +of_cpufreq_power_cooling_register(struct device_node *np, + const struct cpumask *clip_cpus, + u32 capacitance, + get_static_t plat_static_func); #else static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, @@ -52,6 +65,15 @@ of_cpufreq_cooling_register(struct device_node *np, { return ERR_PTR(-ENOSYS); } + +static inline struct thermal_cooling_device * +of_cpufreq_power_cooling_register(struct device_node *np, + const struct cpumask *clip_cpus, + u32 capacitance, + get_static_t plat_static_func) +{ + return NULL; +} #endif /** @@ -68,11 +90,28 @@ cpufreq_cooling_register(const struct cpumask *clip_cpus) return ERR_PTR(-ENOSYS); } static inline struct thermal_cooling_device * +cpufreq_power_cooling_register(const struct cpumask *clip_cpus, + u32 capacitance, get_static_t plat_static_func) +{ + return NULL; +} + +static inline struct thermal_cooling_device * of_cpufreq_cooling_register(struct device_node *np, const struct cpumask *clip_cpus) { return ERR_PTR(-ENOSYS); } + +static inline struct thermal_cooling_device * +of_cpufreq_power_cooling_register(struct device_node *np, + const struct cpumask *clip_cpus, + u32 capacitance, + get_static_t plat_static_func) +{ + return NULL; +} + static inline void cpufreq_cooling_unregister(struct thermal_cooling_device *cdev) { diff --git 
a/include/linux/thermal.h b/include/linux/thermal.h index 5eac316..037e9df 100644 --- a/include/linux/thermal.h +++ b/include/linux/thermal.h @@ -40,6 +40,9 @@ /* No upper/lower limit requirement */ #define THERMAL_NO_LIMIT ((u32)~0) +/* Default weight of a bound cooling device */ +#define THERMAL_WEIGHT_DEFAULT 0 + /* Unit conversion macros */ #define KELVIN_TO_CELSIUS(t) (long)(((long)t-2732 >= 0) ? \ ((long)t-2732+5)/10 : ((long)t-2732-5)/10) @@ -56,10 +59,13 @@ #define DEFAULT_THERMAL_GOVERNOR "fair_share" #elif defined(CONFIG_THERMAL_DEFAULT_GOV_USER_SPACE) #define DEFAULT_THERMAL_GOVERNOR "user_space" +#elif defined(CONFIG_THERMAL_DEFAULT_GOV_POWER_ALLOCATOR) +#define DEFAULT_THERMAL_GOVERNOR "power_allocator" #endif struct thermal_zone_device; struct thermal_cooling_device; +struct thermal_instance; enum thermal_device_mode { THERMAL_DEVICE_DISABLED = 0, @@ -113,6 +119,12 @@ struct thermal_cooling_device_ops { int (*get_max_state) (struct thermal_cooling_device *, unsigned long *); int (*get_cur_state) (struct thermal_cooling_device *, unsigned long *); int (*set_cur_state) (struct thermal_cooling_device *, unsigned long); + int (*get_requested_power)(struct thermal_cooling_device *, + struct thermal_zone_device *, u32 *); + int (*state2power)(struct thermal_cooling_device *, + struct thermal_zone_device *, unsigned long, u32 *); + int (*power2state)(struct thermal_cooling_device *, + struct thermal_zone_device *, u32, unsigned long *); }; struct thermal_cooling_device { @@ -144,8 +156,7 @@ struct thermal_attr { * @devdata: private pointer for device private data * @trips: number of trip points the thermal zone supports * @passive_delay: number of milliseconds to wait between polls when - * performing passive cooling. Currenty only used by the - * step-wise governor + * performing passive cooling. 
* @polling_delay: number of milliseconds to wait between polls when * checking whether trip points have been crossed (0 for * interrupt driven systems) @@ -155,13 +166,13 @@ struct thermal_attr { * @last_temperature: previous temperature read * @emul_temperature: emulated temperature when using CONFIG_THERMAL_EMULATION * @passive: 1 if you've crossed a passive trip point, 0 otherwise. - * Currenty only used by the step-wise governor. * @forced_passive: If > 0, temperature at which to switch on all ACPI * processor cooling devices. Currently only used by the * step-wise governor. * @ops: operations this &thermal_zone_device supports * @tzp: thermal zone parameters * @governor: pointer to the governor for this thermal zone + * @governor_data: private pointer for governor data * @thermal_instances: list of &struct thermal_instance of this thermal zone * @idr: &struct idr to generate unique id for this zone's cooling * devices @@ -186,8 +197,9 @@ struct thermal_zone_device { int passive; unsigned int forced_passive; struct thermal_zone_device_ops *ops; - const struct thermal_zone_params *tzp; + struct thermal_zone_params *tzp; struct thermal_governor *governor; + void *governor_data; struct list_head thermal_instances; struct idr idr; struct mutex lock; @@ -198,12 +210,19 @@ struct thermal_zone_device { /** * struct thermal_governor - structure that holds thermal governor information * @name: name of the governor + * @bind_to_tz: callback called when binding to a thermal zone. If it + * returns 0, the governor is bound to the thermal zone, + * otherwise it fails. + * @unbind_from_tz: callback called when a governor is unbound from a + * thermal zone. 
* @throttle: callback called for every trip point even if temperature is * below the trip point temperature * @governor_list: node in thermal_governor_list (in thermal_core.c) */ struct thermal_governor { char name[THERMAL_NAME_LENGTH]; + int (*bind_to_tz)(struct thermal_zone_device *tz); + void (*unbind_from_tz)(struct thermal_zone_device *tz); int (*throttle)(struct thermal_zone_device *tz, int trip); struct list_head governor_list; }; @@ -214,9 +233,12 @@ struct thermal_bind_params { /* * This is a measure of 'how effectively these devices can - * cool 'this' thermal zone. The shall be determined by platform - * characterization. This is on a 'percentage' scale. - * See Documentation/thermal/sysfs-api.txt for more information. + * cool 'this' thermal zone. It shall be determined by + * platform characterization. This value is relative to the + * rest of the weights so a cooling device whose weight is + * double that of another cooling device is twice as + * effective. See Documentation/thermal/sysfs-api.txt for more + * information. */ int weight; @@ -253,6 +275,44 @@ struct thermal_zone_params { int num_tbps; /* Number of tbp entries */ struct thermal_bind_params *tbp; + + /* + * Sustainable power (heat) that this thermal zone can dissipate in + * mW + */ + u32 sustainable_power; + + /* + * Proportional parameter of the PID controller when + * overshooting (i.e., when temperature is below the target) + */ + s32 k_po; + + /* + * Proportional parameter of the PID controller when + * undershooting + */ + s32 k_pu; + + /* Integral parameter of the PID controller */ + s32 k_i; + + /* Derivative parameter of the PID controller */ + s32 k_d; + + /* threshold below which the error is no longer accumulated */ + s32 integral_cutoff; + + /* + * @slope: slope of a linear temperature adjustment curve. + * Used by thermal zone drivers. + */ + int slope; + /* + * @offset: offset of a linear temperature adjustment curve. + * Used by thermal zone drivers (default 0). 
+ */ + int offset; }; struct thermal_genl_event { @@ -316,14 +376,25 @@ void thermal_zone_of_sensor_unregister(struct device *dev, #endif #if IS_ENABLED(CONFIG_THERMAL) +static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) +{ + return cdev->ops->get_requested_power && cdev->ops->state2power && + cdev->ops->power2state; +} + +int power_actor_get_max_power(struct thermal_cooling_device *, + struct thermal_zone_device *tz, u32 *max_power); +int power_actor_set_power(struct thermal_cooling_device *, + struct thermal_instance *, u32); struct thermal_zone_device *thermal_zone_device_register(const char *, int, int, void *, struct thermal_zone_device_ops *, - const struct thermal_zone_params *, int, int); + struct thermal_zone_params *, int, int); void thermal_zone_device_unregister(struct thermal_zone_device *); int thermal_zone_bind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *, - unsigned long, unsigned long); + unsigned long, unsigned long, + unsigned int); int thermal_zone_unbind_cooling_device(struct thermal_zone_device *, int, struct thermal_cooling_device *); void thermal_zone_device_update(struct thermal_zone_device *); @@ -343,6 +414,14 @@ struct thermal_instance *get_thermal_instance(struct thermal_zone_device *, void thermal_cdev_update(struct thermal_cooling_device *); void thermal_notify_framework(struct thermal_zone_device *, int); #else +static inline bool cdev_is_power_actor(struct thermal_cooling_device *cdev) +{ return false; } +static inline int power_actor_get_max_power(struct thermal_cooling_device *cdev, + struct thermal_zone_device *tz, u32 *max_power) +{ return 0; } +static inline int power_actor_set_power(struct thermal_cooling_device *cdev, + struct thermal_instance *tz, u32 power) +{ return 0; } static inline struct thermal_zone_device *thermal_zone_device_register( const char *type, int trips, int mask, void *devdata, struct thermal_zone_device_ops *ops, diff --git 
a/include/trace/events/thermal.h b/include/trace/events/thermal.h index 0f4f95d..8b1f806 100644 --- a/include/trace/events/thermal.h +++ b/include/trace/events/thermal.h @@ -77,6 +77,64 @@ TRACE_EVENT(thermal_zone_trip, __entry->trip_type) ); +TRACE_EVENT(thermal_power_cpu_get_power, + TP_PROTO(const struct cpumask *cpus, unsigned long freq, u32 *load, + size_t load_len, u32 dynamic_power, u32 static_power), + + TP_ARGS(cpus, freq, load, load_len, dynamic_power, static_power), + + TP_STRUCT__entry( + __bitmask(cpumask, num_possible_cpus()) + __field(unsigned long, freq ) + __dynamic_array(u32, load, load_len) + __field(size_t, load_len ) + __field(u32, dynamic_power ) + __field(u32, static_power ) + ), + + TP_fast_assign( + __assign_bitmask(cpumask, cpumask_bits(cpus), + num_possible_cpus()); + __entry->freq = freq; + memcpy(__get_dynamic_array(load), load, + load_len * sizeof(*load)); + __entry->load_len = load_len; + __entry->dynamic_power = dynamic_power; + __entry->static_power = static_power; + ), + + TP_printk("cpus=%s freq=%lu load={%s} dynamic_power=%d static_power=%d", + __get_bitmask(cpumask), __entry->freq, + __print_array(__get_dynamic_array(load), __entry->load_len, 4), + __entry->dynamic_power, __entry->static_power) +); + +TRACE_EVENT(thermal_power_cpu_limit, + TP_PROTO(const struct cpumask *cpus, unsigned int freq, + unsigned long cdev_state, u32 power), + + TP_ARGS(cpus, freq, cdev_state, power), + + TP_STRUCT__entry( + __bitmask(cpumask, num_possible_cpus()) + __field(unsigned int, freq ) + __field(unsigned long, cdev_state) + __field(u32, power ) + ), + + TP_fast_assign( + __assign_bitmask(cpumask, cpumask_bits(cpus), + num_possible_cpus()); + __entry->freq = freq; + __entry->cdev_state = cdev_state; + __entry->power = power; + ), + + TP_printk("cpus=%s freq=%u cdev_state=%lu power=%u", + __get_bitmask(cpumask), __entry->freq, __entry->cdev_state, + __entry->power) +); + #endif /* _TRACE_THERMAL_H */ /* This part must be outside protection */ 
diff --git a/include/trace/events/thermal_power_allocator.h b/include/trace/events/thermal_power_allocator.h new file mode 100644 index 0000000..12e1321 --- /dev/null +++ b/include/trace/events/thermal_power_allocator.h @@ -0,0 +1,87 @@ +#undef TRACE_SYSTEM +#define TRACE_SYSTEM thermal_power_allocator + +#if !defined(_TRACE_THERMAL_POWER_ALLOCATOR_H) || defined(TRACE_HEADER_MULTI_READ) +#define _TRACE_THERMAL_POWER_ALLOCATOR_H + +#include <linux/tracepoint.h> + +TRACE_EVENT(thermal_power_allocator, + TP_PROTO(struct thermal_zone_device *tz, u32 *req_power, + u32 total_req_power, u32 *granted_power, + u32 total_granted_power, size_t num_actors, + u32 power_range, u32 max_allocatable_power, + unsigned long current_temp, s32 delta_temp), + TP_ARGS(tz, req_power, total_req_power, granted_power, + total_granted_power, num_actors, power_range, + max_allocatable_power, current_temp, delta_temp), + TP_STRUCT__entry( + __field(int, tz_id ) + __dynamic_array(u32, req_power, num_actors ) + __field(u32, total_req_power ) + __dynamic_array(u32, granted_power, num_actors) + __field(u32, total_granted_power ) + __field(size_t, num_actors ) + __field(u32, power_range ) + __field(u32, max_allocatable_power ) + __field(unsigned long, current_temp ) + __field(s32, delta_temp ) + ), + TP_fast_assign( + __entry->tz_id = tz->id; + memcpy(__get_dynamic_array(req_power), req_power, + num_actors * sizeof(*req_power)); + __entry->total_req_power = total_req_power; + memcpy(__get_dynamic_array(granted_power), granted_power, + num_actors * sizeof(*granted_power)); + __entry->total_granted_power = total_granted_power; + __entry->num_actors = num_actors; + __entry->power_range = power_range; + __entry->max_allocatable_power = max_allocatable_power; + __entry->current_temp = current_temp; + __entry->delta_temp = delta_temp; + ), + + TP_printk("thermal_zone_id=%d req_power={%s} total_req_power=%u granted_power={%s} total_granted_power=%u power_range=%u max_allocatable_power=%u 
current_temperature=%lu delta_temperature=%d", + __entry->tz_id, + __print_array(__get_dynamic_array(req_power), + __entry->num_actors, 4), + __entry->total_req_power, + __print_array(__get_dynamic_array(granted_power), + __entry->num_actors, 4), + __entry->total_granted_power, __entry->power_range, + __entry->max_allocatable_power, __entry->current_temp, + __entry->delta_temp) +); + +TRACE_EVENT(thermal_power_allocator_pid, + TP_PROTO(struct thermal_zone_device *tz, s32 err, s32 err_integral, + s64 p, s64 i, s64 d, s32 output), + TP_ARGS(tz, err, err_integral, p, i, d, output), + TP_STRUCT__entry( + __field(int, tz_id ) + __field(s32, err ) + __field(s32, err_integral) + __field(s64, p ) + __field(s64, i ) + __field(s64, d ) + __field(s32, output ) + ), + TP_fast_assign( + __entry->tz_id = tz->id; + __entry->err = err; + __entry->err_integral = err_integral; + __entry->p = p; + __entry->i = i; + __entry->d = d; + __entry->output = output; + ), + + TP_printk("thermal_zone_id=%d err=%d err_integral=%d p=%lld i=%lld d=%lld output=%d", + __entry->tz_id, __entry->err, __entry->err_integral, + __entry->p, __entry->i, __entry->d, __entry->output) +); +#endif /* _TRACE_THERMAL_POWER_ALLOCATOR_H */ + +/* This part must be outside protection */ +#include <trace/define_trace.h> |