diff options
Diffstat (limited to 'drivers/powercap')
-rw-r--r-- | drivers/powercap/intel_rapl.c | 216 |
1 files changed, 139 insertions, 77 deletions
diff --git a/drivers/powercap/intel_rapl.c b/drivers/powercap/intel_rapl.c index 256efed..4631696 100644 --- a/drivers/powercap/intel_rapl.c +++ b/drivers/powercap/intel_rapl.c @@ -29,6 +29,7 @@ #include <linux/sysfs.h> #include <linux/cpu.h> #include <linux/powercap.h> +#include <asm/iosf_mbi.h> #include <asm/processor.h> #include <asm/cpu_device_id.h> @@ -70,11 +71,6 @@ #define RAPL_PRIMITIVE_DERIVED BIT(1) /* not from raw data */ #define RAPL_PRIMITIVE_DUMMY BIT(2) -/* scale RAPL units to avoid floating point math inside kernel */ -#define POWER_UNIT_SCALE (1000000) -#define ENERGY_UNIT_SCALE (1000000) -#define TIME_UNIT_SCALE (1000000) - #define TIME_WINDOW_MAX_MSEC 40000 #define TIME_WINDOW_MIN_MSEC 250 @@ -175,9 +171,9 @@ struct rapl_package { unsigned int id; /* physical package/socket id */ unsigned int nr_domains; unsigned long domain_map; /* bit map of active domains */ - unsigned int power_unit_divisor; - unsigned int energy_unit_divisor; - unsigned int time_unit_divisor; + unsigned int power_unit; + unsigned int energy_unit; + unsigned int time_unit; struct rapl_domain *domains; /* array of domains, sized at runtime */ struct powercap_zone *power_zone; /* keep track of parent zone */ int nr_cpus; /* active cpus on the package, topology info is lost during @@ -197,6 +193,9 @@ struct rapl_defaults { }; static struct rapl_defaults *rapl_defaults; +/* Sideband MBI registers */ +#define IOSF_CPU_POWER_BUDGET_CTL (0x2) + #define PACKAGE_PLN_INT_SAVED BIT(0) #define MAX_PRIM_NAME (32) @@ -348,23 +347,13 @@ static int find_nr_power_limit(struct rapl_domain *rd) static int set_domain_enable(struct powercap_zone *power_zone, bool mode) { struct rapl_domain *rd = power_zone_to_rapl_domain(power_zone); - int nr_powerlimit; if (rd->state & DOMAIN_STATE_BIOS_LOCKED) return -EACCES; + get_online_cpus(); - nr_powerlimit = find_nr_power_limit(rd); - /* here we activate/deactivate the hardware for power limiting */ rapl_write_data_raw(rd, PL1_ENABLE, mode); - /* always enable clamp such that p-state can go below OS requested - * range. power capping priority over guranteed frequency. - */ - rapl_write_data_raw(rd, PL1_CLAMP, mode); - /* some domains have pl2 */ - if (nr_powerlimit > 1) { - rapl_write_data_raw(rd, PL2_ENABLE, mode); - rapl_write_data_raw(rd, PL2_CLAMP, mode); - } + rapl_defaults->set_floor_freq(rd, mode); put_online_cpus(); return 0; @@ -662,9 +651,7 @@ static void rapl_init_domains(struct rapl_package *rp) static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, int to_raw) { - u64 divisor = 1; - int scale = 1; /* scale to user friendly data without floating point */ - u64 f, y; /* fraction and exp. used for time unit */ + u64 units = 1; struct rapl_package *rp; rp = find_package_by_id(package); @@ -673,42 +660,24 @@ static u64 rapl_unit_xlate(int package, enum unit_type type, u64 value, switch (type) { case POWER_UNIT: - divisor = rp->power_unit_divisor; - scale = POWER_UNIT_SCALE; + units = rp->power_unit; break; case ENERGY_UNIT: - scale = ENERGY_UNIT_SCALE; - divisor = rp->energy_unit_divisor; + units = rp->energy_unit; break; case TIME_UNIT: - divisor = rp->time_unit_divisor; - scale = TIME_UNIT_SCALE; - /* special processing based on 2^Y*(1+F)/4 = val/divisor, refer - * to Intel Software Developer's manual Vol. 3a, CH 14.7.4. - */ - if (!to_raw) { - f = (value & 0x60) >> 5; - y = value & 0x1f; - value = (1 << y) * (4 + f) * scale / 4; - return div64_u64(value, divisor); - } else { - do_div(value, scale); - value *= divisor; - y = ilog2(value); - f = div64_u64(4 * (value - (1 << y)), 1 << y); - value = (y & 0x1f) | ((f & 0x3) << 5); - return value; - } - break; + return rapl_defaults->compute_time_window(rp, value, to_raw); case ARBITRARY_UNIT: default: return value; }; if (to_raw) - return div64_u64(value * divisor, scale); - else - return div64_u64(value * scale, divisor); + return div64_u64(value, units); + + value *= units; + + return value; } /* in the order of enum rapl_primitives */ @@ -842,12 +811,18 @@ static int rapl_write_data_raw(struct rapl_domain *rd, return 0; } -static const struct x86_cpu_id energy_unit_quirk_ids[] = { - { X86_VENDOR_INTEL, 6, 0x37},/* Valleyview */ - {} -}; - -static int rapl_check_unit(struct rapl_package *rp, int cpu) +/* + * Raw RAPL data stored in MSRs are in certain scales. We need to + * convert them into standard units based on the units reported in + * the RAPL unit MSRs. This is specific to CPUs as the method to + * calculate units differ on different CPUs. + * We convert the units to below format based on CPUs. + * i.e. + * energy unit: microJoules : Represented in microJoules by default + * power unit : microWatts : Represented in milliWatts by default + * time unit : microseconds: Represented in seconds by default + */ +static int rapl_check_unit_core(struct rapl_package *rp, int cpu) { u64 msr_val; u32 value; @@ -858,36 +833,47 @@ static int rapl_check_unit(struct rapl_package *rp, int cpu) return -ENODEV; } - /* Raw RAPL data stored in MSRs are in certain scales. We need to - * convert them into standard units based on the divisors reported in - * the RAPL unit MSRs. - * i.e. - * energy unit: 1/enery_unit_divisor Joules - * power unit: 1/power_unit_divisor Watts - * time unit: 1/time_unit_divisor Seconds - */ value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; - /* some CPUs have different way to calculate energy unit */ - if (x86_match_cpu(energy_unit_quirk_ids)) - rp->energy_unit_divisor = 1000000 / (1 << value); - else - rp->energy_unit_divisor = 1 << value; + rp->energy_unit = 1000000 / (1 << value); value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; - rp->power_unit_divisor = 1 << value; + rp->power_unit = 1000000 / (1 << value); value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; - rp->time_unit_divisor = 1 << value; + rp->time_unit = 1000000 / (1 << value); - pr_debug("Physical package %d units: energy=%d, time=%d, power=%d\n", - rp->id, - rp->energy_unit_divisor, - rp->time_unit_divisor, - rp->power_unit_divisor); + pr_debug("Core CPU package %d energy=%duJ, time=%dus, power=%duW\n", + rp->id, rp->energy_unit, rp->time_unit, rp->power_unit); return 0; } +static int rapl_check_unit_atom(struct rapl_package *rp, int cpu) +{ + u64 msr_val; + u32 value; + + if (rdmsrl_safe_on_cpu(cpu, MSR_RAPL_POWER_UNIT, &msr_val)) { + pr_err("Failed to read power unit MSR 0x%x on CPU %d, exit.\n", + MSR_RAPL_POWER_UNIT, cpu); + return -ENODEV; + } + value = (msr_val & ENERGY_UNIT_MASK) >> ENERGY_UNIT_OFFSET; + rp->energy_unit = 1 << value; + + value = (msr_val & POWER_UNIT_MASK) >> POWER_UNIT_OFFSET; + rp->power_unit = (1 << value) * 1000; + + value = (msr_val & TIME_UNIT_MASK) >> TIME_UNIT_OFFSET; + rp->time_unit = 1000000 / (1 << value); + + pr_debug("Atom package %d energy=%duJ, time=%dus, power=%duW\n", + rp->id, rp->energy_unit, rp->time_unit, rp->power_unit); + + return 0; +} + + /* REVISIT: * When package power limit is set artificially low by RAPL, LVT * thermal interrupt for package power limit should be ignored @@ -955,10 +941,86 @@ static void package_power_limit_irq_restore(int package_id) wrmsr_on_cpu(cpu, MSR_IA32_PACKAGE_THERM_INTERRUPT, l, h); } +static void set_floor_freq_default(struct rapl_domain *rd, bool mode) +{ + int nr_powerlimit = find_nr_power_limit(rd); + + /* always enable clamp such that p-state can go below OS requested + * range. power capping priority over guranteed frequency. + */ + rapl_write_data_raw(rd, PL1_CLAMP, mode); + + /* some domains have pl2 */ + if (nr_powerlimit > 1) { + rapl_write_data_raw(rd, PL2_ENABLE, mode); + rapl_write_data_raw(rd, PL2_CLAMP, mode); + } +} + +static void set_floor_freq_atom(struct rapl_domain *rd, bool enable) +{ + static u32 power_ctrl_orig_val; + u32 mdata; + + if (!power_ctrl_orig_val) + iosf_mbi_read(BT_MBI_UNIT_PMC, BT_MBI_PMC_READ, + IOSF_CPU_POWER_BUDGET_CTL, &power_ctrl_orig_val); + mdata = power_ctrl_orig_val; + if (enable) { + mdata &= ~(0x7f << 8); + mdata |= 1 << 8; + } + iosf_mbi_write(BT_MBI_UNIT_PMC, BT_MBI_PMC_WRITE, + IOSF_CPU_POWER_BUDGET_CTL, mdata); +} + +static u64 rapl_compute_time_window_core(struct rapl_package *rp, u64 value, + bool to_raw) +{ + u64 f, y; /* fraction and exp. used for time unit */ + + /* + * Special processing based on 2^Y*(1+F/4), refer + * to Intel Software Developer's manual Vol.3B: CH 14.9.3. + */ + if (!to_raw) { + f = (value & 0x60) >> 5; + y = value & 0x1f; + value = (1 << y) * (4 + f) * rp->time_unit / 4; + } else { + do_div(value, rp->time_unit); + y = ilog2(value); + f = div64_u64(4 * (value - (1 << y)), 1 << y); + value = (y & 0x1f) | ((f & 0x3) << 5); + } + return value; +} + +static u64 rapl_compute_time_window_atom(struct rapl_package *rp, u64 value, + bool to_raw) +{ + /* + * Atom time unit encoding is straight forward val * time_unit, + * where time_unit is default to 1 sec. Never 0. + */ + if (!to_raw) + return (value) ? value *= rp->time_unit : rp->time_unit; + else + value = div64_u64(value, rp->time_unit); + + return value; +} + static const struct rapl_defaults rapl_defaults_core = { + .check_unit = rapl_check_unit_core, + .set_floor_freq = set_floor_freq_default, + .compute_time_window = rapl_compute_time_window_core, }; static const struct rapl_defaults rapl_defaults_atom = { + .check_unit = rapl_check_unit_atom, + .set_floor_freq = set_floor_freq_atom, + .compute_time_window = rapl_compute_time_window_atom, }; #define RAPL_CPU(_model, _ops) { \ @@ -1262,7 +1324,7 @@ static int rapl_detect_topology(void) /* check if the package contains valid domains */ if (rapl_detect_domains(new_package, i) || - rapl_check_unit(new_package, i)) { + rapl_defaults->check_unit(new_package, i)) { kfree(new_package->domains); kfree(new_package); /* free up the packages already initialized */ @@ -1317,7 +1379,7 @@ static int rapl_add_package(int cpu) rp->nr_cpus = 1; /* check if the package contains valid domains */ if (rapl_detect_domains(rp, cpu) || - rapl_check_unit(rp, cpu)) { + rapl_defaults->check_unit(rp, cpu)) { ret = -ENODEV; goto err_free_package; } |