summaryrefslogtreecommitdiffstats
path: root/arch/x86/kernel/cpu
diff options
context:
space:
mode:
authorThomas Gleixner <tglx@linutronix.de>2018-06-22 21:20:35 +0200
committerThomas Gleixner <tglx@linutronix.de>2018-06-22 21:20:35 +0200
commit7731b8bc94e599c9a79e428f3359ff2c34b7576a (patch)
tree879f18ccbe274122f2d4f095b43cbc7f953e0ada /arch/x86/kernel/cpu
parent48e315618dc4dc8904182cd221e3d395d5d97005 (diff)
parent9ffc59d57228d74809700be6f7ecb1db10292f05 (diff)
downloadop-kernel-dev-7731b8bc94e599c9a79e428f3359ff2c34b7576a.zip
op-kernel-dev-7731b8bc94e599c9a79e428f3359ff2c34b7576a.tar.gz
Merge branch 'linus' into x86/urgent
Required to queue a dependent fix.
Diffstat (limited to 'arch/x86/kernel/cpu')
-rw-r--r--arch/x86/kernel/cpu/Makefile2
-rw-r--r--arch/x86/kernel/cpu/amd.c36
-rw-r--r--arch/x86/kernel/cpu/bugs.c13
-rw-r--r--arch/x86/kernel/cpu/cacheinfo.c (renamed from arch/x86/kernel/cpu/intel_cacheinfo.c)46
-rw-r--r--arch/x86/kernel/cpu/centaur.c53
-rw-r--r--arch/x86/kernel/cpu/common.c46
-rw-r--r--arch/x86/kernel/cpu/cpu.h10
-rw-r--r--arch/x86/kernel/cpu/intel.c34
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.c50
-rw-r--r--arch/x86/kernel/cpu/intel_rdt.h18
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c24
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_monitor.c170
-rw-r--r--arch/x86/kernel/cpu/intel_rdt_rdtgroup.c33
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce-inject.c2
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce.c20
-rw-r--r--arch/x86/kernel/cpu/mcheck/mce_amd.c17
-rw-r--r--arch/x86/kernel/cpu/mtrr/Makefile2
-rw-r--r--arch/x86/kernel/cpu/mtrr/if.c2
-rw-r--r--arch/x86/kernel/cpu/mtrr/mtrr.c (renamed from arch/x86/kernel/cpu/mtrr/main.c)37
-rw-r--r--arch/x86/kernel/cpu/topology.c8
20 files changed, 498 insertions, 125 deletions
diff --git a/arch/x86/kernel/cpu/Makefile b/arch/x86/kernel/cpu/Makefile
index a66229f..7a40196 100644
--- a/arch/x86/kernel/cpu/Makefile
+++ b/arch/x86/kernel/cpu/Makefile
@@ -17,7 +17,7 @@ KCOV_INSTRUMENT_perf_event.o := n
nostackp := $(call cc-option, -fno-stack-protector)
CFLAGS_common.o := $(nostackp)
-obj-y := intel_cacheinfo.o scattered.o topology.o
+obj-y := cacheinfo.o scattered.o topology.o
obj-y += common.o
obj-y += rdrand.o
obj-y += match.o
diff --git a/arch/x86/kernel/cpu/amd.c b/arch/x86/kernel/cpu/amd.c
index 1b18be3..082d787 100644
--- a/arch/x86/kernel/cpu/amd.c
+++ b/arch/x86/kernel/cpu/amd.c
@@ -9,6 +9,7 @@
#include <linux/random.h>
#include <asm/processor.h>
#include <asm/apic.h>
+#include <asm/cacheinfo.h>
#include <asm/cpu.h>
#include <asm/spec-ctrl.h>
#include <asm/smp.h>
@@ -298,7 +299,6 @@ static int nearby_node(int apicid)
}
#endif
-#ifdef CONFIG_SMP
/*
* Fix up cpu_core_id for pre-F17h systems to be in the
* [0 .. cores_per_node - 1] range. Not really needed but
@@ -328,6 +328,7 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
/* get information required for multi-node processors */
if (boot_cpu_has(X86_FEATURE_TOPOEXT)) {
+ int err;
u32 eax, ebx, ecx, edx;
cpuid(0x8000001e, &eax, &ebx, &ecx, &edx);
@@ -346,21 +347,15 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
}
/*
- * We may have multiple LLCs if L3 caches exist, so check if we
- * have an L3 cache by looking at the L3 cache CPUID leaf.
+ * In case leaf B is available, use it to derive
+ * topology information.
*/
- if (cpuid_edx(0x80000006)) {
- if (c->x86 == 0x17) {
- /*
- * LLC is at the core complex level.
- * Core complex id is ApicId[3].
- */
- per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
- } else {
- /* LLC is at the node level. */
- per_cpu(cpu_llc_id, cpu) = node_id;
- }
- }
+ err = detect_extended_topology(c);
+ if (!err)
+ c->x86_coreid_bits = get_count_order(c->x86_max_cores);
+
+ cacheinfo_amd_init_llc_id(c, cpu, node_id);
+
} else if (cpu_has(c, X86_FEATURE_NODEID_MSR)) {
u64 value;
@@ -376,7 +371,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
legacy_fixup_core_id(c);
}
}
-#endif
/*
* On a AMD dual core setup the lower bits of the APIC id distinguish the cores.
@@ -384,7 +378,6 @@ static void amd_get_topology(struct cpuinfo_x86 *c)
*/
static void amd_detect_cmp(struct cpuinfo_x86 *c)
{
-#ifdef CONFIG_SMP
unsigned bits;
int cpu = smp_processor_id();
@@ -395,17 +388,11 @@ static void amd_detect_cmp(struct cpuinfo_x86 *c)
c->phys_proc_id = c->initial_apicid >> bits;
/* use socket ID also for last level cache */
per_cpu(cpu_llc_id, cpu) = c->phys_proc_id;
- amd_get_topology(c);
-#endif
}
u16 amd_get_nb_id(int cpu)
{
- u16 id = 0;
-#ifdef CONFIG_SMP
- id = per_cpu(cpu_llc_id, cpu);
-#endif
- return id;
+ return per_cpu(cpu_llc_id, cpu);
}
EXPORT_SYMBOL_GPL(amd_get_nb_id);
@@ -864,6 +851,7 @@ static void init_amd(struct cpuinfo_x86 *c)
/* Multi core CPU? */
if (c->extended_cpuid_level >= 0x80000008) {
amd_detect_cmp(c);
+ amd_get_topology(c);
srat_detect_node(c);
}
diff --git a/arch/x86/kernel/cpu/bugs.c b/arch/x86/kernel/cpu/bugs.c
index 7416fc2..cd0fda1 100644
--- a/arch/x86/kernel/cpu/bugs.c
+++ b/arch/x86/kernel/cpu/bugs.c
@@ -529,18 +529,15 @@ static enum ssb_mitigation __init __ssb_select_mitigation(void)
if (mode == SPEC_STORE_BYPASS_DISABLE) {
setup_force_cpu_cap(X86_FEATURE_SPEC_STORE_BYPASS_DISABLE);
/*
- * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD uses
- * a completely different MSR and bit dependent on family.
+ * Intel uses the SPEC CTRL MSR Bit(2) for this, while AMD may
+ * use a completely different MSR and bit dependent on family.
*/
- switch (boot_cpu_data.x86_vendor) {
- case X86_VENDOR_INTEL:
+ if (!static_cpu_has(X86_FEATURE_MSR_SPEC_CTRL))
+ x86_amd_ssb_disable();
+ else {
x86_spec_ctrl_base |= SPEC_CTRL_SSBD;
x86_spec_ctrl_mask |= SPEC_CTRL_SSBD;
wrmsrl(MSR_IA32_SPEC_CTRL, x86_spec_ctrl_base);
- break;
- case X86_VENDOR_AMD:
- x86_amd_ssb_disable();
- break;
}
}
diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/cacheinfo.c
index 54d04d5..38354c6 100644
--- a/arch/x86/kernel/cpu/intel_cacheinfo.c
+++ b/arch/x86/kernel/cpu/cacheinfo.c
@@ -20,6 +20,8 @@
#include <asm/amd_nb.h>
#include <asm/smp.h>
+#include "cpu.h"
+
#define LVL_1_INST 1
#define LVL_1_DATA 2
#define LVL_2 3
@@ -637,6 +639,45 @@ static int find_num_cache_leaves(struct cpuinfo_x86 *c)
return i;
}
+void cacheinfo_amd_init_llc_id(struct cpuinfo_x86 *c, int cpu, u8 node_id)
+{
+ /*
+ * We may have multiple LLCs if L3 caches exist, so check if we
+ * have an L3 cache by looking at the L3 cache CPUID leaf.
+ */
+ if (!cpuid_edx(0x80000006))
+ return;
+
+ if (c->x86 < 0x17) {
+ /* LLC is at the node level. */
+ per_cpu(cpu_llc_id, cpu) = node_id;
+ } else if (c->x86 == 0x17 &&
+ c->x86_model >= 0 && c->x86_model <= 0x1F) {
+ /*
+ * LLC is at the core complex level.
+ * Core complex ID is ApicId[3] for these processors.
+ */
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> 3;
+ } else {
+ /*
+ * LLC ID is calculated from the number of threads sharing the
+ * cache.
+ * */
+ u32 eax, ebx, ecx, edx, num_sharing_cache = 0;
+ u32 llc_index = find_num_cache_leaves(c) - 1;
+
+ cpuid_count(0x8000001d, llc_index, &eax, &ebx, &ecx, &edx);
+ if (eax)
+ num_sharing_cache = ((eax >> 14) & 0xfff) + 1;
+
+ if (num_sharing_cache) {
+ int bits = get_count_order(num_sharing_cache) - 1;
+
+ per_cpu(cpu_llc_id, cpu) = c->apicid >> bits;
+ }
+ }
+}
+
void init_amd_cacheinfo(struct cpuinfo_x86 *c)
{
@@ -650,7 +691,7 @@ void init_amd_cacheinfo(struct cpuinfo_x86 *c)
}
}
-unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
+void init_intel_cacheinfo(struct cpuinfo_x86 *c)
{
/* Cache sizes */
unsigned int trace = 0, l1i = 0, l1d = 0, l2 = 0, l3 = 0;
@@ -802,7 +843,8 @@ unsigned int init_intel_cacheinfo(struct cpuinfo_x86 *c)
c->x86_cache_size = l3 ? l3 : (l2 ? l2 : (l1i+l1d));
- return l2;
+ if (!l2)
+ cpu_detect_cache_sizes(c);
}
static int __cache_amd_cpumap_setup(unsigned int cpu, int index,
diff --git a/arch/x86/kernel/cpu/centaur.c b/arch/x86/kernel/cpu/centaur.c
index e5ec0f1..14433ff 100644
--- a/arch/x86/kernel/cpu/centaur.c
+++ b/arch/x86/kernel/cpu/centaur.c
@@ -18,6 +18,13 @@
#define RNG_ENABLED (1 << 3)
#define RNG_ENABLE (1 << 6) /* MSR_VIA_RNG */
+#define X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW 0x00200000
+#define X86_VMX_FEATURE_PROC_CTLS_VNMI 0x00400000
+#define X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS 0x80000000
+#define X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC 0x00000001
+#define X86_VMX_FEATURE_PROC_CTLS2_EPT 0x00000002
+#define X86_VMX_FEATURE_PROC_CTLS2_VPID 0x00000020
+
static void init_c3(struct cpuinfo_x86 *c)
{
u32 lo, hi;
@@ -112,6 +119,31 @@ static void early_init_centaur(struct cpuinfo_x86 *c)
}
}
+static void centaur_detect_vmx_virtcap(struct cpuinfo_x86 *c)
+{
+ u32 vmx_msr_low, vmx_msr_high, msr_ctl, msr_ctl2;
+
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS, vmx_msr_low, vmx_msr_high);
+ msr_ctl = vmx_msr_high | vmx_msr_low;
+
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW)
+ set_cpu_cap(c, X86_FEATURE_TPR_SHADOW);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_VNMI)
+ set_cpu_cap(c, X86_FEATURE_VNMI);
+ if (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_2ND_CTLS) {
+ rdmsr(MSR_IA32_VMX_PROCBASED_CTLS2,
+ vmx_msr_low, vmx_msr_high);
+ msr_ctl2 = vmx_msr_high | vmx_msr_low;
+ if ((msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VIRT_APIC) &&
+ (msr_ctl & X86_VMX_FEATURE_PROC_CTLS_TPR_SHADOW))
+ set_cpu_cap(c, X86_FEATURE_FLEXPRIORITY);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_EPT)
+ set_cpu_cap(c, X86_FEATURE_EPT);
+ if (msr_ctl2 & X86_VMX_FEATURE_PROC_CTLS2_VPID)
+ set_cpu_cap(c, X86_FEATURE_VPID);
+ }
+}
+
static void init_centaur(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_X86_32
@@ -128,6 +160,24 @@ static void init_centaur(struct cpuinfo_x86 *c)
clear_cpu_cap(c, 0*32+31);
#endif
early_init_centaur(c);
+ init_intel_cacheinfo(c);
+ detect_num_cpu_cores(c);
+#ifdef CONFIG_X86_32
+ detect_ht(c);
+#endif
+
+ if (c->cpuid_level > 9) {
+ unsigned int eax = cpuid_eax(10);
+
+ /*
+ * Check for version and the number of counters
+ * Version(eax[7:0]) can't be 0;
+ * Counters(eax[15:8]) should be greater than 1;
+ */
+ if ((eax & 0xff) && (((eax >> 8) & 0xff) > 1))
+ set_cpu_cap(c, X86_FEATURE_ARCH_PERFMON);
+ }
+
switch (c->x86) {
#ifdef CONFIG_X86_32
case 5:
@@ -199,6 +249,9 @@ static void init_centaur(struct cpuinfo_x86 *c)
#ifdef CONFIG_X86_64
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
#endif
+
+ if (cpu_has(c, X86_FEATURE_VMX))
+ centaur_detect_vmx_virtcap(c);
}
#ifdef CONFIG_X86_32
diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c
index 38276f5..0df7151 100644
--- a/arch/x86/kernel/cpu/common.c
+++ b/arch/x86/kernel/cpu/common.c
@@ -66,6 +66,13 @@ cpumask_var_t cpu_callin_mask;
/* representing cpus for which sibling maps can be computed */
cpumask_var_t cpu_sibling_setup_mask;
+/* Number of siblings per CPU package */
+int smp_num_siblings = 1;
+EXPORT_SYMBOL(smp_num_siblings);
+
+/* Last level cache ID of each logical CPU */
+DEFINE_PER_CPU_READ_MOSTLY(u16, cpu_llc_id) = BAD_APICID;
+
/* correctly size the local cpu masks */
void __init setup_cpu_local_masks(void)
{
@@ -577,6 +584,19 @@ static void get_model_name(struct cpuinfo_x86 *c)
*(s + 1) = '\0';
}
+void detect_num_cpu_cores(struct cpuinfo_x86 *c)
+{
+ unsigned int eax, ebx, ecx, edx;
+
+ c->x86_max_cores = 1;
+ if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
+ return;
+
+ cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
+ if (eax & 0x1f)
+ c->x86_max_cores = (eax >> 26) + 1;
+}
+
void cpu_detect_cache_sizes(struct cpuinfo_x86 *c)
{
unsigned int n, dummy, ebx, ecx, edx, l2size;
@@ -783,6 +803,12 @@ static void init_speculation_control(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_STIBP);
set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
}
+
+ if (cpu_has(c, X86_FEATURE_AMD_SSBD)) {
+ set_cpu_cap(c, X86_FEATURE_SSBD);
+ set_cpu_cap(c, X86_FEATURE_MSR_SPEC_CTRL);
+ clear_cpu_cap(c, X86_FEATURE_VIRT_SSBD);
+ }
}
void get_cpu_cap(struct cpuinfo_x86 *c)
@@ -972,7 +998,8 @@ static void __init cpu_set_bug_bits(struct cpuinfo_x86 *c)
rdmsrl(MSR_IA32_ARCH_CAPABILITIES, ia32_cap);
if (!x86_match_cpu(cpu_no_spec_store_bypass) &&
- !(ia32_cap & ARCH_CAP_SSB_NO))
+ !(ia32_cap & ARCH_CAP_SSB_NO) &&
+ !cpu_has(c, X86_FEATURE_AMD_SSB_NO))
setup_force_cpu_bug(X86_BUG_SPEC_STORE_BYPASS);
if (x86_match_cpu(cpu_no_meltdown))
@@ -1044,6 +1071,21 @@ static void __init early_identify_cpu(struct cpuinfo_x86 *c)
*/
setup_clear_cpu_cap(X86_FEATURE_PCID);
#endif
+
+ /*
+ * Later in the boot process pgtable_l5_enabled() relies on
+ * cpu_feature_enabled(X86_FEATURE_LA57). If 5-level paging is not
+ * enabled by this point we need to clear the feature bit to avoid
+ * false-positives at the later stage.
+ *
+ * pgtable_l5_enabled() can be false here for several reasons:
+ * - 5-level paging is disabled compile-time;
+ * - it's 32-bit kernel;
+ * - machine doesn't support 5-level paging;
+ * - user specified 'no5lvl' in kernel command line.
+ */
+ if (!pgtable_l5_enabled())
+ setup_clear_cpu_cap(X86_FEATURE_LA57);
}
void __init early_cpu_init(void)
@@ -1557,7 +1599,7 @@ DEFINE_PER_CPU(unsigned long, cpu_current_top_of_stack) =
(unsigned long)&init_thread_union + THREAD_SIZE;
EXPORT_PER_CPU_SYMBOL(cpu_current_top_of_stack);
-#ifdef CONFIG_CC_STACKPROTECTOR
+#ifdef CONFIG_STACKPROTECTOR
DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary);
#endif
diff --git a/arch/x86/kernel/cpu/cpu.h b/arch/x86/kernel/cpu/cpu.h
index 37672d2..38216f6 100644
--- a/arch/x86/kernel/cpu/cpu.h
+++ b/arch/x86/kernel/cpu/cpu.h
@@ -47,6 +47,16 @@ extern const struct cpu_dev *const __x86_cpu_dev_start[],
extern void get_cpu_cap(struct cpuinfo_x86 *c);
extern void cpu_detect_cache_sizes(struct cpuinfo_x86 *c);
+extern void init_scattered_cpuid_features(struct cpuinfo_x86 *c);
+extern u32 get_scattered_cpuid_leaf(unsigned int level,
+ unsigned int sub_leaf,
+ enum cpuid_regs_idx reg);
+extern void init_intel_cacheinfo(struct cpuinfo_x86 *c);
+extern void init_amd_cacheinfo(struct cpuinfo_x86 *c);
+
+extern void detect_num_cpu_cores(struct cpuinfo_x86 *c);
+extern int detect_extended_topology(struct cpuinfo_x86 *c);
+extern void detect_ht(struct cpuinfo_x86 *c);
unsigned int aperfmperf_get_khz(int cpu);
diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c
index 577e7f7..eb75564 100644
--- a/arch/x86/kernel/cpu/intel.c
+++ b/arch/x86/kernel/cpu/intel.c
@@ -456,24 +456,6 @@ static void srat_detect_node(struct cpuinfo_x86 *c)
#endif
}
-/*
- * find out the number of processor cores on the die
- */
-static int intel_num_cpu_cores(struct cpuinfo_x86 *c)
-{
- unsigned int eax, ebx, ecx, edx;
-
- if (!IS_ENABLED(CONFIG_SMP) || c->cpuid_level < 4)
- return 1;
-
- /* Intel has a non-standard dependency on %ecx for this CPUID level. */
- cpuid_count(4, 0, &eax, &ebx, &ecx, &edx);
- if (eax & 0x1f)
- return (eax >> 26) + 1;
- else
- return 1;
-}
-
static void detect_vmx_virtcap(struct cpuinfo_x86 *c)
{
/* Intel VMX MSR indicated features */
@@ -656,8 +638,6 @@ static void init_intel_misc_features(struct cpuinfo_x86 *c)
static void init_intel(struct cpuinfo_x86 *c)
{
- unsigned int l2 = 0;
-
early_init_intel(c);
intel_workarounds(c);
@@ -674,19 +654,13 @@ static void init_intel(struct cpuinfo_x86 *c)
* let's use the legacy cpuid vector 0x1 and 0x4 for topology
* detection.
*/
- c->x86_max_cores = intel_num_cpu_cores(c);
+ detect_num_cpu_cores(c);
#ifdef CONFIG_X86_32
detect_ht(c);
#endif
}
- l2 = init_intel_cacheinfo(c);
-
- /* Detect legacy cache sizes if init_intel_cacheinfo did not */
- if (l2 == 0) {
- cpu_detect_cache_sizes(c);
- l2 = c->x86_cache_size;
- }
+ init_intel_cacheinfo(c);
if (c->cpuid_level > 9) {
unsigned eax = cpuid_eax(10);
@@ -699,7 +673,8 @@ static void init_intel(struct cpuinfo_x86 *c)
set_cpu_cap(c, X86_FEATURE_LFENCE_RDTSC);
if (boot_cpu_has(X86_FEATURE_DS)) {
- unsigned int l1;
+ unsigned int l1, l2;
+
rdmsr(MSR_IA32_MISC_ENABLE, l1, l2);
if (!(l1 & (1<<11)))
set_cpu_cap(c, X86_FEATURE_BTS);
@@ -727,6 +702,7 @@ static void init_intel(struct cpuinfo_x86 *c)
* Dixon is NOT a Celeron.
*/
if (c->x86 == 6) {
+ unsigned int l2 = c->x86_cache_size;
char *p = NULL;
switch (c->x86_model) {
diff --git a/arch/x86/kernel/cpu/intel_rdt.c b/arch/x86/kernel/cpu/intel_rdt.c
index 316a887..ec4754f 100644
--- a/arch/x86/kernel/cpu/intel_rdt.c
+++ b/arch/x86/kernel/cpu/intel_rdt.c
@@ -33,8 +33,8 @@
#include <asm/intel_rdt_sched.h>
#include "intel_rdt.h"
-#define MAX_MBA_BW 100u
#define MBA_IS_LINEAR 0x4
+#define MBA_MAX_MBPS U32_MAX
/* Mutex to protect rdtgroup access. */
DEFINE_MUTEX(rdtgroup_mutex);
@@ -178,7 +178,7 @@ struct rdt_resource rdt_resources_all[] = {
.msr_update = mba_wrmsr,
.cache_level = 3,
.parse_ctrlval = parse_bw,
- .format_str = "%d=%*d",
+ .format_str = "%d=%*u",
.fflags = RFTYPE_RES_MB,
},
};
@@ -230,6 +230,14 @@ static inline void cache_alloc_hsw_probe(void)
rdt_alloc_capable = true;
}
+bool is_mba_sc(struct rdt_resource *r)
+{
+ if (!r)
+ return rdt_resources_all[RDT_RESOURCE_MBA].membw.mba_sc;
+
+ return r->membw.mba_sc;
+}
+
/*
* rdt_get_mb_table() - get a mapping of bandwidth(b/w) percentage values
* exposed to user interface and the h/w understandable delay values.
@@ -341,7 +349,7 @@ static int get_cache_id(int cpu, int level)
* that can be written to QOS_MSRs.
* There are currently no SKUs which support non linear delay values.
*/
-static u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
+u32 delay_bw_map(unsigned long bw, struct rdt_resource *r)
{
if (r->membw.delay_linear)
return MAX_MBA_BW - bw;
@@ -431,25 +439,40 @@ struct rdt_domain *rdt_find_domain(struct rdt_resource *r, int id,
return NULL;
}
+void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm)
+{
+ int i;
+
+ /*
+ * Initialize the Control MSRs to having no control.
+ * For Cache Allocation: Set all bits in cbm
+ * For Memory Allocation: Set b/w requested to 100%
+ * and the bandwidth in MBps to U32_MAX
+ */
+ for (i = 0; i < r->num_closid; i++, dc++, dm++) {
+ *dc = r->default_ctrl;
+ *dm = MBA_MAX_MBPS;
+ }
+}
+
static int domain_setup_ctrlval(struct rdt_resource *r, struct rdt_domain *d)
{
struct msr_param m;
- u32 *dc;
- int i;
+ u32 *dc, *dm;
dc = kmalloc_array(r->num_closid, sizeof(*d->ctrl_val), GFP_KERNEL);
if (!dc)
return -ENOMEM;
- d->ctrl_val = dc;
+ dm = kmalloc_array(r->num_closid, sizeof(*d->mbps_val), GFP_KERNEL);
+ if (!dm) {
+ kfree(dc);
+ return -ENOMEM;
+ }
- /*
- * Initialize the Control MSRs to having no control.
- * For Cache Allocation: Set all bits in cbm
- * For Memory Allocation: Set b/w requested to 100
- */
- for (i = 0; i < r->num_closid; i++, dc++)
- *dc = r->default_ctrl;
+ d->ctrl_val = dc;
+ d->mbps_val = dm;
+ setup_default_ctrlval(r, dc, dm);
m.low = 0;
m.high = r->num_closid;
@@ -588,6 +611,7 @@ static void domain_remove_cpu(int cpu, struct rdt_resource *r)
}
kfree(d->ctrl_val);
+ kfree(d->mbps_val);
kfree(d->rmid_busy_llc);
kfree(d->mbm_total);
kfree(d->mbm_local);
diff --git a/arch/x86/kernel/cpu/intel_rdt.h b/arch/x86/kernel/cpu/intel_rdt.h
index 3fd7a70..3975282 100644
--- a/arch/x86/kernel/cpu/intel_rdt.h
+++ b/arch/x86/kernel/cpu/intel_rdt.h
@@ -28,6 +28,7 @@
#define MBM_CNTR_WIDTH 24
#define MBM_OVERFLOW_INTERVAL 1000
+#define MAX_MBA_BW 100u
#define RMID_VAL_ERROR BIT_ULL(63)
#define RMID_VAL_UNAVAIL BIT_ULL(62)
@@ -180,10 +181,20 @@ struct rftype {
* struct mbm_state - status for each MBM counter in each domain
* @chunks: Total data moved (multiply by rdt_group.mon_scale to get bytes)
* @prev_msr Value of IA32_QM_CTR for this RMID last time we read it
+ * @chunks_bw Total local data moved. Used for bandwidth calculation
+ * @prev_bw_msr:Value of previous IA32_QM_CTR for bandwidth counting
+ * @prev_bw The most recent bandwidth in MBps
+ * @delta_bw Difference between the current and previous bandwidth
+ * @delta_comp Indicates whether to compute the delta_bw
*/
struct mbm_state {
u64 chunks;
u64 prev_msr;
+ u64 chunks_bw;
+ u64 prev_bw_msr;
+ u32 prev_bw;
+ u32 delta_bw;
+ bool delta_comp;
};
/**
@@ -202,6 +213,7 @@ struct mbm_state {
* @cqm_work_cpu:
* worker cpu for CQM h/w counters
* @ctrl_val: array of cache or mem ctrl values (indexed by CLOSID)
+ * @mbps_val: When mba_sc is enabled, this holds the bandwidth in MBps
* @new_ctrl: new ctrl value to be loaded
* @have_new_ctrl: did user provide new_ctrl for this domain
*/
@@ -217,6 +229,7 @@ struct rdt_domain {
int mbm_work_cpu;
int cqm_work_cpu;
u32 *ctrl_val;
+ u32 *mbps_val;
u32 new_ctrl;
bool have_new_ctrl;
};
@@ -259,6 +272,7 @@ struct rdt_cache {
* @min_bw: Minimum memory bandwidth percentage user can request
* @bw_gran: Granularity at which the memory bandwidth is allocated
* @delay_linear: True if memory B/W delay is in linear scale
+ * @mba_sc: True if MBA software controller(mba_sc) is enabled
* @mb_map: Mapping of memory B/W percentage to memory B/W delay
*/
struct rdt_membw {
@@ -266,6 +280,7 @@ struct rdt_membw {
u32 min_bw;
u32 bw_gran;
u32 delay_linear;
+ bool mba_sc;
u32 *mb_map;
};
@@ -445,6 +460,9 @@ void mon_event_read(struct rmid_read *rr, struct rdt_domain *d,
void mbm_setup_overflow_handler(struct rdt_domain *dom,
unsigned long delay_ms);
void mbm_handle_overflow(struct work_struct *work);
+bool is_mba_sc(struct rdt_resource *r);
+void setup_default_ctrlval(struct rdt_resource *r, u32 *dc, u32 *dm);
+u32 delay_bw_map(unsigned long bw, struct rdt_resource *r);
void cqm_setup_limbo_handler(struct rdt_domain *dom, unsigned long delay_ms);
void cqm_handle_limbo(struct work_struct *work);
bool has_busy_rmid(struct rdt_resource *r, struct rdt_domain *d);
diff --git a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
index 23e1d5c..116d57b 100644
--- a/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
+++ b/arch/x86/kernel/cpu/intel_rdt_ctrlmondata.c
@@ -53,7 +53,8 @@ static bool bw_validate(char *buf, unsigned long *data, struct rdt_resource *r)
return false;
}
- if (bw < r->membw.min_bw || bw > r->default_ctrl) {
+ if ((bw < r->membw.min_bw || bw > r->default_ctrl) &&
+ !is_mba_sc(r)) {
rdt_last_cmd_printf("MB value %ld out of range [%d,%d]\n", bw,
r->membw.min_bw, r->default_ctrl);
return false;
@@ -179,6 +180,8 @@ static int update_domains(struct rdt_resource *r, int closid)
struct msr_param msr_param;
cpumask_var_t cpu_mask;
struct rdt_domain *d;
+ bool mba_sc;
+ u32 *dc;
int cpu;
if (!zalloc_cpumask_var(&cpu_mask, GFP_KERNEL))
@@ -188,13 +191,20 @@ static int update_domains(struct rdt_resource *r, int closid)
msr_param.high = msr_param.low + 1;
msr_param.res = r;
+ mba_sc = is_mba_sc(r);
list_for_each_entry(d, &r->domains, list) {
- if (d->have_new_ctrl && d->new_ctrl != d->ctrl_val[closid]) {
+ dc = !mba_sc ? d->ctrl_val : d->mbps_val;
+ if (d->have_new_ctrl && d->new_ctrl != dc[closid]) {
cpumask_set_cpu(cpumask_any(&d->cpu_mask), cpu_mask);
- d->ctrl_val[closid] = d->new_ctrl;
+ dc[closid] = d->new_ctrl;
}
}
- if (cpumask_empty(cpu_mask))
+
+ /*
+ * Avoid writing the control msr with control values when
+ * MBA software controller is enabled
+ */
+ if (cpumask_empty(cpu_mask) || mba_sc)
goto done;
cpu = get_cpu();
/* Update CBM on this cpu if it's in cpu_mask. */
@@ -282,13 +292,17 @@ static void show_doms(struct seq_file *s, struct rdt_resource *r, int closid)
{
struct rdt_domain *dom;
bool sep = false;
+ u32 ctrl_val;
seq_printf(s, "%*s:", max_name_width, r->name);
list_for_each_entry(dom, &r->domains, list) {
if (sep)
seq_puts(s, ";");
+
+ ctrl_val = (!is_mba_sc(r) ? dom->ctrl_val[closid] :
+ dom->mbps_val[closid]);
seq_printf(s, r->format_str, dom->id, max_data_width,
- dom->ctrl_val[closid]);
+ ctrl_val);
sep = true;
}
seq_puts(s, "\n");
diff --git a/arch/x86/kernel/cpu/intel_rdt_monitor.c b/arch/x86/kernel/cpu/intel_rdt_monitor.c
index 681450e..b0f3aed 100644
--- a/arch/x86/kernel/cpu/intel_rdt_monitor.c
+++ b/arch/x86/kernel/cpu/intel_rdt_monitor.c
@@ -225,10 +225,18 @@ void free_rmid(u32 rmid)
list_add_tail(&entry->list, &rmid_free_lru);
}
+static u64 mbm_overflow_count(u64 prev_msr, u64 cur_msr)
+{
+ u64 shift = 64 - MBM_CNTR_WIDTH, chunks;
+
+ chunks = (cur_msr << shift) - (prev_msr << shift);
+ return chunks >>= shift;
+}
+
static int __mon_event_count(u32 rmid, struct rmid_read *rr)
{
- u64 chunks, shift, tval;
struct mbm_state *m;
+ u64 chunks, tval;
tval = __rmid_read(rmid, rr->evtid);
if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL)) {
@@ -254,14 +262,12 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
}
if (rr->first) {
- m->prev_msr = tval;
- m->chunks = 0;
+ memset(m, 0, sizeof(struct mbm_state));
+ m->prev_bw_msr = m->prev_msr = tval;
return 0;
}
- shift = 64 - MBM_CNTR_WIDTH;
- chunks = (tval << shift) - (m->prev_msr << shift);
- chunks >>= shift;
+ chunks = mbm_overflow_count(m->prev_msr, tval);
m->chunks += chunks;
m->prev_msr = tval;
@@ -270,6 +276,32 @@ static int __mon_event_count(u32 rmid, struct rmid_read *rr)
}
/*
+ * Supporting function to calculate the memory bandwidth
+ * and delta bandwidth in MBps.
+ */
+static void mbm_bw_count(u32 rmid, struct rmid_read *rr)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_L3];
+ struct mbm_state *m = &rr->d->mbm_local[rmid];
+ u64 tval, cur_bw, chunks;
+
+ tval = __rmid_read(rmid, rr->evtid);
+ if (tval & (RMID_VAL_ERROR | RMID_VAL_UNAVAIL))
+ return;
+
+ chunks = mbm_overflow_count(m->prev_bw_msr, tval);
+ m->chunks_bw += chunks;
+ m->chunks = m->chunks_bw;
+ cur_bw = (chunks * r->mon_scale) >> 20;
+
+ if (m->delta_comp)
+ m->delta_bw = abs(cur_bw - m->prev_bw);
+ m->delta_comp = false;
+ m->prev_bw = cur_bw;
+ m->prev_bw_msr = tval;
+}
+
+/*
* This is called via IPI to read the CQM/MBM counters
* on a domain.
*/
@@ -297,6 +329,118 @@ void mon_event_count(void *info)
}
}
+/*
+ * Feedback loop for MBA software controller (mba_sc)
+ *
+ * mba_sc is a feedback loop where we periodically read MBM counters and
+ * adjust the bandwidth percentage values via the IA32_MBA_THRTL_MSRs so
+ * that:
+ *
+ * current bandwdith(cur_bw) < user specified bandwidth(user_bw)
+ *
+ * This uses the MBM counters to measure the bandwidth and MBA throttle
+ * MSRs to control the bandwidth for a particular rdtgrp. It builds on the
+ * fact that resctrl rdtgroups have both monitoring and control.
+ *
+ * The frequency of the checks is 1s and we just tag along the MBM overflow
+ * timer. Having 1s interval makes the calculation of bandwidth simpler.
+ *
+ * Although MBA's goal is to restrict the bandwidth to a maximum, there may
+ * be a need to increase the bandwidth to avoid uncecessarily restricting
+ * the L2 <-> L3 traffic.
+ *
+ * Since MBA controls the L2 external bandwidth where as MBM measures the
+ * L3 external bandwidth the following sequence could lead to such a
+ * situation.
+ *
+ * Consider an rdtgroup which had high L3 <-> memory traffic in initial
+ * phases -> mba_sc kicks in and reduced bandwidth percentage values -> but
+ * after some time rdtgroup has mostly L2 <-> L3 traffic.
+ *
+ * In this case we may restrict the rdtgroup's L2 <-> L3 traffic as its
+ * throttle MSRs already have low percentage values. To avoid
+ * unnecessarily restricting such rdtgroups, we also increase the bandwidth.
+ */
+static void update_mba_bw(struct rdtgroup *rgrp, struct rdt_domain *dom_mbm)
+{
+ u32 closid, rmid, cur_msr, cur_msr_val, new_msr_val;
+ struct mbm_state *pmbm_data, *cmbm_data;
+ u32 cur_bw, delta_bw, user_bw;
+ struct rdt_resource *r_mba;
+ struct rdt_domain *dom_mba;
+ struct list_head *head;
+ struct rdtgroup *entry;
+
+ r_mba = &rdt_resources_all[RDT_RESOURCE_MBA];
+ closid = rgrp->closid;
+ rmid = rgrp->mon.rmid;
+ pmbm_data = &dom_mbm->mbm_local[rmid];
+
+ dom_mba = get_domain_from_cpu(smp_processor_id(), r_mba);
+ if (!dom_mba) {
+ pr_warn_once("Failure to get domain for MBA update\n");
+ return;
+ }
+
+ cur_bw = pmbm_data->prev_bw;
+ user_bw = dom_mba->mbps_val[closid];
+ delta_bw = pmbm_data->delta_bw;
+ cur_msr_val = dom_mba->ctrl_val[closid];
+
+ /*
+ * For Ctrl groups read data from child monitor groups.
+ */
+ head = &rgrp->mon.crdtgrp_list;
+ list_for_each_entry(entry, head, mon.crdtgrp_list) {
+ cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+ cur_bw += cmbm_data->prev_bw;
+ delta_bw += cmbm_data->delta_bw;
+ }
+
+ /*
+ * Scale up/down the bandwidth linearly for the ctrl group. The
+ * bandwidth step is the bandwidth granularity specified by the
+ * hardware.
+ *
+ * The delta_bw is used when increasing the bandwidth so that we
+ * dont alternately increase and decrease the control values
+ * continuously.
+ *
+ * For ex: consider cur_bw = 90MBps, user_bw = 100MBps and if
+ * bandwidth step is 20MBps(> user_bw - cur_bw), we would keep
+ * switching between 90 and 110 continuously if we only check
+ * cur_bw < user_bw.
+ */
+ if (cur_msr_val > r_mba->membw.min_bw && user_bw < cur_bw) {
+ new_msr_val = cur_msr_val - r_mba->membw.bw_gran;
+ } else if (cur_msr_val < MAX_MBA_BW &&
+ (user_bw > (cur_bw + delta_bw))) {
+ new_msr_val = cur_msr_val + r_mba->membw.bw_gran;
+ } else {
+ return;
+ }
+
+ cur_msr = r_mba->msr_base + closid;
+ wrmsrl(cur_msr, delay_bw_map(new_msr_val, r_mba));
+ dom_mba->ctrl_val[closid] = new_msr_val;
+
+ /*
+ * Delta values are updated dynamically package wise for each
+ * rdtgrp everytime the throttle MSR changes value.
+ *
+ * This is because (1)the increase in bandwidth is not perfectly
+ * linear and only "approximately" linear even when the hardware
+ * says it is linear.(2)Also since MBA is a core specific
+ * mechanism, the delta values vary based on number of cores used
+ * by the rdtgrp.
+ */
+ pmbm_data->delta_comp = true;
+ list_for_each_entry(entry, head, mon.crdtgrp_list) {
+ cmbm_data = &dom_mbm->mbm_local[entry->mon.rmid];
+ cmbm_data->delta_comp = true;
+ }
+}
+
static void mbm_update(struct rdt_domain *d, int rmid)
{
struct rmid_read rr;
@@ -314,7 +458,16 @@ static void mbm_update(struct rdt_domain *d, int rmid)
}
if (is_mbm_local_enabled()) {
rr.evtid = QOS_L3_MBM_LOCAL_EVENT_ID;
- __mon_event_count(rmid, &rr);
+
+ /*
+ * Call the MBA software controller only for the
+ * control groups and when user has enabled
+ * the software controller explicitly.
+ */
+ if (!is_mba_sc(NULL))
+ __mon_event_count(rmid, &rr);
+ else
+ mbm_bw_count(rmid, &rr);
}
}
@@ -385,6 +538,9 @@ void mbm_handle_overflow(struct work_struct *work)
head = &prgrp->mon.crdtgrp_list;
list_for_each_entry(crgrp, head, mon.crdtgrp_list)
mbm_update(d, crgrp->mon.rmid);
+
+ if (is_mba_sc(NULL))
+ update_mba_bw(prgrp, d);
}
schedule_delayed_work_on(cpu, &d->mbm_over, delay);
diff --git a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
index fca759d..749856a 100644
--- a/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
+++ b/arch/x86/kernel/cpu/intel_rdt_rdtgroup.c
@@ -1005,6 +1005,11 @@ static void l2_qos_cfg_update(void *arg)
wrmsrl(IA32_L2_QOS_CFG, *enable ? L2_QOS_CDP_ENABLE : 0ULL);
}
+static inline bool is_mba_linear(void)
+{
+ return rdt_resources_all[RDT_RESOURCE_MBA].membw.delay_linear;
+}
+
static int set_cache_qos_cfg(int level, bool enable)
{
void (*update)(void *arg);
@@ -1041,6 +1046,28 @@ static int set_cache_qos_cfg(int level, bool enable)
return 0;
}
+/*
+ * Enable or disable the MBA software controller
+ * which helps user specify bandwidth in MBps.
+ * MBA software controller is supported only if
+ * MBM is supported and MBA is in linear scale.
+ */
+static int set_mba_sc(bool mba_sc)
+{
+ struct rdt_resource *r = &rdt_resources_all[RDT_RESOURCE_MBA];
+ struct rdt_domain *d;
+
+ if (!is_mbm_enabled() || !is_mba_linear() ||
+ mba_sc == is_mba_sc(r))
+ return -EINVAL;
+
+ r->membw.mba_sc = mba_sc;
+ list_for_each_entry(d, &r->domains, list)
+ setup_default_ctrlval(r, d->ctrl_val, d->mbps_val);
+
+ return 0;
+}
+
static int cdp_enable(int level, int data_type, int code_type)
{
struct rdt_resource *r_ldata = &rdt_resources_all[data_type];
@@ -1123,6 +1150,10 @@ static int parse_rdtgroupfs_options(char *data)
ret = cdpl2_enable();
if (ret)
goto out;
+ } else if (!strcmp(token, "mba_MBps")) {
+ ret = set_mba_sc(true);
+ if (ret)
+ goto out;
} else {
ret = -EINVAL;
goto out;
@@ -1445,6 +1476,8 @@ static void rdt_kill_sb(struct super_block *sb)
cpus_read_lock();
mutex_lock(&rdtgroup_mutex);
+ set_mba_sc(false);
+
/*Put everything back to default values. */
for_each_alloc_enabled_rdt_resource(r)
reset_all_ctrls(r);
diff --git a/arch/x86/kernel/cpu/mcheck/mce-inject.c b/arch/x86/kernel/cpu/mcheck/mce-inject.c
index 475cb4f..c805a06 100644
--- a/arch/x86/kernel/cpu/mcheck/mce-inject.c
+++ b/arch/x86/kernel/cpu/mcheck/mce-inject.c
@@ -48,7 +48,7 @@ static struct dentry *dfs_inj;
static u8 n_banks;
-#define MAX_FLAG_OPT_SIZE 3
+#define MAX_FLAG_OPT_SIZE 4
#define NBCFG 0x44
enum injection_type {
diff --git a/arch/x86/kernel/cpu/mcheck/mce.c b/arch/x86/kernel/cpu/mcheck/mce.c
index 42cf288..e4cf6ff 100644
--- a/arch/x86/kernel/cpu/mcheck/mce.c
+++ b/arch/x86/kernel/cpu/mcheck/mce.c
@@ -1457,7 +1457,7 @@ static int __mcheck_cpu_mce_banks_init(void)
int i;
u8 num_banks = mca_cfg.banks;
- mce_banks = kzalloc(num_banks * sizeof(struct mce_bank), GFP_KERNEL);
+ mce_banks = kcalloc(num_banks, sizeof(struct mce_bank), GFP_KERNEL);
if (!mce_banks)
return -ENOMEM;
@@ -1727,6 +1727,21 @@ static void __mcheck_cpu_init_early(struct cpuinfo_x86 *c)
}
}
+static void mce_centaur_feature_init(struct cpuinfo_x86 *c)
+{
+ struct mca_config *cfg = &mca_cfg;
+
+ /*
+ * All newer Centaur CPUs support MCE broadcasting. Enable
+ * synchronization with a one second timeout.
+ */
+ if ((c->x86 == 6 && c->x86_model == 0xf && c->x86_stepping >= 0xe) ||
+ c->x86 > 6) {
+ if (cfg->monarch_timeout < 0)
+ cfg->monarch_timeout = USEC_PER_SEC;
+ }
+}
+
static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
{
switch (c->x86_vendor) {
@@ -1739,6 +1754,9 @@ static void __mcheck_cpu_init_vendor(struct cpuinfo_x86 *c)
mce_amd_feature_init(c);
break;
}
+ case X86_VENDOR_CENTAUR:
+ mce_centaur_feature_init(c);
+ break;
default:
break;
diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c
index c8e0388..dd33c35 100644
--- a/arch/x86/kernel/cpu/mcheck/mce_amd.c
+++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c
@@ -436,8 +436,7 @@ static void deferred_error_interrupt_enable(struct cpuinfo_x86 *c)
wrmsr(MSR_CU_DEF_ERR, low, high);
}
-static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
- unsigned int block)
+static u32 smca_get_block_address(unsigned int bank, unsigned int block)
{
u32 low, high;
u32 addr = 0;
@@ -456,13 +455,13 @@ static u32 smca_get_block_address(unsigned int cpu, unsigned int bank,
* For SMCA enabled processors, BLKPTR field of the first MISC register
* (MCx_MISC0) indicates presence of additional MISC regs set (MISC1-4).
*/
- if (rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
+ if (rdmsr_safe(MSR_AMD64_SMCA_MCx_CONFIG(bank), &low, &high))
goto out;
if (!(low & MCI_CONFIG_MCAX))
goto out;
- if (!rdmsr_safe_on_cpu(cpu, MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
+ if (!rdmsr_safe(MSR_AMD64_SMCA_MCx_MISC(bank), &low, &high) &&
(low & MASK_BLKPTR_LO))
addr = MSR_AMD64_SMCA_MCx_MISCy(bank, block - 1);
@@ -471,7 +470,7 @@ out:
return addr;
}
-static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 high,
+static u32 get_block_address(u32 current_addr, u32 low, u32 high,
unsigned int bank, unsigned int block)
{
u32 addr = 0, offset = 0;
@@ -480,7 +479,7 @@ static u32 get_block_address(unsigned int cpu, u32 current_addr, u32 low, u32 hi
return addr;
if (mce_flags.smca)
- return smca_get_block_address(cpu, bank, block);
+ return smca_get_block_address(bank, block);
/* Fall back to method we used for older processors: */
switch (block) {
@@ -558,7 +557,7 @@ void mce_amd_feature_init(struct cpuinfo_x86 *c)
smca_configure(bank, cpu);
for (block = 0; block < NR_BLOCKS; ++block) {
- address = get_block_address(cpu, address, low, high, bank, block);
+ address = get_block_address(address, low, high, bank, block);
if (!address)
break;
@@ -1175,7 +1174,7 @@ static int allocate_threshold_blocks(unsigned int cpu, unsigned int bank,
if (err)
goto out_free;
recurse:
- address = get_block_address(cpu, address, low, high, bank, ++block);
+ address = get_block_address(address, low, high, bank, ++block);
if (!address)
return 0;
@@ -1385,7 +1384,7 @@ int mce_threshold_create_device(unsigned int cpu)
if (bp)
return 0;
- bp = kzalloc(sizeof(struct threshold_bank *) * mca_cfg.banks,
+ bp = kcalloc(mca_cfg.banks, sizeof(struct threshold_bank *),
GFP_KERNEL);
if (!bp)
return -ENOMEM;
diff --git a/arch/x86/kernel/cpu/mtrr/Makefile b/arch/x86/kernel/cpu/mtrr/Makefile
index ad9e5ed..2ad9107 100644
--- a/arch/x86/kernel/cpu/mtrr/Makefile
+++ b/arch/x86/kernel/cpu/mtrr/Makefile
@@ -1,3 +1,3 @@
-obj-y := main.o if.o generic.o cleanup.o
+obj-y := mtrr.o if.o generic.o cleanup.o
obj-$(CONFIG_X86_32) += amd.o cyrix.o centaur.o
diff --git a/arch/x86/kernel/cpu/mtrr/if.c b/arch/x86/kernel/cpu/mtrr/if.c
index c610f47..4021d38 100644
--- a/arch/x86/kernel/cpu/mtrr/if.c
+++ b/arch/x86/kernel/cpu/mtrr/if.c
@@ -43,7 +43,7 @@ mtrr_file_add(unsigned long base, unsigned long size,
max = num_var_ranges;
if (fcount == NULL) {
- fcount = kzalloc(max * sizeof *fcount, GFP_KERNEL);
+ fcount = kcalloc(max, sizeof(*fcount), GFP_KERNEL);
if (!fcount)
return -ENOMEM;
FILE_FCOUNT(file) = fcount;
diff --git a/arch/x86/kernel/cpu/mtrr/main.c b/arch/x86/kernel/cpu/mtrr/mtrr.c
index 7468de4..9a19c80 100644
--- a/arch/x86/kernel/cpu/mtrr/main.c
+++ b/arch/x86/kernel/cpu/mtrr/mtrr.c
@@ -46,6 +46,7 @@
#include <linux/pci.h>
#include <linux/smp.h>
#include <linux/syscore_ops.h>
+#include <linux/rcupdate.h>
#include <asm/cpufeature.h>
#include <asm/e820/api.h>
@@ -100,7 +101,7 @@ static int have_wrcomb(void)
if (dev->vendor == PCI_VENDOR_ID_SERVERWORKS &&
dev->device == PCI_DEVICE_ID_SERVERWORKS_LE &&
dev->revision <= 5) {
- pr_info("mtrr: Serverworks LE rev < 6 detected. Write-combining disabled.\n");
+ pr_info("Serverworks LE rev < 6 detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
@@ -110,7 +111,7 @@ static int have_wrcomb(void)
*/
if (dev->vendor == PCI_VENDOR_ID_INTEL &&
dev->device == PCI_DEVICE_ID_INTEL_82451NX) {
- pr_info("mtrr: Intel 450NX MMC detected. Write-combining disabled.\n");
+ pr_info("Intel 450NX MMC detected. Write-combining disabled.\n");
pci_dev_put(dev);
return 0;
}
@@ -312,24 +313,24 @@ int mtrr_add_page(unsigned long base, unsigned long size,
return error;
if (type >= MTRR_NUM_TYPES) {
- pr_warn("mtrr: type: %u invalid\n", type);
+ pr_warn("type: %u invalid\n", type);
return -EINVAL;
}
/* If the type is WC, check that this processor supports it */
if ((type == MTRR_TYPE_WRCOMB) && !have_wrcomb()) {
- pr_warn("mtrr: your processor doesn't support write-combining\n");
+ pr_warn("your processor doesn't support write-combining\n");
return -ENOSYS;
}
if (!size) {
- pr_warn("mtrr: zero sized request\n");
+ pr_warn("zero sized request\n");
return -EINVAL;
}
if ((base | (base + size - 1)) >>
(boot_cpu_data.x86_phys_bits - PAGE_SHIFT)) {
- pr_warn("mtrr: base or size exceeds the MTRR width\n");
+ pr_warn("base or size exceeds the MTRR width\n");
return -EINVAL;
}
@@ -360,8 +361,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
} else if (types_compatible(type, ltype))
continue;
}
- pr_warn("mtrr: 0x%lx000,0x%lx000 overlaps existing"
- " 0x%lx000,0x%lx000\n", base, size, lbase,
+ pr_warn("0x%lx000,0x%lx000 overlaps existing 0x%lx000,0x%lx000\n", base, size, lbase,
lsize);
goto out;
}
@@ -369,7 +369,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
if (ltype != type) {
if (types_compatible(type, ltype))
continue;
- pr_warn("mtrr: type mismatch for %lx000,%lx000 old: %s new: %s\n",
+ pr_warn("type mismatch for %lx000,%lx000 old: %s new: %s\n",
base, size, mtrr_attrib_to_str(ltype),
mtrr_attrib_to_str(type));
goto out;
@@ -395,7 +395,7 @@ int mtrr_add_page(unsigned long base, unsigned long size,
}
}
} else {
- pr_info("mtrr: no more MTRRs available\n");
+ pr_info("no more MTRRs available\n");
}
error = i;
out:
@@ -407,8 +407,8 @@ int mtrr_add_page(unsigned long base, unsigned long size,
static int mtrr_check(unsigned long base, unsigned long size)
{
if ((base & (PAGE_SIZE - 1)) || (size & (PAGE_SIZE - 1))) {
- pr_warn("mtrr: size and base must be multiples of 4 kiB\n");
- pr_debug("mtrr: size: 0x%lx base: 0x%lx\n", size, base);
+ pr_warn("size and base must be multiples of 4 kiB\n");
+ pr_debug("size: 0x%lx base: 0x%lx\n", size, base);
dump_stack();
return -1;
}
@@ -499,22 +499,22 @@ int mtrr_del_page(int reg, unsigned long base, unsigned long size)
}
}
if (reg < 0) {
- pr_debug("mtrr: no MTRR for %lx000,%lx000 found\n",
+ pr_debug("no MTRR for %lx000,%lx000 found\n",
base, size);
goto out;
}
}
if (reg >= max) {
- pr_warn("mtrr: register: %d too big\n", reg);
+ pr_warn("register: %d too big\n", reg);
goto out;
}
mtrr_if->get(reg, &lbase, &lsize, &ltype);
if (lsize < 1) {
- pr_warn("mtrr: MTRR %d not used\n", reg);
+ pr_warn("MTRR %d not used\n", reg);
goto out;
}
if (mtrr_usage_table[reg] < 1) {
- pr_warn("mtrr: reg: %d has count=0\n", reg);
+ pr_warn("reg: %d has count=0\n", reg);
goto out;
}
if (--mtrr_usage_table[reg] < 1)
@@ -775,7 +775,7 @@ void __init mtrr_bp_init(void)
}
if (!mtrr_enabled()) {
- pr_info("MTRR: Disabled\n");
+ pr_info("Disabled\n");
/*
* PAT initialization relies on MTRR's rendezvous handler.
@@ -793,6 +793,9 @@ void mtrr_ap_init(void)
if (!use_intel() || mtrr_aps_delayed_init)
return;
+
+ rcu_cpu_starting(smp_processor_id());
+
/*
* Ideally we should hold mtrr_mutex here to avoid mtrr entries
* changed, but this routine will be called in cpu boot time,
diff --git a/arch/x86/kernel/cpu/topology.c b/arch/x86/kernel/cpu/topology.c
index b099024..81c0afb 100644
--- a/arch/x86/kernel/cpu/topology.c
+++ b/arch/x86/kernel/cpu/topology.c
@@ -27,7 +27,7 @@
* exists, use it for populating initial_apicid and cpu topology
* detection.
*/
-void detect_extended_topology(struct cpuinfo_x86 *c)
+int detect_extended_topology(struct cpuinfo_x86 *c)
{
#ifdef CONFIG_SMP
unsigned int eax, ebx, ecx, edx, sub_index;
@@ -36,7 +36,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
static bool printed;
if (c->cpuid_level < 0xb)
- return;
+ return -1;
cpuid_count(0xb, SMT_LEVEL, &eax, &ebx, &ecx, &edx);
@@ -44,7 +44,7 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
* check if the cpuid leaf 0xb is actually implemented.
*/
if (ebx == 0 || (LEAFB_SUBTYPE(ecx) != SMT_TYPE))
- return;
+ return -1;
set_cpu_cap(c, X86_FEATURE_XTOPOLOGY);
@@ -95,6 +95,6 @@ void detect_extended_topology(struct cpuinfo_x86 *c)
c->cpu_core_id);
printed = 1;
}
- return;
#endif
+ return 0;
}
OpenPOWER on IntegriCloud