From 84f2b9b2edc09595569c7397cc3c888764ffd78b Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 2 Feb 2012 12:04:01 +0100 Subject: perf: Remove deprecated WARN_ON_ONCE() With the new throttling/unthrottling code introduced with commit: e050e3f0a71b ("perf: Fix broken interrupt rate throttling") we occasionally hit two WARN_ON_ONCE() checks in: - intel_pmu_pebs_enable() - intel_pmu_lbr_enable() - x86_pmu_start() The assertions are no longer problematic. There is a valid path where they can trigger but it is harmless. The assertion can be triggered with: $ perf record -e instructions:pp .... Leading to paths: intel_pmu_pebs_enable intel_pmu_enable_event x86_perf_event_set_period x86_pmu_start perf_adjust_freq_unthr_context perf_event_task_tick scheduler_tick And: intel_pmu_lbr_enable intel_pmu_enable_event x86_perf_event_set_period x86_pmu_start perf_adjust_freq_unthr_context. perf_event_task_tick scheduler_tick cpuc->enabled is always on because when we get to perf_adjust_freq_unthr_context() the PMU is not totally disabled. Furthermore when we need to adjust a period, we only stop the event we need to change and not the entire PMU. Thus, when we re-enable, cpuc->enabled is already set. Note that when we stop the event, both pebs and lbr are stopped if necessary (and possible). Signed-off-by: Stephane Eranian Cc: peterz@infradead.org Link: http://lkml.kernel.org/r/20120202110401.GA30911@quad Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 1 - arch/x86/kernel/cpu/perf_event_intel_lbr.c | 2 -- 3 files changed, 6 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5adce10..2a30e5a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -986,9 +986,6 @@ static void x86_pmu_start(struct perf_event *event, int flags) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx = event->hw.idx; - if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) - return; - if (WARN_ON_ONCE(idx == -1)) return; diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 73da6b6..d6bd49f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -439,7 +439,6 @@ void intel_pmu_pebs_enable(struct perf_event *event) hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; cpuc->pebs_enabled |= 1ULL << hwc->idx; - WARN_ON_ONCE(cpuc->enabled); if (x86_pmu.intel_cap.pebs_trap && event->attr.precise_ip > 1) intel_pmu_lbr_enable(event); diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 3fab3de..47a7e63 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -72,8 +72,6 @@ void intel_pmu_lbr_enable(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - WARN_ON_ONCE(cpuc->enabled); - /* * Reset the LBR stack if we changed task context to * avoid data leaks. -- cgit v1.1 From f39d47ff819ed52a2afbdbecbe35f23f7755f58d Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Tue, 7 Feb 2012 14:39:57 +0100 Subject: perf: Fix double start/stop in x86_pmu_start() The following patch fixes a bug introduced by the following commit: e050e3f0a71b ("perf: Fix broken interrupt rate throttling") The patch caused the following warning to pop up depending on the sampling frequency adjustments: ------------[ cut here ]------------ WARNING: at arch/x86/kernel/cpu/perf_event.c:995 x86_pmu_start+0x79/0xd4() It was caused by the following call sequence: perf_adjust_freq_unthr_context.part() { stop() if (delta > 0) { perf_adjust_period() { if (period > 8*...) { stop() ... start() } } } start() } Which caused a double start and a double stop, thus triggering the assert in x86_pmu_start(). The patch fixes the problem by avoiding the double calls. We pass a new argument to perf_adjust_period() to indicate whether or not the event is already stopped. We can't just remove the start/stop from that function because it's called from __perf_event_overflow where the event needs to be reloaded via a stop/start back-toback call. The patch reintroduces the assertion in x86_pmu_start() which was removed by commit: 84f2b9b ("perf: Remove deprecated WARN_ON_ONCE()") In this second version, we've added calls to disable/enable PMU during unthrottling or frequency adjustment based on bug report of spurious NMI interrupts from Eric Dumazet. Reported-and-tested-by: Eric Dumazet Signed-off-by: Stephane Eranian Acked-by: Peter Zijlstra Cc: markus@trippelsdorf.de Cc: paulus@samba.org Link: http://lkml.kernel.org/r/20120207133956.GA4932@quad [ Minor edits to the changelog and to the code ] Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2a30e5a..5adce10 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -986,6 +986,9 @@ static void x86_pmu_start(struct perf_event *event, int flags) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx = event->hw.idx; + if (WARN_ON_ONCE(!(event->hw.state & PERF_HES_STOPPED))) + return; + if (WARN_ON_ONCE(idx == -1)) return; -- cgit v1.1 From 32c3233885eb10ac9cb9410f2f8cd64b8df2b2a1 Mon Sep 17 00:00:00 2001 From: Andreas Herrmann Date: Wed, 8 Feb 2012 20:52:29 +0100 Subject: x86/amd: Fix L1i and L2 cache sharing information for AMD family 15h processors For L1 instruction cache and L2 cache the shared CPU information is wrong. On current AMD family 15h CPUs those caches are shared between both cores of a compute unit. This fixes https://bugzilla.kernel.org/show_bug.cgi?id=42607 Signed-off-by: Andreas Herrmann Cc: Petkov Borislav Cc: Dave Jones Cc: Link: http://lkml.kernel.org/r/20120208195229.GA17523@alberich.amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/intel_cacheinfo.c | 44 ++++++++++++++++++++++++++++------- 1 file changed, 36 insertions(+), 8 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/intel_cacheinfo.c b/arch/x86/kernel/cpu/intel_cacheinfo.c index 6b45e5e..73d08ed 100644 --- a/arch/x86/kernel/cpu/intel_cacheinfo.c +++ b/arch/x86/kernel/cpu/intel_cacheinfo.c @@ -326,8 +326,7 @@ static void __cpuinit amd_calc_l3_indices(struct amd_northbridge *nb) l3->indices = (max(max3(sc0, sc1, sc2), sc3) << 10) - 1; } -static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, - int index) +static void __cpuinit amd_init_l3_cache(struct _cpuid4_info_regs *this_leaf, int index) { int node; @@ -725,14 +724,16 @@ static DEFINE_PER_CPU(struct _cpuid4_info *, ici_cpuid4_info); #define CPUID4_INFO_IDX(x, y) (&((per_cpu(ici_cpuid4_info, x))[y])) #ifdef CONFIG_SMP -static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) + +static int __cpuinit cache_shared_amd_cpu_map_setup(unsigned int cpu, int index) { - struct _cpuid4_info *this_leaf, *sibling_leaf; - unsigned long num_threads_sharing; - int index_msb, i, sibling; + struct _cpuid4_info *this_leaf; + int ret, i, sibling; struct cpuinfo_x86 *c = &cpu_data(cpu); - if ((index == 3) && (c->x86_vendor == X86_VENDOR_AMD)) { + ret = 0; + if (index == 3) { + ret = 1; for_each_cpu(i, cpu_llc_shared_mask(cpu)) { if (!per_cpu(ici_cpuid4_info, i)) continue; @@ -743,8 +744,35 @@ static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) set_bit(sibling, this_leaf->shared_cpu_map); } } - return; + } else if ((c->x86 == 0x15) && ((index == 1) || (index == 2))) { + ret = 1; + for_each_cpu(i, cpu_sibling_mask(cpu)) { + if (!per_cpu(ici_cpuid4_info, i)) + continue; + this_leaf = CPUID4_INFO_IDX(i, index); + for_each_cpu(sibling, cpu_sibling_mask(cpu)) { + if (!cpu_online(sibling)) + continue; + set_bit(sibling, this_leaf->shared_cpu_map); + } + } } + + return ret; +} + +static void __cpuinit cache_shared_cpu_map_setup(unsigned int cpu, int index) +{ + struct _cpuid4_info *this_leaf, *sibling_leaf; + unsigned long num_threads_sharing; + int index_msb, i; + struct cpuinfo_x86 *c = &cpu_data(cpu); + + if (c->x86_vendor == X86_VENDOR_AMD) { + if (cache_shared_amd_cpu_map_setup(cpu, index)) + return; + } + this_leaf = CPUID4_INFO_IDX(cpu, index); num_threads_sharing = 1 + this_leaf->base.eax.split.num_threads_sharing; -- cgit v1.1 From 7e16838d94b566a17b65231073d179bc04d590c8 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Sun, 19 Feb 2012 13:27:00 -0800 Subject: i387: support lazy restore of FPU state This makes us recognize when we try to restore FPU state that matches what we already have in the FPU on this CPU, and avoids the restore entirely if so. To do this, we add two new data fields: - a percpu 'fpu_owner_task' variable that gets written any time we update the "has_fpu" field, and thus acts as a kind of back-pointer to the task that owns the CPU. The exception is when we save the FPU state as part of a context switch - if the save can keep the FPU state around, we leave the 'fpu_owner_task' variable pointing at the task whose FP state still remains on the CPU. - a per-thread 'last_cpu' field, that indicates which CPU that thread used its FPU on last. We update this on every context switch (writing an invalid CPU number if the last context switch didn't leave the FPU in a lazily usable state), so we know that *that* thread has done nothing else with the FPU since. These two fields together can be used when next switching back to the task to see if the CPU still matches: if 'fpu_owner_task' matches the task we are switching to, we know that no other task (or kernel FPU usage) touched the FPU on this CPU in the meantime, and if the current CPU number matches the 'last_cpu' field, we know that this thread did no other FP work on any other CPU, so the FPU state on the CPU must match what was saved on last context switch. In that case, we can avoid the 'f[x]rstor' entirely, and just clear the CR0.TS bit. Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/common.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index d43cad7..b667148 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1044,6 +1044,8 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(unsigned int, irq_count) = -1; +DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); + /* * Special IST stacks which the CPU switches to when it calls * an IST-marked descriptor entry. Up to 7 stacks (hardware -- cgit v1.1 From 27e74da9800289e69ba907777df1e2085231eff7 Mon Sep 17 00:00:00 2001 From: Linus Torvalds Date: Mon, 20 Feb 2012 19:34:10 -0800 Subject: i387: export 'fpu_owner_task' per-cpu variable (And define it properly for x86-32, which had its 'current_task' declaration in separate from x86-64) Bitten by my dislike for modules on the machines I use, and the fact that apparently nobody else actually wanted to test the patches I sent out. Snif. Nobody else cares. Anyway, we probably should uninline the 'kernel_fpu_begin()' function that is what modules actually use and that references this, but this is the minimal fix for now. Reported-by: Josh Boyer Reported-and-tested-by: Jongman Heo Signed-off-by: Linus Torvalds --- arch/x86/kernel/cpu/common.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/common.c b/arch/x86/kernel/cpu/common.c index b667148..c0f7d68 100644 --- a/arch/x86/kernel/cpu/common.c +++ b/arch/x86/kernel/cpu/common.c @@ -1045,6 +1045,7 @@ DEFINE_PER_CPU(char *, irq_stack_ptr) = DEFINE_PER_CPU(unsigned int, irq_count) = -1; DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); +EXPORT_PER_CPU_SYMBOL(fpu_owner_task); /* * Special IST stacks which the CPU switches to when it calls @@ -1113,6 +1114,8 @@ void debug_stack_reset(void) DEFINE_PER_CPU(struct task_struct *, current_task) = &init_task; EXPORT_PER_CPU_SYMBOL(current_task); +DEFINE_PER_CPU(struct task_struct *, fpu_owner_task); +EXPORT_PER_CPU_SYMBOL(fpu_owner_task); #ifdef CONFIG_CC_STACKPROTECTOR DEFINE_PER_CPU_ALIGNED(struct stack_canary, stack_canary); -- cgit v1.1 From 3f806e50981825fa56a7f1938f24c0680816be45 Mon Sep 17 00:00:00 2001 From: Borislav Petkov Date: Fri, 3 Feb 2012 20:18:01 +0100 Subject: x86/mce/AMD: Fix UP build error 141168c36cde ("x86: Simplify code by removing a !SMP #ifdefs from 'struct cpuinfo_x86'") removed a bunch of CONFIG_SMP ifdefs around code touching struct cpuinfo_x86 members but also caused the following build error with Randy's randconfigs: mce_amd.c:(.cpuinit.text+0x4723): undefined reference to `cpu_llc_shared_map' Restore the #ifdef in threshold_create_bank() which creates symlinks on the non-BSP CPUs. There's a better patch series being worked on by Kevin Winchester which will solve this in a cleaner fashion, but that series is too ambitious for v3.3 merging - so we first queue up this trivial fix and then do the rest for v3.4. Signed-off-by: Borislav Petkov Acked-by: Kevin Winchester Cc: Randy Dunlap Cc: Nick Bowler Link: http://lkml.kernel.org/r/20120203191801.GA2846@x1.osrc.amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/mcheck/mce_amd.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/mcheck/mce_amd.c b/arch/x86/kernel/cpu/mcheck/mce_amd.c index 786e76a..e4eeaaf 100644 --- a/arch/x86/kernel/cpu/mcheck/mce_amd.c +++ b/arch/x86/kernel/cpu/mcheck/mce_amd.c @@ -528,6 +528,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) sprintf(name, "threshold_bank%i", bank); +#ifdef CONFIG_SMP if (cpu_data(cpu).cpu_core_id && shared_bank[bank]) { /* symlink */ i = cpumask_first(cpu_llc_shared_mask(cpu)); @@ -553,6 +554,7 @@ static __cpuinit int threshold_create_bank(unsigned int cpu, unsigned int bank) goto out; } +#endif b = kzalloc(sizeof(struct threshold_bank), GFP_KERNEL); if (!b) { -- cgit v1.1 From 1018faa6cf23b256bf25919ef203cd7c129f06f2 Mon Sep 17 00:00:00 2001 From: Joerg Roedel Date: Wed, 29 Feb 2012 14:57:32 +0100 Subject: perf/x86/kvm: Fix Host-Only/Guest-Only counting with SVM disabled It turned out that a performance counter on AMD does not count at all when the GO or HO bit is set in the control register and SVM is disabled in EFER. This patch works around this issue by masking out the HO bit in the performance counter control register when SVM is not enabled. The GO bit is not touched because it is only set when the user wants to count in guest-mode only. So when SVM is disabled the counter should not run at all and the not-counting is the intended behaviour. Signed-off-by: Joerg Roedel Signed-off-by: Peter Zijlstra Cc: Avi Kivity Cc: Stephane Eranian Cc: David Ahern Cc: Gleb Natapov Cc: Robert Richter Cc: stable@vger.kernel.org # v3.2 Link: http://lkml.kernel.org/r/1330523852-19566-1-git-send-email-joerg.roedel@amd.com Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.h | 8 ++++++-- arch/x86/kernel/cpu/perf_event_amd.c | 37 ++++++++++++++++++++++++++++++++++-- 2 files changed, 41 insertions(+), 4 deletions(-) (limited to 'arch/x86/kernel/cpu') diff --git a/arch/x86/kernel/cpu/perf_event.h b/arch/x86/kernel/cpu/perf_event.h index 8944062..c30c807 100644 --- a/arch/x86/kernel/cpu/perf_event.h +++ b/arch/x86/kernel/cpu/perf_event.h @@ -147,7 +147,9 @@ struct cpu_hw_events { /* * AMD specific bits */ - struct amd_nb *amd_nb; + struct amd_nb *amd_nb; + /* Inverted mask of bits to clear in the perf_ctr ctrl registers */ + u64 perf_ctr_virt_mask; void *kfree_on_online; }; @@ -417,9 +419,11 @@ void x86_pmu_disable_all(void); static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, u64 enable_mask) { + u64 disable_mask = __this_cpu_read(cpu_hw_events.perf_ctr_virt_mask); + if (hwc->extra_reg.reg) wrmsrl(hwc->extra_reg.reg, hwc->extra_reg.config); - wrmsrl(hwc->config_base, hwc->config | enable_mask); + wrmsrl(hwc->config_base, (hwc->config | enable_mask) & ~disable_mask); } void x86_pmu_enable_all(int added); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 0397b23..67250a5 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -1,4 +1,5 @@ #include +#include #include #include #include @@ -357,7 +358,9 @@ static void amd_pmu_cpu_starting(int cpu) struct amd_nb *nb; int i, nb_id; - if (boot_cpu_data.x86_max_cores < 2) + cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; + + if (boot_cpu_data.x86_max_cores < 2 || boot_cpu_data.x86 == 0x15) return; nb_id = amd_get_nb_id(cpu); @@ -587,9 +590,9 @@ static __initconst const struct x86_pmu amd_pmu_f15h = { .put_event_constraints = amd_put_event_constraints, .cpu_prepare = amd_pmu_cpu_prepare, - .cpu_starting = amd_pmu_cpu_starting, .cpu_dead = amd_pmu_cpu_dead, #endif + .cpu_starting = amd_pmu_cpu_starting, }; __init int amd_pmu_init(void) @@ -621,3 +624,33 @@ __init int amd_pmu_init(void) return 0; } + +void amd_pmu_enable_virt(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + cpuc->perf_ctr_virt_mask = 0; + + /* Reload all events */ + x86_pmu_disable_all(); + x86_pmu_enable_all(0); +} +EXPORT_SYMBOL_GPL(amd_pmu_enable_virt); + +void amd_pmu_disable_virt(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + /* + * We only mask out the Host-only bit so that host-only counting works + * when SVM is disabled. If someone sets up a guest-only counter when + * SVM is disabled the Guest-only bits still gets set and the counter + * will not count anything. + */ + cpuc->perf_ctr_virt_mask = AMD_PERFMON_EVENTSEL_HOSTONLY; + + /* Reload all events */ + x86_pmu_disable_all(); + x86_pmu_enable_all(0); +} +EXPORT_SYMBOL_GPL(amd_pmu_disable_virt); -- cgit v1.1