From c499336cea8bbe15554c6fcea2138658c5395bfe Mon Sep 17 00:00:00 2001 From: Kan Liang Date: Tue, 15 Nov 2016 13:40:10 -0500 Subject: perf/x86/uncore: Fix crash by removing bogus event_list[] handling for SNB client uncore IMC Vince Weaver reported the following bug when KASAN is enabled: [ 205.748005] BUG: KASAN: slab-out-of-bounds in snb_uncore_imc_event_del+0x6c/0xa0 at addr ffff8800caa43768 [ 205.758324] Read of size 8 by task perf_fuzzer/6618 It's caused by accessing box->event_list. For client IMC, there are no generic counters. It defines its own fixed free running counters. So event_list and n_events are unused. They can be removed safely, which fixes the bug. ( There's still the separate question of how uninitialized state snuck into this data structure - but that's a separate fix. ) Reported-by: Vince Weaver Tested-by: Vince Weaver Signed-off-by: Kan Liang Cc: Peter Zijlstra Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Stephane Eranian Cc: Vince Weaver Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Alexander Shishkin Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Thomas Gleixner Cc: acme@kernel.org Cc: davej@codemonkey.org.uk Cc: dvyukov@google.com Cc: eranian@gmail.com Link: http://lkml.kernel.org/r/1479235210-29090-1-git-send-email-kan.liang@intel.com Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore_snb.c | 12 ------------ 1 file changed, 12 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore_snb.c b/arch/x86/events/intel/uncore_snb.c index 81195cc..a3dcc12 100644 --- a/arch/x86/events/intel/uncore_snb.c +++ b/arch/x86/events/intel/uncore_snb.c @@ -490,24 +490,12 @@ static int snb_uncore_imc_event_add(struct perf_event *event, int flags) snb_uncore_imc_event_start(event, 0); - box->n_events++; - return 0; } static void snb_uncore_imc_event_del(struct perf_event *event, int flags) { - struct intel_uncore_box *box = uncore_event_to_box(event); - int i; - snb_uncore_imc_event_stop(event, PERF_EF_UPDATE); - - for (i = 0; i < box->n_events; i++) { - if (event == box->event_list[i]) { - --box->n_events; - break; - } - } } int snb_pci2phy_map_init(int devid) -- cgit v1.1 From e40ed1542dd779e5037a22c6b534e57127472365 Mon Sep 17 00:00:00 2001 From: Janakarajan Natarajan Date: Thu, 17 Nov 2016 10:15:06 -0600 Subject: perf/x86: Add perf support for AMD family-17h processors This patch enables perf core PMU support for the new AMD family-17h processors. In family-17h, there is no PMC-event constraint. All events, irrespective of the type, can be measured using any of the six generic performance counters. Signed-off-by: Janakarajan Natarajan Acked-by: Borislav Petkov Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Suravee Suthikulpanit Cc: Thomas Gleixner Cc: Vince Weaver Link: http://lkml.kernel.org/r/1479399306-13375-1-git-send-email-Janakarajan.Natarajan@amd.com Signed-off-by: Ingo Molnar --- arch/x86/events/amd/core.c | 8 +++++++- 1 file changed, 7 insertions(+), 1 deletion(-) (limited to 'arch') diff --git a/arch/x86/events/amd/core.c b/arch/x86/events/amd/core.c index f5f4b3f..afb222b 100644 --- a/arch/x86/events/amd/core.c +++ b/arch/x86/events/amd/core.c @@ -662,7 +662,13 @@ static int __init amd_core_pmu_init(void) pr_cont("Fam15h "); x86_pmu.get_event_constraints = amd_get_event_constraints_f15h; break; - + case 0x17: + pr_cont("Fam17h "); + /* + * In family 17h, there are no event constraints in the PMC hardware. + * We fallback to using default amd_get_event_constraints. + */ + break; default: pr_err("core perfctr but no constraints; unknown hardware!\n"); return -ENODEV; -- cgit v1.1 From ae31fe51a3cceaa0cabdb3058f69669ecb47f12e Mon Sep 17 00:00:00 2001 From: Johannes Weiner Date: Tue, 22 Nov 2016 10:57:42 +0100 Subject: perf/x86: Restore TASK_SIZE check on frame pointer The following commit: 75925e1ad7f5 ("perf/x86: Optimize stack walk user accesses") ... switched from copy_from_user_nmi() to __copy_from_user_nmi() with a manual access_ok() check. Unfortunately, copy_from_user_nmi() does an explicit check against TASK_SIZE, whereas the access_ok() uses whatever the current address limit of the task is. We are getting NMIs when __probe_kernel_read() has switched to KERNEL_DS, and then see vmalloc faults when we access what looks like pointers into vmalloc space: [] WARNING: CPU: 3 PID: 3685731 at arch/x86/mm/fault.c:435 vmalloc_fault+0x289/0x290 [] CPU: 3 PID: 3685731 Comm: sh Tainted: G W 4.6.0-5_fbk1_223_gdbf0f40 #1 [] Call Trace: [] [] dump_stack+0x4d/0x6c [] [] __warn+0xd3/0xf0 [] [] warn_slowpath_null+0x1d/0x20 [] [] vmalloc_fault+0x289/0x290 [] [] __do_page_fault+0x330/0x490 [] [] do_page_fault+0xc/0x10 [] [] page_fault+0x22/0x30 [] [] ? perf_callchain_user+0x100/0x2a0 [] [] get_perf_callchain+0x17f/0x190 [] [] perf_callchain+0x67/0x80 [] [] perf_prepare_sample+0x2a0/0x370 [] [] perf_event_output+0x20/0x60 [] [] ? perf_event_update_userpage+0xc7/0x130 [] [] __perf_event_overflow+0x181/0x1d0 [] [] perf_event_overflow+0x14/0x20 [] [] intel_pmu_handle_irq+0x1d3/0x490 [] [] ? copy_user_enhanced_fast_string+0x7/0x10 [] [] ? vunmap_page_range+0x1a1/0x2f0 [] [] ? unmap_kernel_range_noflush+0x11/0x20 [] [] ? ghes_copy_tofrom_phys+0x116/0x1f0 [] [] ? x2apic_send_IPI_self+0x1d/0x20 [] [] perf_event_nmi_handler+0x2d/0x50 [] [] nmi_handle+0x61/0x110 [] [] default_do_nmi+0x44/0x110 [] [] do_nmi+0xdb/0x150 [] [] end_repeat_nmi+0x1a/0x1e [] [] ? copy_user_enhanced_fast_string+0x7/0x10 [] [] ? copy_user_enhanced_fast_string+0x7/0x10 [] [] ? copy_user_enhanced_fast_string+0x7/0x10 [] <> [] ? __probe_kernel_read+0x3e/0xa0 Fix this by moving the valid_user_frame() check to before the uaccess that loads the return address and the pointer to the next frame. Signed-off-by: Johannes Weiner Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: linux-kernel@vger.kernel.org Fixes: 75925e1ad7f5 ("perf/x86: Optimize stack walk user accesses") Signed-off-by: Ingo Molnar --- arch/x86/events/core.c | 10 ++-------- 1 file changed, 2 insertions(+), 8 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/core.c b/arch/x86/events/core.c index d31735f..9d4bf3a 100644 --- a/arch/x86/events/core.c +++ b/arch/x86/events/core.c @@ -2352,7 +2352,7 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent frame.next_frame = 0; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, 8)) + if (!valid_user_frame(fp, sizeof(frame))) break; bytes = __copy_from_user_nmi(&frame.next_frame, fp, 4); @@ -2362,9 +2362,6 @@ perf_callchain_user32(struct pt_regs *regs, struct perf_callchain_entry_ctx *ent if (bytes != 0) break; - if (!valid_user_frame(fp, sizeof(frame))) - break; - perf_callchain_store(entry, cs_base + frame.return_address); fp = compat_ptr(ss_base + frame.next_frame); } @@ -2413,7 +2410,7 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs frame.next_frame = NULL; frame.return_address = 0; - if (!access_ok(VERIFY_READ, fp, sizeof(*fp) * 2)) + if (!valid_user_frame(fp, sizeof(frame))) break; bytes = __copy_from_user_nmi(&frame.next_frame, fp, sizeof(*fp)); @@ -2423,9 +2420,6 @@ perf_callchain_user(struct perf_callchain_entry_ctx *entry, struct pt_regs *regs if (bytes != 0) break; - if (!valid_user_frame(fp, sizeof(frame))) - break; - perf_callchain_store(entry, frame.return_address); fp = (void __user *)frame.next_frame; } -- cgit v1.1 From b8000586c90b4804902058a38d3a59ce5708e695 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 17 Nov 2016 18:17:31 +0100 Subject: perf/x86/intel: Cure bogus unwind from PEBS entries Vince Weaver reported that perf_fuzzer + KASAN detects that PEBS event unwinds sometimes do 'weird' things. In particular, we seemed to be ending up unwinding from random places on the NMI stack. While it was somewhat expected that the event record BP,SP would not match the interrupt BP,SP in that the interrupt is strictly later than the record event, it was overlooked that it could be on an already overwritten stack. Therefore, don't copy the recorded BP,SP over the interrupted BP,SP when we need stack unwinds. Note that its still possible the unwind doesn't full match the actual event, as its entirely possible to have done an (I)RET between record and interrupt, but on average it should still point in the general direction of where the event came from. Also, it's the best we can do, considering. The particular scenario that triggered the bogus NMI stack unwind was a PEBS event with very short period, upon enabling the event at the tail of the PMI handler (FREEZE_ON_PMI is not used), it instantly triggers a record (while still on the NMI stack) which in turn triggers the next PMI. This then causes back-to-back NMIs and we'll try and unwind the stack-frame from the last NMI, which obviously is now overwritten by our own. Analyzed-by: Josh Poimboeuf Reported-by: Vince Weaver Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Arnaldo Carvalho de Melo Cc: Jiri Olsa Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Stephane Eranian Cc: Thomas Gleixner Cc: davej@codemonkey.org.uk Cc: dvyukov@google.com Cc: stable@vger.kernel.org Fixes: ca037701a025 ("perf, x86: Add PEBS infrastructure") Link: http://lkml.kernel.org/r/20161117171731.GV3157@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/events/intel/ds.c | 35 +++++++++++++++++++++++------------ arch/x86/events/perf_event.h | 2 +- 2 files changed, 24 insertions(+), 13 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/ds.c b/arch/x86/events/intel/ds.c index 0319311..be20239 100644 --- a/arch/x86/events/intel/ds.c +++ b/arch/x86/events/intel/ds.c @@ -1108,20 +1108,20 @@ static void setup_pebs_sample_data(struct perf_event *event, } /* - * We use the interrupt regs as a base because the PEBS record - * does not contain a full regs set, specifically it seems to - * lack segment descriptors, which get used by things like - * user_mode(). + * We use the interrupt regs as a base because the PEBS record does not + * contain a full regs set, specifically it seems to lack segment + * descriptors, which get used by things like user_mode(). * - * In the simple case fix up only the IP and BP,SP regs, for - * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. - * A possible PERF_SAMPLE_REGS will have to transfer all regs. + * In the simple case fix up only the IP for PERF_SAMPLE_IP. + * + * We must however always use BP,SP from iregs for the unwinder to stay + * sane; the record BP,SP can point into thin air when the record is + * from a previous PMI context or an (I)RET happend between the record + * and PMI. */ *regs = *iregs; regs->flags = pebs->flags; set_linear_ip(regs, pebs->ip); - regs->bp = pebs->bp; - regs->sp = pebs->sp; if (sample_type & PERF_SAMPLE_REGS_INTR) { regs->ax = pebs->ax; @@ -1130,10 +1130,21 @@ static void setup_pebs_sample_data(struct perf_event *event, regs->dx = pebs->dx; regs->si = pebs->si; regs->di = pebs->di; - regs->bp = pebs->bp; - regs->sp = pebs->sp; - regs->flags = pebs->flags; + /* + * Per the above; only set BP,SP if we don't need callchains. + * + * XXX: does this make sense? + */ + if (!(sample_type & PERF_SAMPLE_CALLCHAIN)) { + regs->bp = pebs->bp; + regs->sp = pebs->sp; + } + + /* + * Preserve PERF_EFLAGS_VM from set_linear_ip(). + */ + regs->flags = pebs->flags | (regs->flags & PERF_EFLAGS_VM); #ifndef CONFIG_X86_32 regs->r8 = pebs->r8; regs->r9 = pebs->r9; diff --git a/arch/x86/events/perf_event.h b/arch/x86/events/perf_event.h index 5874d8d..a77ee02 100644 --- a/arch/x86/events/perf_event.h +++ b/arch/x86/events/perf_event.h @@ -113,7 +113,7 @@ struct debug_store { * Per register state. */ struct er_account { - raw_spinlock_t lock; /* per-core: protect structure */ + raw_spinlock_t lock; /* per-core: protect structure */ u64 config; /* extra MSR config */ u64 reg; /* extra MSR number */ atomic_t ref; /* reference count */ -- cgit v1.1 From 033ac60c7f21f9996a0fab2fd04f334afbf77b33 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 18 Nov 2016 13:53:54 +0100 Subject: perf/x86/intel/uncore: Allow only a single PMU/box within an events group Group validation expects all events to be of the same PMU; however is_uncore_pmu() is too wide, it matches _all_ uncore events, even across PMUs. This triggers failure when we group different events from different uncore PMUs, like: perf stat -vv -e '{uncore_cbox_0/config=0x0334/,uncore_qpi_0/event=1/}' -a sleep 1 Fix is_uncore_pmu() by only matching events to the box at hand. Note that generic code; ran after this step; will disallow this mixture of PMU events. Reported-by: Jiri Olsa Tested-by: Jiri Olsa Signed-off-by: Peter Zijlstra (Intel) Cc: Alexander Shishkin Cc: Arnaldo Carvalho de Melo Cc: Kan Liang Cc: Linus Torvalds Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Thomas Gleixner Cc: Vince Weaver Cc: Vince Weaver Link: http://lkml.kernel.org/r/20161118125354.GQ3117@twins.programming.kicks-ass.net Signed-off-by: Ingo Molnar --- arch/x86/events/intel/uncore.c | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) (limited to 'arch') diff --git a/arch/x86/events/intel/uncore.c b/arch/x86/events/intel/uncore.c index efca268..dbaaf7dc 100644 --- a/arch/x86/events/intel/uncore.c +++ b/arch/x86/events/intel/uncore.c @@ -319,9 +319,9 @@ static struct intel_uncore_box *uncore_alloc_box(struct intel_uncore_type *type, */ static int uncore_pmu_event_init(struct perf_event *event); -static bool is_uncore_event(struct perf_event *event) +static bool is_box_event(struct intel_uncore_box *box, struct perf_event *event) { - return event->pmu->event_init == uncore_pmu_event_init; + return &box->pmu->pmu == event->pmu; } static int @@ -340,7 +340,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, n = box->n_events; - if (is_uncore_event(leader)) { + if (is_box_event(box, leader)) { box->event_list[n] = leader; n++; } @@ -349,7 +349,7 @@ uncore_collect_events(struct intel_uncore_box *box, struct perf_event *leader, return n; list_for_each_entry(event, &leader->sibling_list, group_entry) { - if (!is_uncore_event(event) || + if (!is_box_event(box, event) || event->state <= PERF_EVENT_STATE_OFF) continue; -- cgit v1.1