From fd2a50a0240f5f5b59070474eabd83a85720a406 Mon Sep 17 00:00:00 2001 From: Naga Chumbalkar Date: Thu, 24 Dec 2009 01:54:47 +0000 Subject: x86, perfctr: Remove unused func avail_to_resrv_perfctr_nmi() avail_to_resrv_perfctr_nmi() is neither EXPORT'd, nor used in the file. So remove it. Signed-off-by: Naga Chumbalkar Acked-by: Cyrill Gorcunov Cc: oprofile-list@lists.sf.net LKML-Reference: <20091224015441.6005.4408.sendpatchset@localhost.localdomain> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/nmi.h | 1 - arch/x86/kernel/cpu/perfctr-watchdog.c | 11 ----------- 2 files changed, 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/nmi.h b/arch/x86/include/asm/nmi.h index 139d4c1..93da9c3 100644 --- a/arch/x86/include/asm/nmi.h +++ b/arch/x86/include/asm/nmi.h @@ -19,7 +19,6 @@ extern void die_nmi(char *str, struct pt_regs *regs, int do_panic); extern int check_nmi_watchdog(void); extern int nmi_watchdog_enabled; extern int avail_to_resrv_perfctr_nmi_bit(unsigned int); -extern int avail_to_resrv_perfctr_nmi(unsigned int); extern int reserve_perfctr_nmi(unsigned int); extern void release_perfctr_nmi(unsigned int); extern int reserve_evntsel_nmi(unsigned int); diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 898df97..74f4e85 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -115,17 +115,6 @@ int avail_to_resrv_perfctr_nmi_bit(unsigned int counter) return !test_bit(counter, perfctr_nmi_owner); } - -/* checks the an msr for availability */ -int avail_to_resrv_perfctr_nmi(unsigned int msr) -{ - unsigned int counter; - - counter = nmi_perfctr_msr_to_bit(msr); - BUG_ON(counter > NMI_MAX_COUNTER_BITS); - - return !test_bit(counter, perfctr_nmi_owner); -} EXPORT_SYMBOL(avail_to_resrv_perfctr_nmi_bit); int reserve_perfctr_nmi(unsigned int msr) -- cgit v1.1 From 0fb8ee48d9dfff6a0913ceb0be2068d8be203763 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 31 Dec 2009 05:53:03 +0100 Subject: perf: Drop useless check for ignored frame The check that ignores the debug and nmi stack frames is useless now that we have a frame pointer that makes us start at the right place. We don't anymore have to deal with these. 
Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras LKML-Reference: <1262235183-5320-2-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/stacktrace.h | 2 -- arch/x86/kernel/cpu/perf_event.c | 8 -------- arch/x86/kernel/dumpstack_32.c | 5 ----- arch/x86/kernel/dumpstack_64.c | 5 ----- 4 files changed, 20 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/stacktrace.h b/arch/x86/include/asm/stacktrace.h index 35e8912..4dab78e 100644 --- a/arch/x86/include/asm/stacktrace.h +++ b/arch/x86/include/asm/stacktrace.h @@ -3,8 +3,6 @@ extern int kstack_depth_to_print; -int x86_is_stack_id(int id, char *name); - struct thread_info; struct stacktrace_ops; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index d616c06..b1bb8c5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2297,7 +2297,6 @@ void callchain_store(struct perf_callchain_entry *entry, u64 ip) static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_irq_entry); static DEFINE_PER_CPU(struct perf_callchain_entry, pmc_nmi_entry); -static DEFINE_PER_CPU(int, in_ignored_frame); static void @@ -2313,10 +2312,6 @@ static void backtrace_warning(void *data, char *msg) static int backtrace_stack(void *data, char *name) { - per_cpu(in_ignored_frame, smp_processor_id()) = - x86_is_stack_id(NMI_STACK, name) || - x86_is_stack_id(DEBUG_STACK, name); - return 0; } @@ -2324,9 +2319,6 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - if (per_cpu(in_ignored_frame, smp_processor_id())) - return; - if (reliable) callchain_store(entry, addr); } diff --git a/arch/x86/kernel/dumpstack_32.c b/arch/x86/kernel/dumpstack_32.c index ae775ca..11540a1 100644 --- a/arch/x86/kernel/dumpstack_32.c +++ b/arch/x86/kernel/dumpstack_32.c @@ -18,11 +18,6 @@ #include "dumpstack.h" -/* Just a stub for now */ -int x86_is_stack_id(int id, char *name) -{ - return 0; -} void dump_trace(struct task_struct *task, struct pt_regs *regs, unsigned long *stack, unsigned long bp, diff --git a/arch/x86/kernel/dumpstack_64.c b/arch/x86/kernel/dumpstack_64.c index 0ad9597..676bc05 100644 --- a/arch/x86/kernel/dumpstack_64.c +++ b/arch/x86/kernel/dumpstack_64.c @@ -33,11 +33,6 @@ static char x86_stack_ids[][8] = { #endif }; -int x86_is_stack_id(int id, char *name) -{ - return x86_stack_ids[id - 1] == name; -} - static unsigned long *in_exception_stack(unsigned cpu, unsigned long stack, unsigned *usedp, char **idp) { -- cgit v1.1 From aa5add93e92019018e905146f8c3d3f8e3c08300 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 5 Jan 2010 17:46:56 -0500 Subject: x86/ptrace: Remove unused regs_get_argument_nth API Because of dropping function argument syntax from kprobe-tracer, we don't need this API anymore. 
Signed-off-by: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Arnaldo Carvalho de Melo Cc: systemtap Cc: DLE Cc: Frederic Weisbecker Cc: Roland McGrath Cc: Oleg Nesterov Cc: Mahesh Salgaonkar Cc: Benjamin Herrenschmidt Cc: Michael Neuling Cc: Steven Rostedt Cc: linuxppc-dev@ozlabs.org LKML-Reference: <20100105224656.19431.92588.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/ptrace.h | 4 ---- arch/x86/kernel/ptrace.c | 24 ------------------------ 2 files changed, 28 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 9d369f6..2010280 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -274,10 +274,6 @@ static inline unsigned long regs_get_kernel_stack_nth(struct pt_regs *regs, return 0; } -/* Get Nth argument at function call */ -extern unsigned long regs_get_argument_nth(struct pt_regs *regs, - unsigned int n); - /* * These are defined as per linux/ptrace.h, which see. */ diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index 017d937..73554a3 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -140,30 +140,6 @@ static const int arg_offs_table[] = { #endif }; -/** - * regs_get_argument_nth() - get Nth argument at function call - * @regs: pt_regs which contains registers at function entry. - * @n: argument number. - * - * regs_get_argument_nth() returns @n th argument of a function call. - * Since usually the kernel stack will be changed right after function entry, - * you must use this at function entry. If the @n th entry is NOT in the - * kernel stack or pt_regs, this returns 0. - */ -unsigned long regs_get_argument_nth(struct pt_regs *regs, unsigned int n) -{ - if (n < ARRAY_SIZE(arg_offs_table)) - return *(unsigned long *)((char *)regs + arg_offs_table[n]); - else { - /* - * The typical case: arg n is on the stack. - * (Note: stack[0] = return address, so skip it) - */ - n -= ARRAY_SIZE(arg_offs_table); - return regs_get_kernel_stack_nth(regs, 1 + n); - } -} - /* * does not yet catch signals sent when the child dies. * in exit.c or in signal.c. -- cgit v1.1 From 557a701c16553b0b691dbb64ef30361115a80f64 Mon Sep 17 00:00:00 2001 From: Thomas Renninger Date: Mon, 14 Dec 2009 11:44:15 +0100 Subject: [CPUFREQ] Fix use after free of struct powernow_k8_data Easy fix for a regression introduced in 2.6.31. On managed CPUs the cpufreq.c core will call driver->exit(cpu) on the managed cpus and powernow_k8 will free the core's data. Later driver->get(cpu) function might get called trying to read out the current freq of a managed cpu and the NULL pointer check does not work on the freed object -> better set it to NULL. ->get() is unsigned and must return 0 as invalid frequency. 
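In short: the exit path must clear the per-CPU pointer it has just freed, so that the NULL check in the unsigned ->get() callback actually catches the managed-CPU case and returns 0 instead of touching freed memory. A condensed sketch of the resulting pattern (simplified from the hunks below; the per_cpu() lookups are paraphrased from surrounding context, not quoted from the patch):

    static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol)
    {
            struct powernow_k8_data *data = per_cpu(powernow_data, pol->cpu);

            ...
            kfree(data->powernow_table);
            kfree(data);
            /* don't leave a dangling pointer behind for ->get() */
            per_cpu(powernow_data, pol->cpu) = NULL;
            return 0;
    }

    static unsigned int powernowk8_get(unsigned int cpu)
    {
            struct powernow_k8_data *data = per_cpu(powernow_data, cpu);

            if (!data)
                    return 0;       /* unsigned return: 0 means "no valid frequency" */
            ...
    }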
Reference: http://bugzilla.kernel.org/show_bug.cgi?id=14391 Signed-off-by: Thomas Renninger Tested-by: Michal Schmidt CC: stable@kernel.org Signed-off-by: Dave Jones --- arch/x86/kernel/cpu/cpufreq/powernow-k8.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c index a9df944..2da4fa3 100644 --- a/arch/x86/kernel/cpu/cpufreq/powernow-k8.c +++ b/arch/x86/kernel/cpu/cpufreq/powernow-k8.c @@ -1356,6 +1356,7 @@ static int __devexit powernowk8_cpu_exit(struct cpufreq_policy *pol) kfree(data->powernow_table); kfree(data); + per_cpu(powernow_data, pol->cpu) = NULL; return 0; } @@ -1375,7 +1376,7 @@ static unsigned int powernowk8_get(unsigned int cpu) int err; if (!data) - return -EINVAL; + return 0; smp_call_function_single(cpu, query_values_on_cpu, &err, true); if (err) -- cgit v1.1 From 339ce1a4dc2ca26444c4f65c31b71a5056f3bb0b Mon Sep 17 00:00:00 2001 From: Anton Blanchard Date: Mon, 18 Jan 2010 16:47:07 +1100 Subject: perf: Fix inconsistency between IP and callchain sampling When running perf across all cpus with backtracing (-a -g), sometimes we get samples without associated backtraces: 23.44% init [kernel] [k] restore 11.46% init eeba0c [k] 0x00000000eeba0c 6.77% swapper [kernel] [k] .perf_ctx_adjust_freq 5.73% init [kernel] [k] .__trace_hcall_entry 4.69% perf libc-2.9.so [.] 0x0000000006bb8c | |--11.11%-- 0xfffa941bbbc It turns out the backtrace code has a check for the idle task and the IP sampling does not. This creates problems when profiling an interrupt heavy workload (in my case 10Gbit ethernet) since we get no backtraces for interrupts received while idle (ie most of the workload). Right now x86 and sh check that current is not NULL, which should never happen so remove that too. Idle task's exclusion must be performed from the core code, on top of perf_event_attr:exclude_idle. Signed-off-by: Anton Blanchard Cc: Peter Zijlstra Cc: Paul Mackerras Cc: Ingo Molnar Cc: Benjamin Herrenschmidt Cc: Paul Mundt LKML-Reference: <20100118054707.GT12666@kryten> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/cpu/perf_event.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b1bb8c5..ed1998b 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2425,9 +2425,6 @@ perf_do_callchain(struct pt_regs *regs, struct perf_callchain_entry *entry) is_user = user_mode(regs); - if (!current || current->pid == 0) - return; - if (is_user && current->state != TASK_RUNNING) return; -- cgit v1.1 From 40f9249a73f6c251adea492b1c3d19d39e2a9bda Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Thu, 28 Jan 2010 16:44:01 +0530 Subject: x86/debug: Clear reserved bits of DR6 in do_debug() Clear the reserved bits from the stored copy of debug status register (DR6). This will help easy bitwise operations such as quick testing of a debug event origin. 
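Concretely, DR6 contains bits the CPU reports as 1 regardless of which debug event fired, so masking them out once lets later code test the saved value directly. A sketch of the kind of test this enables (DR6_RESERVED comes from the hunk below; DR_STEP and DR_TRAP_BITS are existing debugreg definitions used elsewhere in this series; the checks themselves are illustrative, not part of the patch):

    unsigned long dr6;

    get_debugreg(dr6, 6);
    dr6 &= ~DR6_RESERVED;           /* drop the always-set reserved bits */

    if (!dr6)                       /* nothing of interest left */
            return;
    if (dr6 & DR_STEP)              /* single-step trap */
            ...
    if (dr6 & DR_TRAP_BITS)         /* one of the DR0-DR3 breakpoint conditions */
            ...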
Signed-off-by: K.Prasad Cc: Roland McGrath Cc: Jan Kiszka Cc: Alan Stern Cc: Ingo Molnar LKML-Reference: <20100128111401.GB13935@in.ibm.com> Signed-off-by: Frederic Weisbecker --- arch/x86/include/asm/debugreg.h | 3 +++ arch/x86/kernel/traps.c | 3 +++ 2 files changed, 6 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/debugreg.h b/arch/x86/include/asm/debugreg.h index 8240f76..b81002f 100644 --- a/arch/x86/include/asm/debugreg.h +++ b/arch/x86/include/asm/debugreg.h @@ -14,6 +14,9 @@ which debugging register was responsible for the trap. The other bits are either reserved or not of interest to us. */ +/* Define reserved bits in DR6 which are always set to 1 */ +#define DR6_RESERVED (0xFFFF0FF0) + #define DR_TRAP0 (0x1) /* db0 */ #define DR_TRAP1 (0x2) /* db1 */ #define DR_TRAP2 (0x4) /* db2 */ diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 3339917..1168e44 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -534,6 +534,9 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) get_debugreg(dr6, 6); + /* Filter out all the reserved bits which are preset to 1 */ + dr6 &= ~DR6_RESERVED; + /* Catch kmemcheck conditions first of all! */ if ((dr6 & DR_STEP) && kmemcheck_trap(regs)) return; -- cgit v1.1 From e0e53db6133c32964fd17f20b17073a402f07ed3 Mon Sep 17 00:00:00 2001 From: "K.Prasad" Date: Thu, 28 Jan 2010 16:44:15 +0530 Subject: x86/hw-breakpoints: Optimize return code from notifier chain in hw_breakpoint_handler Processing of debug exceptions in do_debug() can stop if it originated from a hw-breakpoint exception by returning NOTIFY_STOP in most cases. But for certain cases such as: a) user-space breakpoints with pending SIGTRAP signal delivery (as in the case of ptrace induced breakpoints). b) exceptions due to other causes than breakpoints We will continue to process the exception by returning NOTIFY_DONE. Signed-off-by: K.Prasad Cc: Ingo Molnar Cc: Roland McGrath Cc: Alan Stern Cc: Jan Kiszka LKML-Reference: <20100128111415.GC13935@in.ibm.com> Signed-off-by: Frederic Weisbecker --- arch/x86/kernel/hw_breakpoint.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index 05d5fec..ae90b47 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -502,8 +502,6 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) rcu_read_lock(); bp = per_cpu(bp_per_reg[i], cpu); - if (bp) - rc = NOTIFY_DONE; /* * Reset the 'i'th TRAP bit in dr6 to denote completion of * exception handling @@ -522,7 +520,13 @@ static int __kprobes hw_breakpoint_handler(struct die_args *args) rcu_read_unlock(); } - if (dr6 & (~DR_TRAP_BITS)) + /* + * Further processing in do_debug() is needed for a) user-space + * breakpoints (to generate signals) and b) when the system has + * taken exception due to multiple causes + */ + if ((current->thread.debugreg6 & DR_TRAP_BITS) || + (dr6 & (~DR_TRAP_BITS))) rc = NOTIFY_DONE; set_debugreg(dr7, 7); -- cgit v1.1 From 1da53e023029c067ba1277a33038c65d6e4c99b3 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 18 Jan 2010 10:58:01 +0200 Subject: perf_events, x86: Improve x86 event scheduling This patch improves event scheduling by maximizing the use of PMU registers regardless of the order in which events are created in a group. The algorithm takes into account the list of counter constraints for each event. 
It assigns events to counters from the most constrained, i.e., works on only one counter, to the least constrained, i.e., works on any counter. Intel Fixed counter events and the BTS special event are also handled via this algorithm which is designed to be fairly generic. The patch also updates the validation of an event to use the scheduling algorithm. This will cause early failure in perf_event_open(). The 2nd version of this patch follows the model used by PPC, by running the scheduling algorithm and the actual assignment separately. Actual assignment takes place in hw_perf_enable() whereas scheduling is implemented in hw_perf_group_sched_in() and x86_pmu_enable(). Signed-off-by: Stephane Eranian [ fixup whitespace and style nits as well as adding is_x86_event() ] Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <4b5430c6.0f975e0a.1bf9.ffff85fe@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 16 +- arch/x86/kernel/cpu/perf_event.c | 775 +++++++++++++++++++++++++++----------- 2 files changed, 574 insertions(+), 217 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 8d9f854..dbc0826 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -26,7 +26,14 @@ /* * Includes eventsel and unit mask as well: */ -#define ARCH_PERFMON_EVENT_MASK 0xffff + + +#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL +#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL +#define INTEL_ARCH_EDGE_MASK 0x00040000ULL +#define INTEL_ARCH_INV_MASK 0x00800000ULL +#define INTEL_ARCH_CNT_MASK 0xFF000000ULL +#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK) /* * filter mask to validate fixed counter events. @@ -37,7 +44,12 @@ * The other filters are supported by fixed counters. * The any-thread option is supported starting with v3. 
*/ -#define ARCH_PERFMON_EVENT_FILTER_MASK 0xff840000 +#define INTEL_ARCH_FIXED_MASK \ + (INTEL_ARCH_CNT_MASK| \ + INTEL_ARCH_INV_MASK| \ + INTEL_ARCH_EDGE_MASK|\ + INTEL_ARCH_UNIT_MASK|\ + INTEL_ARCH_EVENT_MASK) #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ed1998b..995ac4a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -7,6 +7,7 @@ * Copyright (C) 2009 Advanced Micro Devices, Inc., Robert Richter * Copyright (C) 2008-2009 Red Hat, Inc., Peter Zijlstra * Copyright (C) 2009 Intel Corporation, + * Copyright (C) 2009 Google, Inc., Stephane Eranian * * For licencing details see kernel-base/COPYING */ @@ -68,26 +69,37 @@ struct debug_store { u64 pebs_event_reset[MAX_PEBS_EVENTS]; }; +#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) + +struct event_constraint { + u64 idxmsk[BITS_TO_U64(X86_PMC_IDX_MAX)]; + int code; + int cmask; +}; + struct cpu_hw_events { - struct perf_event *events[X86_PMC_IDX_MAX]; - unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; int enabled; struct debug_store *ds; -}; -struct event_constraint { - unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - int code; + int n_events; + int n_added; + int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ + struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; -#define EVENT_CONSTRAINT(c, m) { .code = (c), .idxmsk[0] = (m) } -#define EVENT_CONSTRAINT_END { .code = 0, .idxmsk[0] = 0 } +#define EVENT_CONSTRAINT(c, n, m) { \ + .code = (c), \ + .cmask = (m), \ + .idxmsk[0] = (n) } -#define for_each_event_constraint(e, c) \ - for ((e) = (c); (e)->idxmsk[0]; (e)++) +#define EVENT_CONSTRAINT_END \ + { .code = 0, .cmask = 0, .idxmsk[0] = 0 } +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->cmask; (e)++) /* * struct x86_pmu - generic x86 pmu @@ -114,8 +126,9 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); - int (*get_event_idx)(struct cpu_hw_events *cpuc, - struct hw_perf_event *hwc); + void (*get_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event, u64 *idxmsk); + void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); + const struct event_constraint *event_constraints; }; static struct x86_pmu x86_pmu __read_mostly; @@ -124,7 +137,8 @@ static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; -static const struct event_constraint *event_constraints; +static int x86_perf_event_set_period(struct perf_event *event, + struct hw_perf_event *hwc, int idx); /* * Not sure about some of these @@ -171,14 +185,14 @@ static u64 p6_pmu_raw_event(u64 hw_event) return hw_event & P6_EVNTSEL_MASK; } -static const struct event_constraint intel_p6_event_constraints[] = +static struct event_constraint intel_p6_event_constraints[] = { - EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ - EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ - EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT(0xc1, 0x1, INTEL_ARCH_EVENT_MASK), /* FLOPS */ + EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), 
/* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */ EVENT_CONSTRAINT_END }; @@ -196,32 +210,43 @@ static const u64 intel_perfmon_event_map[] = [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, }; -static const struct event_constraint intel_core_event_constraints[] = -{ - EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ - EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ - EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ - EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ - EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ - EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ +static struct event_constraint intel_core_event_constraints[] = +{ + EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ + EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ + EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */ + EVENT_CONSTRAINT(0x11, 0x2, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */ + EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */ + EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */ + EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT(0x18, 0x1, INTEL_ARCH_EVENT_MASK), /* IDLE_DURING_DIV */ + EVENT_CONSTRAINT(0x19, 0x2, INTEL_ARCH_EVENT_MASK), /* DELAYED_BYPASS */ + EVENT_CONSTRAINT(0xa1, 0x1, INTEL_ARCH_EVENT_MASK), /* RS_UOPS_DISPATCH_CYCLES */ + EVENT_CONSTRAINT(0xcb, 0x1, INTEL_ARCH_EVENT_MASK), /* MEM_LOAD_RETIRED */ EVENT_CONSTRAINT_END }; -static const struct event_constraint intel_nehalem_event_constraints[] = -{ - EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ - EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ - EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ - EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ - EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ - EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ - EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ - EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ - EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ - EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ +static struct event_constraint intel_nehalem_event_constraints[] = +{ + EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ + EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ + EVENT_CONSTRAINT(0x40, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LD */ + EVENT_CONSTRAINT(0x41, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_ST */ + EVENT_CONSTRAINT(0x42, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK */ + EVENT_CONSTRAINT(0x43, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_ALL_REF */ + EVENT_CONSTRAINT(0x4e, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_PREFETCH */ + EVENT_CONSTRAINT(0x4c, 0x3, INTEL_ARCH_EVENT_MASK), /* LOAD_HIT_PRE */ + EVENT_CONSTRAINT(0x51, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D */ + EVENT_CONSTRAINT(0x52, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0x53, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK_FB_HIT */ + EVENT_CONSTRAINT(0xc5, 0x3, INTEL_ARCH_EVENT_MASK), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + +static 
struct event_constraint intel_gen_event_constraints[] = +{ + EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ + EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ EVENT_CONSTRAINT_END }; @@ -527,11 +552,11 @@ static u64 intel_pmu_raw_event(u64 hw_event) #define CORE_EVNTSEL_REG_MASK 0xFF000000ULL #define CORE_EVNTSEL_MASK \ - (CORE_EVNTSEL_EVENT_MASK | \ - CORE_EVNTSEL_UNIT_MASK | \ - CORE_EVNTSEL_EDGE_MASK | \ - CORE_EVNTSEL_INV_MASK | \ - CORE_EVNTSEL_REG_MASK) + (INTEL_ARCH_EVTSEL_MASK | \ + INTEL_ARCH_UNIT_MASK | \ + INTEL_ARCH_EDGE_MASK | \ + INTEL_ARCH_INV_MASK | \ + INTEL_ARCH_CNT_MASK) return hw_event & CORE_EVNTSEL_MASK; } @@ -1120,9 +1145,15 @@ static void amd_pmu_disable_all(void) void hw_perf_disable(void) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + if (!x86_pmu_initialized()) return; - return x86_pmu.disable_all(); + + if (cpuc->enabled) + cpuc->n_added = 0; + + x86_pmu.disable_all(); } static void p6_pmu_enable_all(void) @@ -1189,10 +1220,237 @@ static void amd_pmu_enable_all(void) } } +static const struct pmu pmu; + +static inline int is_x86_event(struct perf_event *event) +{ + return event->pmu == &pmu; +} + +static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) +{ + int i, j , w, num; + int weight, wmax; + unsigned long *c; + u64 constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + struct hw_perf_event *hwc; + + bitmap_zero(used_mask, X86_PMC_IDX_MAX); + + for (i = 0; i < n; i++) { + x86_pmu.get_event_constraints(cpuc, + cpuc->event_list[i], + constraints[i]); + } + + /* + * weight = number of possible counters + * + * 1 = most constrained, only works on one counter + * wmax = least constrained, works on any counter + * + * assign events to counters starting with most + * constrained events. + */ + wmax = x86_pmu.num_events; + + /* + * when fixed event counters are present, + * wmax is incremented by 1 to account + * for one more choice + */ + if (x86_pmu.num_events_fixed) + wmax++; + + num = n; + for (w = 1; num && w <= wmax; w++) { + /* for each event */ + for (i = 0; i < n; i++) { + c = (unsigned long *)constraints[i]; + hwc = &cpuc->event_list[i]->hw; + + weight = bitmap_weight(c, X86_PMC_IDX_MAX); + if (weight != w) + continue; + + /* + * try to reuse previous assignment + * + * This is possible despite the fact that + * events or events order may have changed. + * + * What matters is the level of constraints + * of an event and this is constant for now. + * + * This is possible also because we always + * scan from most to least constrained. Thus, + * if a counter can be reused, it means no, + * more constrained events, needed it. And + * next events will either compete for it + * (which cannot be solved anyway) or they + * have fewer constraints, and they can use + * another counter. + */ + j = hwc->idx; + if (j != -1 && !test_bit(j, used_mask)) + goto skip; + + for_each_bit(j, c, X86_PMC_IDX_MAX) { + if (!test_bit(j, used_mask)) + break; + } + + if (j == X86_PMC_IDX_MAX) + break; +skip: + set_bit(j, used_mask); + +#if 0 + pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n", + smp_processor_id(), + hwc->config, + j, + assign ? 
'y' : 'n'); +#endif + + if (assign) + assign[i] = j; + num--; + } + } + /* + * scheduling failed or is just a simulation, + * free resources if necessary + */ + if (!assign || num) { + for (i = 0; i < n; i++) { + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(cpuc, cpuc->event_list[i]); + } + } + return num ? -ENOSPC : 0; +} + +/* + * dogrp: true if must collect siblings events (group) + * returns total number of events and error code + */ +static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, bool dogrp) +{ + struct perf_event *event; + int n, max_count; + + max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; + + /* current number of events already accepted */ + n = cpuc->n_events; + + if (is_x86_event(leader)) { + if (n >= max_count) + return -ENOSPC; + cpuc->event_list[n] = leader; + n++; + } + if (!dogrp) + return n; + + list_for_each_entry(event, &leader->sibling_list, group_entry) { + if (!is_x86_event(event) || + event->state == PERF_EVENT_STATE_OFF) + continue; + + if (n >= max_count) + return -ENOSPC; + + cpuc->event_list[n] = event; + n++; + } + return n; +} + + +static inline void x86_assign_hw_event(struct perf_event *event, + struct hw_perf_event *hwc, int idx) +{ + hwc->idx = idx; + + if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { + hwc->config_base = 0; + hwc->event_base = 0; + } else if (hwc->idx >= X86_PMC_IDX_FIXED) { + hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; + /* + * We set it so that event_base + idx in wrmsr/rdmsr maps to + * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: + */ + hwc->event_base = + MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; + } else { + hwc->config_base = x86_pmu.eventsel; + hwc->event_base = x86_pmu.perfctr; + } +} + void hw_perf_enable(void) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct perf_event *event; + struct hw_perf_event *hwc; + int i; + if (!x86_pmu_initialized()) return; + if (cpuc->n_added) { + /* + * apply assignment obtained either from + * hw_perf_group_sched_in() or x86_pmu_enable() + * + * step1: save events moving to new counters + * step2: reprogram moved events into new counters + */ + for (i = 0; i < cpuc->n_events; i++) { + + event = cpuc->event_list[i]; + hwc = &event->hw; + + if (hwc->idx == -1 || hwc->idx == cpuc->assign[i]) + continue; + + x86_pmu.disable(hwc, hwc->idx); + + clear_bit(hwc->idx, cpuc->active_mask); + barrier(); + cpuc->events[hwc->idx] = NULL; + + x86_perf_event_update(event, hwc, hwc->idx); + + hwc->idx = -1; + } + + for (i = 0; i < cpuc->n_events; i++) { + + event = cpuc->event_list[i]; + hwc = &event->hw; + + if (hwc->idx == -1) { + x86_assign_hw_event(event, hwc, cpuc->assign[i]); + x86_perf_event_set_period(event, hwc, hwc->idx); + } + /* + * need to mark as active because x86_pmu_disable() + * clear active_mask and eventsp[] yet it preserves + * idx + */ + set_bit(hwc->idx, cpuc->active_mask); + cpuc->events[hwc->idx] = event; + + x86_pmu.enable(hwc, hwc->idx); + perf_event_update_userpage(event); + } + cpuc->n_added = 0; + perf_events_lapic_init(); + } x86_pmu.enable_all(); } @@ -1391,148 +1649,43 @@ static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) x86_pmu_enable_event(hwc, idx); } -static int fixed_mode_idx(struct hw_perf_event *hwc) -{ - unsigned int hw_event; - - hw_event = hwc->config & ARCH_PERFMON_EVENT_MASK; - - if (unlikely((hw_event == - x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && - (hwc->sample_period == 1))) - return X86_PMC_IDX_FIXED_BTS; - - if (!x86_pmu.num_events_fixed) - 
return -1; - - /* - * fixed counters do not take all possible filters - */ - if (hwc->config & ARCH_PERFMON_EVENT_FILTER_MASK) - return -1; - - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_INSTRUCTIONS))) - return X86_PMC_IDX_FIXED_INSTRUCTIONS; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_CPU_CYCLES))) - return X86_PMC_IDX_FIXED_CPU_CYCLES; - if (unlikely(hw_event == x86_pmu.event_map(PERF_COUNT_HW_BUS_CYCLES))) - return X86_PMC_IDX_FIXED_BUS_CYCLES; - - return -1; -} - -/* - * generic counter allocator: get next free counter - */ -static int -gen_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) -{ - int idx; - - idx = find_first_zero_bit(cpuc->used_mask, x86_pmu.num_events); - return idx == x86_pmu.num_events ? -1 : idx; -} - /* - * intel-specific counter allocator: check event constraints - */ -static int -intel_get_event_idx(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) -{ - const struct event_constraint *event_constraint; - int i, code; - - if (!event_constraints) - goto skip; - - code = hwc->config & CORE_EVNTSEL_EVENT_MASK; - - for_each_event_constraint(event_constraint, event_constraints) { - if (code == event_constraint->code) { - for_each_bit(i, event_constraint->idxmsk, X86_PMC_IDX_MAX) { - if (!test_and_set_bit(i, cpuc->used_mask)) - return i; - } - return -1; - } - } -skip: - return gen_get_event_idx(cpuc, hwc); -} - -static int -x86_schedule_event(struct cpu_hw_events *cpuc, struct hw_perf_event *hwc) -{ - int idx; - - idx = fixed_mode_idx(hwc); - if (idx == X86_PMC_IDX_FIXED_BTS) { - /* BTS is already occupied. */ - if (test_and_set_bit(idx, cpuc->used_mask)) - return -EAGAIN; - - hwc->config_base = 0; - hwc->event_base = 0; - hwc->idx = idx; - } else if (idx >= 0) { - /* - * Try to get the fixed event, if that is already taken - * then try to get a generic event: - */ - if (test_and_set_bit(idx, cpuc->used_mask)) - goto try_generic; - - hwc->config_base = MSR_ARCH_PERFMON_FIXED_CTR_CTRL; - /* - * We set it so that event_base + idx in wrmsr/rdmsr maps to - * MSR_ARCH_PERFMON_FIXED_CTR0 ... CTR2: - */ - hwc->event_base = - MSR_ARCH_PERFMON_FIXED_CTR0 - X86_PMC_IDX_FIXED; - hwc->idx = idx; - } else { - idx = hwc->idx; - /* Try to get the previous generic event again */ - if (idx == -1 || test_and_set_bit(idx, cpuc->used_mask)) { -try_generic: - idx = x86_pmu.get_event_idx(cpuc, hwc); - if (idx == -1) - return -EAGAIN; - - set_bit(idx, cpuc->used_mask); - hwc->idx = idx; - } - hwc->config_base = x86_pmu.eventsel; - hwc->event_base = x86_pmu.perfctr; - } - - return idx; -} - -/* - * Find a PMC slot for the freshly enabled / scheduled in event: + * activate a single event + * + * The event is added to the group of enabled events + * but only if it can be scehduled with existing events. + * + * Called with PMU disabled. 
If successful and return value 1, + * then guaranteed to call perf_enable() and hw_perf_enable() */ static int x86_pmu_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct hw_perf_event *hwc = &event->hw; - int idx; + struct hw_perf_event *hwc; + int assign[X86_PMC_IDX_MAX]; + int n, n0, ret; - idx = x86_schedule_event(cpuc, hwc); - if (idx < 0) - return idx; + hwc = &event->hw; - perf_events_lapic_init(); + n0 = cpuc->n_events; + n = collect_events(cpuc, event, false); + if (n < 0) + return n; - x86_pmu.disable(hwc, idx); - - cpuc->events[idx] = event; - set_bit(idx, cpuc->active_mask); + ret = x86_schedule_events(cpuc, n, assign); + if (ret) + return ret; + /* + * copy new assignment, now we know it is possible + * will be used by hw_perf_enable() + */ + memcpy(cpuc->assign, assign, n*sizeof(int)); - x86_perf_event_set_period(event, hwc, idx); - x86_pmu.enable(hwc, idx); + cpuc->n_events = n; + cpuc->n_added = n - n0; - perf_event_update_userpage(event); + if (hwc->idx != -1) + x86_perf_event_set_period(event, hwc, hwc->idx); return 0; } @@ -1576,7 +1729,7 @@ void perf_event_print_debug(void) pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); } - pr_info("CPU#%d: used: %016llx\n", cpu, *(u64 *)cpuc->used_mask); + pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); for (idx = 0; idx < x86_pmu.num_events; idx++) { rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); @@ -1664,7 +1817,7 @@ static void x86_pmu_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; + int i, idx = hwc->idx; /* * Must be done before we disable, otherwise the nmi handler @@ -1690,8 +1843,19 @@ static void x86_pmu_disable(struct perf_event *event) intel_pmu_drain_bts_buffer(cpuc); cpuc->events[idx] = NULL; - clear_bit(idx, cpuc->used_mask); + for (i = 0; i < cpuc->n_events; i++) { + if (event == cpuc->event_list[i]) { + + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(cpuc, event); + + while (++i < cpuc->n_events) + cpuc->event_list[i-1] = cpuc->event_list[i]; + + --cpuc->n_events; + } + } perf_event_update_userpage(event); } @@ -1962,6 +2126,176 @@ perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_STOP; } +static struct event_constraint bts_constraint = { + .code = 0, + .cmask = 0, + .idxmsk[0] = 1ULL << X86_PMC_IDX_FIXED_BTS +}; + +static int intel_special_constraints(struct perf_event *event, + u64 *idxmsk) +{ + unsigned int hw_event; + + hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; + + if (unlikely((hw_event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (event->hw.sample_period == 1))) { + + bitmap_copy((unsigned long *)idxmsk, + (unsigned long *)bts_constraint.idxmsk, + X86_PMC_IDX_MAX); + return 1; + } + return 0; +} + +static void intel_get_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event, + u64 *idxmsk) +{ + const struct event_constraint *c; + + /* + * cleanup bitmask + */ + bitmap_zero((unsigned long *)idxmsk, X86_PMC_IDX_MAX); + + if (intel_special_constraints(event, idxmsk)) + return; + + if (x86_pmu.event_constraints) { + for_each_event_constraint(c, x86_pmu.event_constraints) { + if ((event->hw.config & c->cmask) == c->code) { + + bitmap_copy((unsigned long *)idxmsk, + (unsigned long *)c->idxmsk, + X86_PMC_IDX_MAX); + return; + } + } + } + /* no constraints, means supports all generic counters */ + 
bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events); +} + +static void amd_get_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event, + u64 *idxmsk) +{ +} + +static int x86_event_sched_in(struct perf_event *event, + struct perf_cpu_context *cpuctx, int cpu) +{ + int ret = 0; + + event->state = PERF_EVENT_STATE_ACTIVE; + event->oncpu = cpu; + event->tstamp_running += event->ctx->time - event->tstamp_stopped; + + if (!is_x86_event(event)) + ret = event->pmu->enable(event); + + if (!ret && !is_software_event(event)) + cpuctx->active_oncpu++; + + if (!ret && event->attr.exclusive) + cpuctx->exclusive = 1; + + return ret; +} + +static void x86_event_sched_out(struct perf_event *event, + struct perf_cpu_context *cpuctx, int cpu) +{ + event->state = PERF_EVENT_STATE_INACTIVE; + event->oncpu = -1; + + if (!is_x86_event(event)) + event->pmu->disable(event); + + event->tstamp_running -= event->ctx->time - event->tstamp_stopped; + + if (!is_software_event(event)) + cpuctx->active_oncpu--; + + if (event->attr.exclusive || !cpuctx->active_oncpu) + cpuctx->exclusive = 0; +} + +/* + * Called to enable a whole group of events. + * Returns 1 if the group was enabled, or -EAGAIN if it could not be. + * Assumes the caller has disabled interrupts and has + * frozen the PMU with hw_perf_save_disable. + * + * called with PMU disabled. If successful and return value 1, + * then guaranteed to call perf_enable() and hw_perf_enable() + */ +int hw_perf_group_sched_in(struct perf_event *leader, + struct perf_cpu_context *cpuctx, + struct perf_event_context *ctx, int cpu) +{ + struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct perf_event *sub; + int assign[X86_PMC_IDX_MAX]; + int n0, n1, ret; + + /* n0 = total number of events */ + n0 = collect_events(cpuc, leader, true); + if (n0 < 0) + return n0; + + ret = x86_schedule_events(cpuc, n0, assign); + if (ret) + return ret; + + ret = x86_event_sched_in(leader, cpuctx, cpu); + if (ret) + return ret; + + n1 = 1; + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + if (sub->state != PERF_EVENT_STATE_OFF) { + ret = x86_event_sched_in(sub, cpuctx, cpu); + if (ret) + goto undo; + ++n1; + } + } + /* + * copy new assignment, now we know it is possible + * will be used by hw_perf_enable() + */ + memcpy(cpuc->assign, assign, n0*sizeof(int)); + + cpuc->n_events = n0; + cpuc->n_added = n1; + ctx->nr_active += n1; + + /* + * 1 means successful and events are active + * This is not quite true because we defer + * actual activation until hw_perf_enable() but + * this way we* ensure caller won't try to enable + * individual events + */ + return 1; +undo: + x86_event_sched_out(leader, cpuctx, cpu); + n0 = 1; + list_for_each_entry(sub, &leader->sibling_list, group_entry) { + if (sub->state == PERF_EVENT_STATE_ACTIVE) { + x86_event_sched_out(sub, cpuctx, cpu); + if (++n0 == n1) + break; + } + } + return ret; +} + static __read_mostly struct notifier_block perf_event_nmi_notifier = { .notifier_call = perf_event_nmi_handler, .next = NULL, @@ -1993,7 +2327,8 @@ static __initconst struct x86_pmu p6_pmu = { */ .event_bits = 32, .event_mask = (1ULL << 32) - 1, - .get_event_idx = intel_get_event_idx, + .get_event_constraints = intel_get_event_constraints, + .event_constraints = intel_p6_event_constraints }; static __initconst struct x86_pmu intel_pmu = { @@ -2017,7 +2352,7 @@ static __initconst struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .enable_bts = intel_pmu_enable_bts, .disable_bts = intel_pmu_disable_bts, - 
.get_event_idx = intel_get_event_idx, + .get_event_constraints = intel_get_event_constraints }; static __initconst struct x86_pmu amd_pmu = { @@ -2038,7 +2373,7 @@ static __initconst struct x86_pmu amd_pmu = { .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, - .get_event_idx = gen_get_event_idx, + .get_event_constraints = amd_get_event_constraints }; static __init int p6_pmu_init(void) @@ -2051,12 +2386,9 @@ static __init int p6_pmu_init(void) case 7: case 8: case 11: /* Pentium III */ - event_constraints = intel_p6_event_constraints; - break; case 9: case 13: /* Pentium M */ - event_constraints = intel_p6_event_constraints; break; default: pr_cont("unsupported p6 CPU model %d ", @@ -2121,23 +2453,29 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + x86_pmu.event_constraints = intel_core_event_constraints; pr_cont("Core2 events, "); - event_constraints = intel_core_event_constraints; break; - default: case 26: memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - event_constraints = intel_nehalem_event_constraints; + x86_pmu.event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; case 28: memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + x86_pmu.event_constraints = intel_gen_event_constraints; pr_cont("Atom events, "); break; + default: + /* + * default constraints for v2 and up + */ + x86_pmu.event_constraints = intel_gen_event_constraints; + pr_cont("generic architected perfmon, "); } return 0; } @@ -2234,36 +2572,43 @@ static const struct pmu pmu = { .unthrottle = x86_pmu_unthrottle, }; -static int -validate_event(struct cpu_hw_events *cpuc, struct perf_event *event) -{ - struct hw_perf_event fake_event = event->hw; - - if (event->pmu && event->pmu != &pmu) - return 0; - - return x86_schedule_event(cpuc, &fake_event) >= 0; -} - +/* + * validate a single event group + * + * validation include: + * - check events are compatible which each other + * - events do not compete for the same counter + * - number of events <= number of counters + * + * validation ensures the group can be loaded onto the + * PMU if it was the only group available. 
+ */ static int validate_group(struct perf_event *event) { - struct perf_event *sibling, *leader = event->group_leader; - struct cpu_hw_events fake_pmu; + struct perf_event *leader = event->group_leader; + struct cpu_hw_events fake_cpuc; + int n; - memset(&fake_pmu, 0, sizeof(fake_pmu)); + memset(&fake_cpuc, 0, sizeof(fake_cpuc)); - if (!validate_event(&fake_pmu, leader)) + /* + * the event is not yet connected with its + * siblings therefore we must first collect + * existing siblings, then add the new event + * before we can simulate the scheduling + */ + n = collect_events(&fake_cpuc, leader, true); + if (n < 0) return -ENOSPC; - list_for_each_entry(sibling, &leader->sibling_list, group_entry) { - if (!validate_event(&fake_pmu, sibling)) - return -ENOSPC; - } - - if (!validate_event(&fake_pmu, event)) + fake_cpuc.n_events = n; + n = collect_events(&fake_cpuc, event, false); + if (n < 0) return -ENOSPC; - return 0; + fake_cpuc.n_events = n; + + return x86_schedule_events(&fake_cpuc, n, NULL); } const struct pmu *hw_perf_event_init(struct perf_event *event) -- cgit v1.1 From 8113070d6639d2245c6c79afb8df42cedab30540 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Thu, 21 Jan 2010 17:39:01 +0200 Subject: perf_events: Add fast-path to the rescheduling code Implement correct fastpath scheduling, i.e., reuse previous assignment. Signed-off-by: Stephane Eranian [ split from larger patch] Signed-off-by: Peter Zijlstra LKML-Reference: <4b588464.1818d00a.4456.383b@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 91 +++++++++++++++++++++++++++------------- 1 file changed, 61 insertions(+), 30 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 995ac4a..0bd23d01 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1245,6 +1245,46 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) } /* + * fastpath, try to reuse previous register + */ + for (i = 0, num = n; i < n; i++, num--) { + hwc = &cpuc->event_list[i]->hw; + c = (unsigned long *)constraints[i]; + + /* never assigned */ + if (hwc->idx == -1) + break; + + /* constraint still honored */ + if (!test_bit(hwc->idx, c)) + break; + + /* not already used */ + if (test_bit(hwc->idx, used_mask)) + break; + +#if 0 + pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n", + smp_processor_id(), + hwc->config, + hwc->idx, + assign ? 'y' : 'n'); +#endif + + set_bit(hwc->idx, used_mask); + if (assign) + assign[i] = hwc->idx; + } + if (!num) + goto done; + + /* + * begin slow path + */ + + bitmap_zero(used_mask, X86_PMC_IDX_MAX); + + /* * weight = number of possible counters * * 1 = most constrained, only works on one counter @@ -1263,10 +1303,9 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (x86_pmu.num_events_fixed) wmax++; - num = n; - for (w = 1; num && w <= wmax; w++) { + for (w = 1, num = n; num && w <= wmax; w++) { /* for each event */ - for (i = 0; i < n; i++) { + for (i = 0; num && i < n; i++) { c = (unsigned long *)constraints[i]; hwc = &cpuc->event_list[i]->hw; @@ -1274,28 +1313,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (weight != w) continue; - /* - * try to reuse previous assignment - * - * This is possible despite the fact that - * events or events order may have changed. - * - * What matters is the level of constraints - * of an event and this is constant for now. 
- * - * This is possible also because we always - * scan from most to least constrained. Thus, - * if a counter can be reused, it means no, - * more constrained events, needed it. And - * next events will either compete for it - * (which cannot be solved anyway) or they - * have fewer constraints, and they can use - * another counter. - */ - j = hwc->idx; - if (j != -1 && !test_bit(j, used_mask)) - goto skip; - for_each_bit(j, c, X86_PMC_IDX_MAX) { if (!test_bit(j, used_mask)) break; @@ -1303,22 +1320,23 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (j == X86_PMC_IDX_MAX) break; -skip: - set_bit(j, used_mask); #if 0 - pr_debug("CPU%d config=0x%llx idx=%d assign=%c\n", + pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n", smp_processor_id(), hwc->config, j, assign ? 'y' : 'n'); #endif + set_bit(j, used_mask); + if (assign) assign[i] = j; num--; } } +done: /* * scheduling failed or is just a simulation, * free resources if necessary @@ -1357,7 +1375,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, list_for_each_entry(event, &leader->sibling_list, group_entry) { if (!is_x86_event(event) || - event->state == PERF_EVENT_STATE_OFF) + event->state <= PERF_EVENT_STATE_OFF) continue; if (n >= max_count) @@ -2184,6 +2202,8 @@ static void amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, u64 *idxmsk) { + /* no constraints, means supports all generic counters */ + bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events); } static int x86_event_sched_in(struct perf_event *event, @@ -2258,7 +2278,7 @@ int hw_perf_group_sched_in(struct perf_event *leader, n1 = 1; list_for_each_entry(sub, &leader->sibling_list, group_entry) { - if (sub->state != PERF_EVENT_STATE_OFF) { + if (sub->state > PERF_EVENT_STATE_OFF) { ret = x86_event_sched_in(sub, cpuctx, cpu); if (ret) goto undo; @@ -2613,12 +2633,23 @@ static int validate_group(struct perf_event *event) const struct pmu *hw_perf_event_init(struct perf_event *event) { + const struct pmu *tmp; int err; err = __hw_perf_event_init(event); if (!err) { + /* + * we temporarily connect event to its pmu + * such that validate_group() can classify + * it as an x86 event using is_x86_event() + */ + tmp = event->pmu; + event->pmu = &pmu; + if (event->group_leader != event) err = validate_group(event); + + event->pmu = tmp; } if (err) { if (event->destroy) -- cgit v1.1 From 502568d563bcc37ac505a83341c0c95b88c015a8 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 14:35:46 +0100 Subject: perf_event: x86: Allocate the fake_cpuc GCC was complaining the stack usage was too large, so allocate the structure. 
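The change itself is mechanical (see the hunks below): instead of keeping a fairly large struct cpu_hw_events on the kernel stack, validate_group() works on a zeroed heap copy and frees it on every exit path. Roughly:

    /* before: large on-stack scratch copy */
    struct cpu_hw_events fake_cpuc;
    memset(&fake_cpuc, 0, sizeof(fake_cpuc));

    /* after: zeroed heap allocation, released on all paths */
    struct cpu_hw_events *fake_cpuc;

    fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO);
    if (!fake_cpuc)
            return -ENOMEM;         /* simplified; the patch uses a goto-based error path */
    ...
    kfree(fake_cpuc);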
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.411197266@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 29 +++++++++++++++++++---------- 1 file changed, 19 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0bd23d01..7bd359a5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2606,10 +2606,13 @@ static const struct pmu pmu = { static int validate_group(struct perf_event *event) { struct perf_event *leader = event->group_leader; - struct cpu_hw_events fake_cpuc; - int n; + struct cpu_hw_events *fake_cpuc; + int ret, n; - memset(&fake_cpuc, 0, sizeof(fake_cpuc)); + ret = -ENOMEM; + fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); + if (!fake_cpuc) + goto out; /* * the event is not yet connected with its @@ -2617,18 +2620,24 @@ static int validate_group(struct perf_event *event) * existing siblings, then add the new event * before we can simulate the scheduling */ - n = collect_events(&fake_cpuc, leader, true); + ret = -ENOSPC; + n = collect_events(fake_cpuc, leader, true); if (n < 0) - return -ENOSPC; + goto out_free; - fake_cpuc.n_events = n; - n = collect_events(&fake_cpuc, event, false); + fake_cpuc->n_events = n; + n = collect_events(fake_cpuc, event, false); if (n < 0) - return -ENOSPC; + goto out_free; - fake_cpuc.n_events = n; + fake_cpuc->n_events = n; - return x86_schedule_events(&fake_cpuc, n, NULL); + ret = x86_schedule_events(fake_cpuc, n, NULL); + +out_free: + kfree(fake_cpuc); +out: + return ret; } const struct pmu *hw_perf_event_init(struct perf_event *event) -- cgit v1.1 From 81269a085669b5130058a0275aa7ba9f94abd1fa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 14:55:22 +0100 Subject: perf_event: x86: Fixup constraints typing issue Constraints gets defined an u64 but in long quantities and then cast to long. 
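Put differently (taken from the hunk below): the scratch constraint masks were declared as u64 rows but consumed as unsigned long bitmaps, forcing a cast at every use; declaring them in the type the bitmap helpers expect removes the casts:

    /* before: u64 storage, cast on every access */
    u64 constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)];
    c = (unsigned long *)constraints[i];

    /* after: native unsigned long bitmaps */
    unsigned long constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)];
    c = constraints[i];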
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.504916780@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 7bd359a5..7e181a5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1232,7 +1232,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) int i, j , w, num; int weight, wmax; unsigned long *c; - u64 constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct hw_perf_event *hwc; @@ -1249,7 +1249,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) */ for (i = 0, num = n; i < n; i++, num--) { hwc = &cpuc->event_list[i]->hw; - c = (unsigned long *)constraints[i]; + c = constraints[i]; /* never assigned */ if (hwc->idx == -1) @@ -1306,7 +1306,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) for (w = 1, num = n; num && w <= wmax; w++) { /* for each event */ for (i = 0; num && i < n; i++) { - c = (unsigned long *)constraints[i]; + c = constraints[i]; hwc = &cpuc->event_list[i]->hw; weight = bitmap_weight(c, X86_PMC_IDX_MAX); -- cgit v1.1 From c91e0f5da81c6f3a611a1bd6d0cca6717c90fdab Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 15:25:59 +0100 Subject: perf_event: x86: Clean up some of the u64/long bitmask casting We need this to be u64 for direct assigment, but the bitmask functions all work on unsigned long, leading to cast heaven, solve this by using a union. 
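The union gives two views of the same storage, so each site can use whichever type it needs without casting. An illustrative pair of uses drawn from this series (EVENT_CONSTRAINT() fills the u64 view, the constraint consumers read the bitmap view; "dest" is a placeholder destination bitmap, not a name from the patch):

    unsigned long dest[BITS_TO_LONGS(X86_PMC_IDX_MAX)];    /* placeholder */
    struct event_constraint c;

    c.idxmsk64[0] = 1ULL << X86_PMC_IDX_FIXED_BTS;  /* direct 64-bit assignment */
    bitmap_copy(dest, c.idxmsk, X86_PMC_IDX_MAX);   /* bitmap helpers, no casts */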
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.595961269@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 47 ++++++++++++++++++++-------------------- 1 file changed, 23 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 7e181a5..921bbf7 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -69,10 +69,11 @@ struct debug_store { u64 pebs_event_reset[MAX_PEBS_EVENTS]; }; -#define BITS_TO_U64(nr) DIV_ROUND_UP(nr, BITS_PER_BYTE * sizeof(u64)) - struct event_constraint { - u64 idxmsk[BITS_TO_U64(X86_PMC_IDX_MAX)]; + union { + unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + u64 idxmsk64[1]; + }; int code; int cmask; }; @@ -90,13 +91,14 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; -#define EVENT_CONSTRAINT(c, n, m) { \ - .code = (c), \ - .cmask = (m), \ - .idxmsk[0] = (n) } +#define EVENT_CONSTRAINT(c, n, m) { \ + { .idxmsk64[0] = (n) }, \ + .code = (c), \ + .cmask = (m), \ +} #define EVENT_CONSTRAINT_END \ - { .code = 0, .cmask = 0, .idxmsk[0] = 0 } + EVENT_CONSTRAINT(0, 0, 0) #define for_each_event_constraint(e, c) \ for ((e) = (c); (e)->cmask; (e)++) @@ -126,8 +128,11 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); - void (*get_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event, u64 *idxmsk); - void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); + void (*get_event_constraints)(struct cpu_hw_events *cpuc, + struct perf_event *event, + unsigned long *idxmsk); + void (*put_event_constraints)(struct cpu_hw_events *cpuc, + struct perf_event *event); const struct event_constraint *event_constraints; }; @@ -2144,14 +2149,11 @@ perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_STOP; } -static struct event_constraint bts_constraint = { - .code = 0, - .cmask = 0, - .idxmsk[0] = 1ULL << X86_PMC_IDX_FIXED_BTS -}; +static struct event_constraint bts_constraint = + EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); static int intel_special_constraints(struct perf_event *event, - u64 *idxmsk) + unsigned long *idxmsk) { unsigned int hw_event; @@ -2171,14 +2173,14 @@ static int intel_special_constraints(struct perf_event *event, static void intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, - u64 *idxmsk) + unsigned long *idxmsk) { const struct event_constraint *c; /* * cleanup bitmask */ - bitmap_zero((unsigned long *)idxmsk, X86_PMC_IDX_MAX); + bitmap_zero(idxmsk, X86_PMC_IDX_MAX); if (intel_special_constraints(event, idxmsk)) return; @@ -2186,10 +2188,7 @@ static void intel_get_event_constraints(struct cpu_hw_events *cpuc, if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { if ((event->hw.config & c->cmask) == c->code) { - - bitmap_copy((unsigned long *)idxmsk, - (unsigned long *)c->idxmsk, - X86_PMC_IDX_MAX); + bitmap_copy(idxmsk, c->idxmsk, X86_PMC_IDX_MAX); return; } } @@ -2200,10 +2199,10 @@ static void intel_get_event_constraints(struct cpu_hw_events *cpuc, static void amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event, - u64 *idxmsk) + unsigned long *idxmsk) { /* no constraints, means supports all generic counters */ - bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events); + bitmap_fill(idxmsk, x86_pmu.num_events); } static int 
x86_event_sched_in(struct perf_event *event, -- cgit v1.1 From 8433be1184e4f22c37d4b8ed36cde529a47882f4 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 15:38:26 +0100 Subject: perf_event: x86: Reduce some overly long lines with some MACROs Introduce INTEL_EVENT_CONSTRAINT and FIXED_EVENT_CONSTRAINT to reduce some line length and typing work. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.688730371@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 68 ++++++++++++++++++++++------------------ 1 file changed, 37 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 921bbf7..4d1ed10 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -97,6 +97,12 @@ struct cpu_hw_events { .cmask = (m), \ } +#define INTEL_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) + +#define FIXED_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) + #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) @@ -192,12 +198,12 @@ static u64 p6_pmu_raw_event(u64 hw_event) static struct event_constraint intel_p6_event_constraints[] = { - EVENT_CONSTRAINT(0xc1, 0x1, INTEL_ARCH_EVENT_MASK), /* FLOPS */ - EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */ - EVENT_CONSTRAINT(0x11, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */ - EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */ - EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */ - EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ + INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ EVENT_CONSTRAINT_END }; @@ -217,41 +223,41 @@ static const u64 intel_perfmon_event_map[] = static struct event_constraint intel_core_event_constraints[] = { - EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ - EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ - EVENT_CONSTRAINT(0x10, 0x1, INTEL_ARCH_EVENT_MASK), /* FP_COMP_OPS_EXE */ - EVENT_CONSTRAINT(0x11, 0x2, INTEL_ARCH_EVENT_MASK), /* FP_ASSIST */ - EVENT_CONSTRAINT(0x12, 0x2, INTEL_ARCH_EVENT_MASK), /* MUL */ - EVENT_CONSTRAINT(0x13, 0x2, INTEL_ARCH_EVENT_MASK), /* DIV */ - EVENT_CONSTRAINT(0x14, 0x1, INTEL_ARCH_EVENT_MASK), /* CYCLES_DIV_BUSY */ - EVENT_CONSTRAINT(0x18, 0x1, INTEL_ARCH_EVENT_MASK), /* IDLE_DURING_DIV */ - EVENT_CONSTRAINT(0x19, 0x2, INTEL_ARCH_EVENT_MASK), /* DELAYED_BYPASS */ - EVENT_CONSTRAINT(0xa1, 0x1, INTEL_ARCH_EVENT_MASK), /* RS_UOPS_DISPATCH_CYCLES */ - EVENT_CONSTRAINT(0xcb, 0x1, INTEL_ARCH_EVENT_MASK), /* MEM_LOAD_RETIRED */ + FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ + INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + 
INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ EVENT_CONSTRAINT_END }; static struct event_constraint intel_nehalem_event_constraints[] = { - EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ - EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ - EVENT_CONSTRAINT(0x40, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LD */ - EVENT_CONSTRAINT(0x41, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_ST */ - EVENT_CONSTRAINT(0x42, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK */ - EVENT_CONSTRAINT(0x43, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_ALL_REF */ - EVENT_CONSTRAINT(0x4e, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_PREFETCH */ - EVENT_CONSTRAINT(0x4c, 0x3, INTEL_ARCH_EVENT_MASK), /* LOAD_HIT_PRE */ - EVENT_CONSTRAINT(0x51, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D */ - EVENT_CONSTRAINT(0x52, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ - EVENT_CONSTRAINT(0x53, 0x3, INTEL_ARCH_EVENT_MASK), /* L1D_CACHE_LOCK_FB_HIT */ - EVENT_CONSTRAINT(0xc5, 0x3, INTEL_ARCH_EVENT_MASK), /* CACHE_LOCK_CYCLES */ + FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ + INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ + INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ + INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ + INTEL_EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ + INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + INTEL_EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ + INTEL_EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ + INTEL_EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ EVENT_CONSTRAINT_END }; static struct event_constraint intel_gen_event_constraints[] = { - EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32)), INTEL_ARCH_FIXED_MASK), /* INSTRUCTIONS_RETIRED */ - EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33)), INTEL_ARCH_FIXED_MASK), /* UNHALTED_CORE_CYCLES */ + FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ EVENT_CONSTRAINT_END }; -- cgit v1.1 From 63b146490befc027a7e0923e333269e68b20d380 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 16:32:17 +0100 Subject: perf_event: x86: Optimize the constraint searching bits Instead of copying bitmasks around, pass pointers to the constraint structure. 
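As an aside, a minimal standalone sketch of the idea, with made-up names and a simplified constraint type rather than the real kernel structures: the lookup hands back a pointer to a constant table entry (or to a shared "unconstrained" object), so the scheduler reads c->idxmsk directly instead of receiving a freshly copied bitmask:

#include <stdint.h>

#define NUM_COUNTERS 4

struct constraint {
        uint64_t idxmsk;        /* counters this event may use */
        unsigned int code;      /* event code the entry matches */
        unsigned int cmask;     /* config bits compared against code */
};

/* table terminated by a zero cmask, mirroring EVENT_CONSTRAINT_END */
static struct constraint table[] = {
        { 0x3, 0xc1, 0xff },
        { 0, 0, 0 },
};

/* shared object meaning "any generic counter is fine" */
static struct constraint unconstrained = { (1ULL << NUM_COUNTERS) - 1, 0, 0 };

static struct constraint *get_constraints(uint64_t config)
{
        struct constraint *c;

        for (c = table; c->cmask; c++)
                if ((config & c->cmask) == c->code)
                        return c;       /* a pointer, no bitmap_copy() */

        return &unconstrained;
}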
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.887853503@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 75 ++++++++++++++++++---------------------- 1 file changed, 34 insertions(+), 41 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 4d1ed10..092ad56 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -134,12 +134,14 @@ struct x86_pmu { u64 intel_ctrl; void (*enable_bts)(u64 config); void (*disable_bts)(void); - void (*get_event_constraints)(struct cpu_hw_events *cpuc, - struct perf_event *event, - unsigned long *idxmsk); + + struct event_constraint * + (*get_event_constraints)(struct cpu_hw_events *cpuc, + struct perf_event *event); + void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); - const struct event_constraint *event_constraints; + struct event_constraint *event_constraints; }; static struct x86_pmu x86_pmu __read_mostly; @@ -1242,17 +1244,15 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { int i, j , w, num; int weight, wmax; - unsigned long *c; - unsigned long constraints[X86_PMC_IDX_MAX][BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct hw_perf_event *hwc; bitmap_zero(used_mask, X86_PMC_IDX_MAX); for (i = 0; i < n; i++) { - x86_pmu.get_event_constraints(cpuc, - cpuc->event_list[i], - constraints[i]); + constraints[i] = + x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); } /* @@ -1267,7 +1267,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) break; /* constraint still honored */ - if (!test_bit(hwc->idx, c)) + if (!test_bit(hwc->idx, c->idxmsk)) break; /* not already used */ @@ -1320,11 +1320,11 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) c = constraints[i]; hwc = &cpuc->event_list[i]->hw; - weight = bitmap_weight(c, X86_PMC_IDX_MAX); + weight = bitmap_weight(c->idxmsk, X86_PMC_IDX_MAX); if (weight != w) continue; - for_each_bit(j, c, X86_PMC_IDX_MAX) { + for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { if (!test_bit(j, used_mask)) break; } @@ -2155,11 +2155,13 @@ perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_STOP; } +static struct event_constraint unconstrained; + static struct event_constraint bts_constraint = EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); -static int intel_special_constraints(struct perf_event *event, - unsigned long *idxmsk) +static struct event_constraint * +intel_special_constraints(struct perf_event *event) { unsigned int hw_event; @@ -2169,46 +2171,34 @@ static int intel_special_constraints(struct perf_event *event, x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && (event->hw.sample_period == 1))) { - bitmap_copy((unsigned long *)idxmsk, - (unsigned long *)bts_constraint.idxmsk, - X86_PMC_IDX_MAX); - return 1; + return &bts_constraint; } - return 0; + return NULL; } -static void intel_get_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event, - unsigned long *idxmsk) +static struct event_constraint * +intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - const struct event_constraint *c; + struct event_constraint *c; - /* - * cleanup bitmask - */ - bitmap_zero(idxmsk, X86_PMC_IDX_MAX); - - if (intel_special_constraints(event, idxmsk)) - return; + c 
= intel_special_constraints(event); + if (c) + return c; if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { - if ((event->hw.config & c->cmask) == c->code) { - bitmap_copy(idxmsk, c->idxmsk, X86_PMC_IDX_MAX); - return; - } + if ((event->hw.config & c->cmask) == c->code) + return c; } } - /* no constraints, means supports all generic counters */ - bitmap_fill((unsigned long *)idxmsk, x86_pmu.num_events); + + return &unconstrained; } -static void amd_get_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event, - unsigned long *idxmsk) +static struct event_constraint * +amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - /* no constraints, means supports all generic counters */ - bitmap_fill(idxmsk, x86_pmu.num_events); + return &unconstrained; } static int x86_event_sched_in(struct perf_event *event, @@ -2576,6 +2566,9 @@ void __init init_hw_perf_events(void) perf_events_lapic_init(); register_die_notifier(&perf_event_nmi_notifier); + unconstrained = (struct event_constraint) + EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0); + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.event_bits); pr_info("... generic registers: %d\n", x86_pmu.num_events); -- cgit v1.1 From 272d30be622c9c6cbd514b1211ff359292001baa Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 16:32:17 +0100 Subject: perf_event: x86: Optimize constraint weight computation Add a weight member to the constraint structure and avoid recomputing the weight at runtime. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155535.963944926@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 9 +++++---- 1 file changed, 5 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 092ad56..2c22ce4 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -23,6 +23,7 @@ #include #include #include +#include #include #include @@ -76,6 +77,7 @@ struct event_constraint { }; int code; int cmask; + int weight; }; struct cpu_hw_events { @@ -95,6 +97,7 @@ struct cpu_hw_events { { .idxmsk64[0] = (n) }, \ .code = (c), \ .cmask = (m), \ + .weight = HWEIGHT64((u64)(n)), \ } #define INTEL_EVENT_CONSTRAINT(c, n) \ @@ -1242,8 +1245,7 @@ static inline int is_x86_event(struct perf_event *event) static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { - int i, j , w, num; - int weight, wmax; + int i, j, w, num, wmax; struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; struct hw_perf_event *hwc; @@ -1320,8 +1322,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) c = constraints[i]; hwc = &cpuc->event_list[i]->hw; - weight = bitmap_weight(c->idxmsk, X86_PMC_IDX_MAX); - if (weight != w) + if (c->weight != w) continue; for_each_bit(j, c->idxmsk, X86_PMC_IDX_MAX) { -- cgit v1.1 From c933c1a603d5bf700ddce79216c1be0ec3bc0e6c Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 22 Jan 2010 16:40:12 +0100 Subject: perf_event: x86: Optimize the fast path a little more Remove num from the fast path and save a few ops. 
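A tiny illustrative sketch of the control-flow change, with hypothetical helpers standing in for the real constraint checks: rather than counting a separate num down and testing !num, the loop breaks at the first event that cannot keep its counter, and success is simply i == n:

#include <stdbool.h>

static bool can_keep_counter(int i) { return i >= 0; }  /* stand-in predicate */
static void commit_assignment(int i) { (void)i; }       /* stand-in action */

static bool fastpath_schedule(int n)
{
        int i;

        for (i = 0; i < n; i++) {
                if (!can_keep_counter(i))       /* first conflict ends the fast path */
                        break;
                commit_assignment(i);
        }

        return i == n;  /* true iff every event reused its previous counter */
}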
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100122155536.056430539@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 2c22ce4..33c889f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1245,9 +1245,9 @@ static inline int is_x86_event(struct perf_event *event) static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { - int i, j, w, num, wmax; struct event_constraint *c, *constraints[X86_PMC_IDX_MAX]; unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + int i, j, w, wmax, num = 0; struct hw_perf_event *hwc; bitmap_zero(used_mask, X86_PMC_IDX_MAX); @@ -1260,7 +1260,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) /* * fastpath, try to reuse previous register */ - for (i = 0, num = n; i < n; i++, num--) { + for (i = 0; i < n; i++) { hwc = &cpuc->event_list[i]->hw; c = constraints[i]; @@ -1288,7 +1288,7 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (assign) assign[i] = hwc->idx; } - if (!num) + if (i == n) goto done; /* -- cgit v1.1 From 6c9687abeb24d5b7aae7db5be070c2139ad29e29 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Jan 2010 11:57:25 +0100 Subject: perf_event: x86: Optimize x86_pmu_disable() x86_pmu_disable() removes the event from the cpuc->event_list[], however since an event can only be on that list once, stop looking after we found it. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 33c889f..66de282 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1884,6 +1884,7 @@ static void x86_pmu_disable(struct perf_event *event) cpuc->event_list[i-1] = cpuc->event_list[i]; --cpuc->n_events; + break; } } perf_event_update_userpage(event); -- cgit v1.1 From 184f412c3341cd24fbd26604634a5800b83dbdc3 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Wed, 27 Jan 2010 08:39:39 +0100 Subject: perf, x86: Clean up event constraints code a bit - Remove stray debug code - Improve ugly macros a bit - Remove some whitespace damage - (Also fix up some accumulated damage in perf_event.h) Signed-off-by: Ingo Molnar Cc: Stephane Eranian Cc: Peter Zijlstra LKML-Reference: --- arch/x86/kernel/cpu/perf_event.c | 37 ++++++++----------------------------- 1 file changed, 8 insertions(+), 29 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 66de282..fdbe248 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -93,24 +93,19 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; -#define EVENT_CONSTRAINT(c, n, m) { \ +#define EVENT_CONSTRAINT(c, n, m) { \ { .idxmsk64[0] = (n) }, \ .code = (c), \ .cmask = (m), \ .weight = HWEIGHT64((u64)(n)), \ } -#define INTEL_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +#define INTEL_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) +#define FIXED_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) -#define FIXED_EVENT_CONSTRAINT(c, n) \ 
- EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) +#define EVENT_CONSTRAINT_END EVENT_CONSTRAINT(0, 0, 0) -#define EVENT_CONSTRAINT_END \ - EVENT_CONSTRAINT(0, 0, 0) - -#define for_each_event_constraint(e, c) \ - for ((e) = (c); (e)->cmask; (e)++) +#define for_each_event_constraint(e, c) for ((e) = (c); (e)->cmask; (e)++) /* * struct x86_pmu - generic x86 pmu @@ -1276,14 +1271,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (test_bit(hwc->idx, used_mask)) break; -#if 0 - pr_debug("CPU%d fast config=0x%llx idx=%d assign=%c\n", - smp_processor_id(), - hwc->config, - hwc->idx, - assign ? 'y' : 'n'); -#endif - set_bit(hwc->idx, used_mask); if (assign) assign[i] = hwc->idx; @@ -1333,14 +1320,6 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) if (j == X86_PMC_IDX_MAX) break; -#if 0 - pr_debug("CPU%d slow config=0x%llx idx=%d assign=%c\n", - smp_processor_id(), - hwc->config, - j, - assign ? 'y' : 'n'); -#endif - set_bit(j, used_mask); if (assign) @@ -2596,9 +2575,9 @@ static const struct pmu pmu = { * validate a single event group * * validation include: - * - check events are compatible which each other - * - events do not compete for the same counter - * - number of events <= number of counters + * - check events are compatible which each other + * - events do not compete for the same counter + * - number of events <= number of counters * * validation ensures the group can be loaded onto the * PMU if it was the only group available. -- cgit v1.1 From 2e8418736dff9c6fdadb2f87dcc2087cebf32167 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 25 Jan 2010 15:58:43 +0100 Subject: perf_event: x86: Deduplicate the disable code Share the meat of the x86_pmu_disable() code with hw_perf_enable(). Also remove the barrier() from that code, since I could not convince myself we actually need it. 
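Roughly, the shape of the refactoring, sketched with invented names instead of the actual x86_pmu symbols: the counter-stopping steps live in one helper that both the removal path and the rescheduling path call:

struct event { int idx; };

static void hw_disable(struct event *e)  { (void)e; }   /* stand-in */
static void drain_count(struct event *e) { (void)e; }   /* stand-in */

/* shared meat: stop the counter and fold its remaining delta in */
static void stop_event(struct event *e)
{
        hw_disable(e);
        drain_count(e);
}

/* removal path: stop, then forget the event entirely */
static void remove_event(struct event *e)
{
        stop_event(e);
        /* ... drop it from the per-cpu event list ... */
}

/* rescheduling path: stop, then mark it as needing a new counter */
static void move_event(struct event *e)
{
        stop_event(e);
        e->idx = -1;
}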
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 29 +++++++++++++---------------- 1 file changed, 13 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index fdbe248..07fa0c2 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1401,6 +1401,8 @@ static inline void x86_assign_hw_event(struct perf_event *event, } } +static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc); + void hw_perf_enable(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1426,13 +1428,7 @@ void hw_perf_enable(void) if (hwc->idx == -1 || hwc->idx == cpuc->assign[i]) continue; - x86_pmu.disable(hwc, hwc->idx); - - clear_bit(hwc->idx, cpuc->active_mask); - barrier(); - cpuc->events[hwc->idx] = NULL; - - x86_perf_event_update(event, hwc, hwc->idx); + __x86_pmu_disable(event, cpuc); hwc->idx = -1; } @@ -1822,11 +1818,10 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) event->pending_kill = POLL_IN; } -static void x86_pmu_disable(struct perf_event *event) +static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - int i, idx = hwc->idx; + int idx = hwc->idx; /* * Must be done before we disable, otherwise the nmi handler @@ -1836,12 +1831,6 @@ static void x86_pmu_disable(struct perf_event *event) x86_pmu.disable(hwc, idx); /* - * Make sure the cleared pointer becomes visible before we - * (potentially) free the event: - */ - barrier(); - - /* * Drain the remaining delta count out of a event * that we are disabling: */ @@ -1852,6 +1841,14 @@ static void x86_pmu_disable(struct perf_event *event) intel_pmu_drain_bts_buffer(cpuc); cpuc->events[idx] = NULL; +} + +static void x86_pmu_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int i; + + __x86_pmu_disable(event, cpuc); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { -- cgit v1.1 From ed8777fc132e589d48a0ba854fdbb5d8203b58e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Jan 2010 23:07:46 +0100 Subject: perf_events, x86: Fix event constraint masks Since constraints are specified on the event number, not number and unit mask shorten the constraint masks so that we'll actually match something. 
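In plainer terms, a self-contained sketch of the comparison, using illustrative constants for the two sub-fields of the event-select register (event code in bits 0-7, unit mask in bits 8-15): when the mask also covers the unit-mask bits, a config with any unit mask set can never equal a bare event code, so the constraint never matches:

#include <stdint.h>

#define EVTSEL_MASK 0x000000ffULL       /* event-select field only */
#define UMASK_MASK  0x0000ff00ULL       /* unit-mask field */

/* over-wide mask: (config & mask) keeps the umask bits, so 0x01c1 != 0xc1 */
static int matches_too_strict(uint64_t config, uint64_t code)
{
        return (config & (EVTSEL_MASK | UMASK_MASK)) == code;
}

/* narrowed mask: compare only the event-select field, as the fix does */
static int matches(uint64_t config, uint64_t code)
{
        return (config & EVTSEL_MASK) == code;
}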
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100127221121.967610372@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 2 +- arch/x86/kernel/cpu/perf_event.c | 13 +++++++++---- 2 files changed, 10 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index dbc0826..ff5ede1 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -49,7 +49,7 @@ INTEL_ARCH_INV_MASK| \ INTEL_ARCH_EDGE_MASK|\ INTEL_ARCH_UNIT_MASK|\ - INTEL_ARCH_EVENT_MASK) + INTEL_ARCH_EVTSEL_MASK) #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 07fa0c2..951213a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -100,12 +100,17 @@ struct cpu_hw_events { .weight = HWEIGHT64((u64)(n)), \ } -#define INTEL_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) -#define FIXED_EVENT_CONSTRAINT(c, n) EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) +#define INTEL_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) -#define EVENT_CONSTRAINT_END EVENT_CONSTRAINT(0, 0, 0) +#define FIXED_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) -#define for_each_event_constraint(e, c) for ((e) = (c); (e)->cmask; (e)++) +#define EVENT_CONSTRAINT_END \ + EVENT_CONSTRAINT(0, 0, 0) + +#define for_each_event_constraint(e, c) \ + for ((e) = (c); (e)->cmask; (e)++) /* * struct x86_pmu - generic x86 pmu -- cgit v1.1 From 1a6e21f791fe85b40a9ddbafe999ab8ccffc3f78 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Jan 2010 23:07:47 +0100 Subject: perf_events, x86: Clean up hw_perf_*_all() implementation Put the recursion avoidance code in the generic hook instead of replicating it in each implementation. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100127221122.057507285@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 59 ++++++++++------------------------------ 1 file changed, 14 insertions(+), 45 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 951213a..cf10839 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1099,15 +1099,8 @@ static int __hw_perf_event_init(struct perf_event *event) static void p6_pmu_disable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); u64 val; - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -1118,12 +1111,6 @@ static void intel_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - barrier(); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) @@ -1135,17 +1122,6 @@ static void amd_pmu_disable_all(void) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - if (!cpuc->enabled) - return; - - cpuc->enabled = 0; - /* - * ensure we write the disable before we start disabling the - * events proper, so that amd_pmu_enable_event() does the - * right thing. 
- */ - barrier(); - for (idx = 0; idx < x86_pmu.num_events; idx++) { u64 val; @@ -1166,23 +1142,20 @@ void hw_perf_disable(void) if (!x86_pmu_initialized()) return; - if (cpuc->enabled) - cpuc->n_added = 0; + if (!cpuc->enabled) + return; + + cpuc->n_added = 0; + cpuc->enabled = 0; + barrier(); x86_pmu.disable_all(); } static void p6_pmu_enable_all(void) { - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); unsigned long val; - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); val |= ARCH_PERFMON_EVENTSEL0_ENABLE; @@ -1193,12 +1166,6 @@ static void intel_pmu_enable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { @@ -1217,12 +1184,6 @@ static void amd_pmu_enable_all(void) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - if (cpuc->enabled) - return; - - cpuc->enabled = 1; - barrier(); - for (idx = 0; idx < x86_pmu.num_events; idx++) { struct perf_event *event = cpuc->events[idx]; u64 val; @@ -1417,6 +1378,10 @@ void hw_perf_enable(void) if (!x86_pmu_initialized()) return; + + if (cpuc->enabled) + return; + if (cpuc->n_added) { /* * apply assignment obtained either from @@ -1461,6 +1426,10 @@ void hw_perf_enable(void) cpuc->n_added = 0; perf_events_lapic_init(); } + + cpuc->enabled = 1; + barrier(); + x86_pmu.enable_all(); } -- cgit v1.1 From 452a339a976e7f782c786eb3f73080401e2fa3a6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Jan 2010 23:07:48 +0100 Subject: perf_events, x86: Implement Intel Westmere support The new Intel documentation includes Westmere arch specific event maps that are significantly different from the Nehalem ones. Add support for this generation. Found the CPUID model numbers on wikipedia. Also ammend some Nehalem constraints, spotted those when looking for the differences between Nehalem and Westmere. Signed-off-by: Peter Zijlstra Cc: Arjan van de Ven Cc: "H. 
Peter Anvin" Cc: Stephane Eranian LKML-Reference: <20100127221122.151865645@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 124 ++++++++++++++++++++++++++++++++++++--- 1 file changed, 117 insertions(+), 7 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index cf10839..3fac0bf 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -244,18 +244,26 @@ static struct event_constraint intel_core_event_constraints[] = static struct event_constraint intel_nehalem_event_constraints[] = { - FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ - FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ - INTEL_EVENT_CONSTRAINT(0x4c, 0x3), /* LOAD_HIT_PRE */ INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ - INTEL_EVENT_CONSTRAINT(0x52, 0x3), /* L1D_CACHE_PREFETCH_LOCK_FB_HIT */ - INTEL_EVENT_CONSTRAINT(0x53, 0x3), /* L1D_CACHE_LOCK_FB_HIT */ - INTEL_EVENT_CONSTRAINT(0xc5, 0x3), /* CACHE_LOCK_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_westmere_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ + INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ EVENT_CONSTRAINT_END }; @@ -286,6 +294,97 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; +static __initconst u64 westmere_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ + [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ + [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ + [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ + [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + }, + }, + [ C(DTLB) ] = { + [ 
C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ + [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + static __initconst u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] @@ -2423,7 +2522,9 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_core_event_constraints; pr_cont("Core2 events, "); break; - case 26: + + case 26: /* 45 nm nehalem, "Bloomfield" */ + case 30: /* 45 nm nehalem, "Lynnfield" */ memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -2437,6 +2538,15 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_gen_event_constraints; pr_cont("Atom events, "); break; + + case 37: /* 32 nm nehalem, "Clarkdale" */ + case 44: /* 32 nm nehalem, "Gulftown" */ + memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + x86_pmu.event_constraints = intel_westmere_event_constraints; + pr_cont("Westmere events, "); + break; default: /* * default constraints for v2 and up -- cgit v1.1 From 18c01f8abff51e4910cc5ffb4b710e8c6eea60c9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 27 Jan 2010 23:07:49 +0100 Subject: perf_events, x86: Remove spurious counter reset from x86_pmu_enable() At enable time the counter might still have a ->idx pointing to a previously occupied location that might now be taken by another event. Resetting the counter at that location with data from this event will destroy the other counter's count. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <20100127221122.261477183@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 3fac0bf..518eb3e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1762,9 +1762,6 @@ static int x86_pmu_enable(struct perf_event *event) cpuc->n_events = n; cpuc->n_added = n - n0; - if (hwc->idx != -1) - x86_perf_event_set_period(event, hwc, hwc->idx); - return 0; } -- cgit v1.1 From ab09809f2eee1dc2d8f8bea636e77d176ba6c648 Mon Sep 17 00:00:00 2001 From: Andy Shevchenko Date: Tue, 2 Feb 2010 14:38:12 -0800 Subject: x86, doc: Fix minor spelling error in arch/x86/mm/gup.c Fix minor spelling error in comment. No code change. Signed-off-by: Andy Shevchenko LKML-Reference: <201002022238.o12McDiF018720@imap1.linux-foundation.org> Cc: Ingo Molnar Signed-off-by: Andrew Morton Signed-off-by: H. 
Peter Anvin --- arch/x86/mm/gup.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/mm/gup.c b/arch/x86/mm/gup.c index 71da1bc..738e659 100644 --- a/arch/x86/mm/gup.c +++ b/arch/x86/mm/gup.c @@ -18,7 +18,7 @@ static inline pte_t gup_get_pte(pte_t *ptep) #else /* * With get_user_pages_fast, we walk down the pagetables without taking - * any locks. For this we would like to load the pointers atoimcally, + * any locks. For this we would like to load the pointers atomically, * but that is not possible (without expensive cmpxchg8b) on PAE. What * we do have is the guarantee that a pte will only either go from not * present to present, or present to not present or both -- it will not -- cgit v1.1 From 615d0ebbc782b67296e3226c293f520f93f93515 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 2 Feb 2010 16:49:04 -0500 Subject: kprobes: Disable booster when CONFIG_PREEMPT=y Disable kprobe booster when CONFIG_PREEMPT=y at this time, because it can't ensure that all kernel threads preempted on kprobe's boosted slot run out from the slot even using freeze_processes(). The booster on preemptive kernel will be resumed if synchronize_tasks() or something like that is introduced. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Ananth N Mavinakayanahalli Cc: Frederic Weisbecker Cc: Jim Keniston Cc: Mathieu Desnoyers Cc: Steven Rostedt LKML-Reference: <20100202214904.4694.24330.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 5b8c750..9453815 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -429,7 +429,7 @@ void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, static void __kprobes setup_singlestep(struct kprobe *p, struct pt_regs *regs, struct kprobe_ctlblk *kcb) { -#if !defined(CONFIG_PREEMPT) || defined(CONFIG_FREEZER) +#if !defined(CONFIG_PREEMPT) if (p->ainsn.boostable == 1 && !p->post_handler) { /* Boost up -- we can execute copied instructions directly */ reset_current_kprobe(); -- cgit v1.1 From 2cfa19780d61740f65790c5bae363b759d7c96fa Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 2 Feb 2010 16:49:11 -0500 Subject: ftrace/alternatives: Introducing *_text_reserved functions Introducing *_text_reserved functions for checking the text address range is partially reserved or not. This patch provides checking routines for x86 smp alternatives and dynamic ftrace. Since both functions modify fixed pieces of kernel text, they should reserve and protect those from other dynamic text modifier, like kprobes. This will also be extended when introducing other subsystems which modify fixed pieces of kernel text. Dynamic text modifiers should avoid those. 
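For context, a hedged sketch of how a caller such as a kprobe-style text patcher might consult the new check before touching an address; the wrapper below is invented, only alternatives_text_reserved() comes from the patch itself:

#include <errno.h>
#include <stddef.h>

int alternatives_text_reserved(void *start, void *end);        /* added above */

/* hypothetical guard: refuse to patch text that smp-alternatives reserved */
static int may_patch_text(void *addr, size_t len)
{
        void *end = (char *)addr + len - 1;

        if (alternatives_text_reserved(addr, end))
                return -EINVAL;         /* someone else will rewrite this range */

        return 0;
}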
Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: przemyslaw@pawelczyk.it Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Mathieu Desnoyers Cc: Jason Baron LKML-Reference: <20100202214911.4694.16587.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/alternative.h | 5 +++++ arch/x86/kernel/alternative.c | 16 ++++++++++++++++ 2 files changed, 21 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/alternative.h b/arch/x86/include/asm/alternative.h index 69b74a7..ac80b7d 100644 --- a/arch/x86/include/asm/alternative.h +++ b/arch/x86/include/asm/alternative.h @@ -65,12 +65,17 @@ extern void alternatives_smp_module_add(struct module *mod, char *name, void *text, void *text_end); extern void alternatives_smp_module_del(struct module *mod); extern void alternatives_smp_switch(int smp); +extern int alternatives_text_reserved(void *start, void *end); #else static inline void alternatives_smp_module_add(struct module *mod, char *name, void *locks, void *locks_end, void *text, void *text_end) {} static inline void alternatives_smp_module_del(struct module *mod) {} static inline void alternatives_smp_switch(int smp) {} +static inline int alternatives_text_reserved(void *start, void *end) +{ + return 0; +} #endif /* CONFIG_SMP */ /* alternative assembly primitive: */ diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index de7353c..3c13284 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -390,6 +390,22 @@ void alternatives_smp_switch(int smp) mutex_unlock(&smp_alt); } +/* Return 1 if the address range is reserved for smp-alternatives */ +int alternatives_text_reserved(void *start, void *end) +{ + struct smp_alt_module *mod; + u8 **ptr; + + list_for_each_entry(mod, &smp_alt_modules, next) { + if (mod->text > end || mod->text_end < start) + continue; + for (ptr = mod->locks; ptr < mod->locks_end; ptr++) + if (start <= *ptr && end >= *ptr) + return 1; + } + + return 0; +} #endif #ifdef CONFIG_PARAVIRT -- cgit v1.1 From 4554dbcb85a4ed2abaa2b6fa15649b796699ec89 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Tue, 2 Feb 2010 16:49:18 -0500 Subject: kprobes: Check probe address is reserved Check whether the address of new probe is already reserved by ftrace or alternatives (on x86) when registering new probe. If reserved, it returns an error and not register the probe. Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE Cc: Steven Rostedt Cc: przemyslaw@pawelczyk.it Cc: Frederic Weisbecker Cc: Ananth N Mavinakayanahalli Cc: Jim Keniston Cc: Mathieu Desnoyers Cc: Jason Baron LKML-Reference: <20100202214918.4694.94179.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/kprobes.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 9453815..5de9f4a 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -337,6 +337,9 @@ static void __kprobes arch_copy_kprobe(struct kprobe *p) int __kprobes arch_prepare_kprobe(struct kprobe *p) { + if (alternatives_text_reserved(p->addr, p->addr)) + return -EINVAL; + if (!can_probe((unsigned long)p->addr)) return -EILSEQ; /* insn: must be on special executable page on x86. 
*/ -- cgit v1.1 From 8c48e444191de0ff84e85d41180d7bc3e74f14ef Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Jan 2010 13:25:31 +0100 Subject: perf_events, x86: Implement intel core solo/duo support Implement Intel Core Solo/Duo, aka. Intel Architectural Performance Monitoring Version 1. Signed-off-by: Peter Zijlstra Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker Cc: Arjan van de Ven LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 133 ++++++++++++++++++--------------------- 1 file changed, 61 insertions(+), 72 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1846ead..5b91992 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -228,6 +228,17 @@ static const u64 intel_perfmon_event_map[] = static struct event_constraint intel_core_event_constraints[] = { + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_core2_event_constraints[] = +{ FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ @@ -1216,7 +1227,7 @@ static void intel_pmu_disable_all(void) intel_pmu_disable_bts(); } -static void amd_pmu_disable_all(void) +static void x86_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; @@ -1226,11 +1237,11 @@ static void amd_pmu_disable_all(void) if (!test_bit(idx, cpuc->active_mask)) continue; - rdmsrl(MSR_K7_EVNTSEL0 + idx, val); + rdmsrl(x86_pmu.eventsel + idx, val); if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) continue; val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + wrmsrl(x86_pmu.eventsel + idx, val); } } @@ -1278,7 +1289,7 @@ static void intel_pmu_enable_all(void) } } -static void amd_pmu_enable_all(void) +static void x86_pmu_enable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; @@ -1292,7 +1303,7 @@ static void amd_pmu_enable_all(void) val = event->hw.config; val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_K7_EVNTSEL0 + idx, val); + wrmsrl(x86_pmu.eventsel + idx, val); } } @@ -1546,7 +1557,7 @@ static inline void intel_pmu_ack_status(u64 ack) wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } -static inline void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) +static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) { (void)checking_wrmsrl(hwc->config_base + idx, hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); @@ -1598,12 +1609,6 @@ intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) x86_pmu_disable_event(hwc, idx); } -static inline void -amd_pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - x86_pmu_disable_event(hwc, idx); -} - static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); /* @@ -1723,15 +1728,14 @@ static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) return; } - x86_pmu_enable_event(hwc, idx); + __x86_pmu_enable_event(hwc, idx); } -static void amd_pmu_enable_event(struct hw_perf_event *hwc, int idx) +static 
void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - if (cpuc->enabled) - x86_pmu_enable_event(hwc, idx); + __x86_pmu_enable_event(hwc, idx); } /* @@ -1988,50 +1992,6 @@ static void intel_pmu_reset(void) local_irq_restore(flags); } -static int p6_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - struct perf_event *event; - struct hw_perf_event *hwc; - int idx, handled = 0; - u64 val; - - data.addr = 0; - data.raw = NULL; - - cpuc = &__get_cpu_var(cpu_hw_events); - - for (idx = 0; idx < x86_pmu.num_events; idx++) { - if (!test_bit(idx, cpuc->active_mask)) - continue; - - event = cpuc->events[idx]; - hwc = &event->hw; - - val = x86_perf_event_update(event, hwc, idx); - if (val & (1ULL << (x86_pmu.event_bits - 1))) - continue; - - /* - * event overflow - */ - handled = 1; - data.period = event->hw.last_period; - - if (!x86_perf_event_set_period(event, hwc, idx)) - continue; - - if (perf_event_overflow(event, 1, &data, regs)) - p6_pmu_disable_event(hwc, idx); - } - - if (handled) - inc_irq_stat(apic_perf_irqs); - - return handled; -} - /* * This handler is triggered by the local APIC, so the APIC IRQ handling * rules apply: @@ -2098,7 +2058,7 @@ again: return 1; } -static int amd_pmu_handle_irq(struct pt_regs *regs) +static int x86_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; struct cpu_hw_events *cpuc; @@ -2133,7 +2093,7 @@ static int amd_pmu_handle_irq(struct pt_regs *regs) continue; if (perf_event_overflow(event, 1, &data, regs)) - amd_pmu_disable_event(hwc, idx); + x86_pmu.disable(hwc, idx); } if (handled) @@ -2374,7 +2334,7 @@ static __read_mostly struct notifier_block perf_event_nmi_notifier = { static __initconst struct x86_pmu p6_pmu = { .name = "p6", - .handle_irq = p6_pmu_handle_irq, + .handle_irq = x86_pmu_handle_irq, .disable_all = p6_pmu_disable_all, .enable_all = p6_pmu_enable_all, .enable = p6_pmu_enable_event, @@ -2401,6 +2361,29 @@ static __initconst struct x86_pmu p6_pmu = { .event_constraints = intel_p6_event_constraints }; +static __initconst struct x86_pmu core_pmu = { + .name = "core", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .raw_event = intel_pmu_raw_event, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32 bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL << 31) - 1, + .get_event_constraints = intel_get_event_constraints, + .event_constraints = intel_core_event_constraints, +}; + static __initconst struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -2427,11 +2410,11 @@ static __initconst struct x86_pmu intel_pmu = { static __initconst struct x86_pmu amd_pmu = { .name = "AMD", - .handle_irq = amd_pmu_handle_irq, - .disable_all = amd_pmu_disable_all, - .enable_all = amd_pmu_enable_all, - .enable = amd_pmu_enable_event, - .disable = amd_pmu_disable_event, + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, 
.event_map = amd_pmu_event_map, @@ -2498,9 +2481,10 @@ static __init int intel_pmu_init(void) version = eax.split.version_id; if (version < 2) - return -ENODEV; + x86_pmu = core_pmu; + else + x86_pmu = intel_pmu; - x86_pmu = intel_pmu; x86_pmu.version = version; x86_pmu.num_events = eax.split.num_events; x86_pmu.event_bits = eax.split.bit_width; @@ -2510,12 +2494,17 @@ static __init int intel_pmu_init(void) * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events: */ - x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); + if (version > 1) + x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); /* * Install the hw-cache-events table: */ switch (boot_cpu_data.x86_model) { + case 14: /* 65 nm core solo/duo, "Yonah" */ + pr_cont("Core events, "); + break; + case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ @@ -2523,7 +2512,7 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); - x86_pmu.event_constraints = intel_core_event_constraints; + x86_pmu.event_constraints = intel_core2_event_constraints; pr_cont("Core2 events, "); break; -- cgit v1.1 From fce877e3a429940a986e085a41e8b57f2d922e36 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 29 Jan 2010 13:25:12 +0100 Subject: bitops: Ensure the compile time HWEIGHT is only used for such Avoid accidental misuse by failing to compile things Suggested-by: Andrew Morton Signed-off-by: Peter Zijlstra Cc: Linus Torvalds LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5b91992..96cfc1a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -93,13 +93,16 @@ struct cpu_hw_events { struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; -#define EVENT_CONSTRAINT(c, n, m) { \ +#define __EVENT_CONSTRAINT(c, n, m, w) {\ { .idxmsk64[0] = (n) }, \ .code = (c), \ .cmask = (m), \ - .weight = HWEIGHT64((u64)(n)), \ + .weight = (w), \ } +#define EVENT_CONSTRAINT(c, n, m) \ + __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) + #define INTEL_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) @@ -2622,7 +2625,8 @@ void __init init_hw_perf_events(void) register_die_notifier(&perf_event_nmi_notifier); unconstrained = (struct event_constraint) - EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0); + __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, + 0, x86_pmu.num_events); pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.event_bits); -- cgit v1.1 From 447a194b393f32699607fd99617a40abd6a95114 Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 1 Feb 2010 14:50:01 +0200 Subject: perf_events, x86: Fix bug in hw_perf_enable() We cannot assume that because hwc->idx == assign[i], we can avoid reprogramming the counter in hw_perf_enable(). The event may have been scheduled out and another event may have been programmed into this counter. Thus, we need a more robust way of verifying if the counter still contains config/data related to an event. This patch adds a generation number to each counter on each cpu. 
Using this mechanism we can verify reliabilty whether the content of a counter corresponds to an event. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <4b66dc67.0b38560a.1635.ffffae18@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 34 ++++++++++++++++++++++++++++------ 1 file changed, 28 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 96cfc1a..a920f17 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -90,6 +90,7 @@ struct cpu_hw_events { int n_events; int n_added; int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ + u64 tags[X86_PMC_IDX_MAX]; struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ }; @@ -1142,6 +1143,8 @@ static int __hw_perf_event_init(struct perf_event *event) hwc->config = ARCH_PERFMON_EVENTSEL_INT; hwc->idx = -1; + hwc->last_cpu = -1; + hwc->last_tag = ~0ULL; /* * Count user and OS events unless requested not to. @@ -1457,11 +1460,14 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, return n; } - static inline void x86_assign_hw_event(struct perf_event *event, - struct hw_perf_event *hwc, int idx) + struct cpu_hw_events *cpuc, int i) { - hwc->idx = idx; + struct hw_perf_event *hwc = &event->hw; + + hwc->idx = cpuc->assign[i]; + hwc->last_cpu = smp_processor_id(); + hwc->last_tag = ++cpuc->tags[i]; if (hwc->idx == X86_PMC_IDX_FIXED_BTS) { hwc->config_base = 0; @@ -1480,6 +1486,15 @@ static inline void x86_assign_hw_event(struct perf_event *event, } } +static inline int match_prev_assignment(struct hw_perf_event *hwc, + struct cpu_hw_events *cpuc, + int i) +{ + return hwc->idx == cpuc->assign[i] && + hwc->last_cpu == smp_processor_id() && + hwc->last_tag == cpuc->tags[i]; +} + static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc); void hw_perf_enable(void) @@ -1508,7 +1523,14 @@ void hw_perf_enable(void) event = cpuc->event_list[i]; hwc = &event->hw; - if (hwc->idx == -1 || hwc->idx == cpuc->assign[i]) + /* + * we can avoid reprogramming counter if: + * - assigned same counter as last time + * - running on same CPU as last time + * - no other event has used the counter since + */ + if (hwc->idx == -1 || + match_prev_assignment(hwc, cpuc, i)) continue; __x86_pmu_disable(event, cpuc); @@ -1522,12 +1544,12 @@ void hw_perf_enable(void) hwc = &event->hw; if (hwc->idx == -1) { - x86_assign_hw_event(event, hwc, cpuc->assign[i]); + x86_assign_hw_event(event, cpuc, i); x86_perf_event_set_period(event, hwc, hwc->idx); } /* * need to mark as active because x86_pmu_disable() - * clear active_mask and eventsp[] yet it preserves + * clear active_mask and events[] yet it preserves * idx */ set_bit(hwc->idx, cpuc->active_mask); -- cgit v1.1 From 076dc4a65a6d99a16979e2c7917e669fb8c91ee5 Mon Sep 17 00:00:00 2001 From: Masami Hiramatsu Date: Fri, 5 Feb 2010 12:16:47 -0500 Subject: x86/alternatives: Fix build warning Fixes these warnings: arch/x86/kernel/alternative.c: In function 'alternatives_text_reserved': arch/x86/kernel/alternative.c:402: warning: comparison of distinct pointer types lacks a cast arch/x86/kernel/alternative.c:402: warning: comparison of distinct pointer types lacks a cast arch/x86/kernel/alternative.c:405: warning: comparison of distinct pointer types lacks a cast arch/x86/kernel/alternative.c:405: 
warning: comparison of distinct pointer types lacks a cast Caused by: 2cfa197: ftrace/alternatives: Introducing *_text_reserved functions Changes in v2: - Use local variables to compare, instead of type casts. Reported-by: Ingo Molnar Signed-off-by: Masami Hiramatsu Cc: systemtap Cc: DLE LKML-Reference: <20100205171647.15750.37221.stgit@dhcp-100-2-132.bos.redhat.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/alternative.c | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/alternative.c b/arch/x86/kernel/alternative.c index 3c13284..e63b80e 100644 --- a/arch/x86/kernel/alternative.c +++ b/arch/x86/kernel/alternative.c @@ -395,12 +395,14 @@ int alternatives_text_reserved(void *start, void *end) { struct smp_alt_module *mod; u8 **ptr; + u8 *text_start = start; + u8 *text_end = end; list_for_each_entry(mod, &smp_alt_modules, next) { - if (mod->text > end || mod->text_end < start) + if (mod->text > text_end || mod->text_end < text_start) continue; for (ptr = mod->locks; ptr < mod->locks_end; ptr++) - if (start <= *ptr && end >= *ptr) + if (text_start <= *ptr && text_end >= *ptr) return 1; } -- cgit v1.1 From 923de3cf5bf12049628019010e36623fca5ef6d1 Mon Sep 17 00:00:00 2001 From: Jason Wang Date: Wed, 27 Jan 2010 19:13:49 +0800 Subject: kvmclock: count total_sleep_time when updating guest clock Current kvm wallclock does not consider the total_sleep_time which could cause wrong wallclock in guest after host suspend/resume. This patch solve this issue by counting total_sleep_time to get the correct host boot time. Cc: stable@kernel.org Signed-off-by: Jason Wang Acked-by: Glauber Costa Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/x86.c | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 1ddcad4..a1e1bc9 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -670,7 +670,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) { static int version; struct pvclock_wall_clock wc; - struct timespec now, sys, boot; + struct timespec boot; if (!wall_clock) return; @@ -685,9 +685,7 @@ static void kvm_write_wall_clock(struct kvm *kvm, gpa_t wall_clock) * wall clock specified here. guest system time equals host * system time for us, thus we must fill in host boot time here. */ - now = current_kernel_time(); - ktime_get_ts(&sys); - boot = ns_to_timespec(timespec_to_ns(&now) - timespec_to_ns(&sys)); + getboottime(&boot); wc.sec = boot.tv_sec; wc.nsec = boot.tv_nsec; @@ -762,6 +760,7 @@ static void kvm_write_guest_time(struct kvm_vcpu *v) local_irq_save(flags); kvm_get_msr(v, MSR_IA32_TSC, &vcpu->hv_clock.tsc_timestamp); ktime_get_ts(&ts); + monotonic_to_bootbased(&ts); local_irq_restore(flags); /* With all the info we got, fill in the values */ -- cgit v1.1 From ee73f656a604d5aa9df86a97102e4e462dd79924 Mon Sep 17 00:00:00 2001 From: Marcelo Tosatti Date: Fri, 29 Jan 2010 17:28:41 -0200 Subject: KVM: PIT: control word is write-only PIT control word (address 0x43) is write-only, reads are undefined. 
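A simplified sketch of the guard this adds, with invented names rather than the real kvm_io_device plumbing: a read that decodes to address 3 (the 0x43 mode/command register) is completed without consulting any channel state, since that register has no architected read behaviour:

static int pit_read(unsigned int addr, unsigned char *val)
{
        addr &= 3;              /* 0x40-0x42 are counters, 0x43 is control */

        if (addr == 3) {        /* 0x43: write-only control word */
                *val = 0;       /* nothing defined to return */
                return 0;
        }

        /* ... normal latch / read-back handling for channels 0-2 ... */
        return 0;
}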
Cc: stable@kernel.org Signed-off-by: Marcelo Tosatti --- arch/x86/kvm/i8254.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kvm/i8254.c b/arch/x86/kvm/i8254.c index 296aba4..15578f1 100644 --- a/arch/x86/kvm/i8254.c +++ b/arch/x86/kvm/i8254.c @@ -467,6 +467,9 @@ static int pit_ioport_read(struct kvm_io_device *this, return -EOPNOTSUPP; addr &= KVM_PIT_CHANNEL_MASK; + if (addr == 3) + return 0; + s = &pit_state->channels[addr]; mutex_lock(&pit_state->lock); -- cgit v1.1 From cf9db6c41f739a294286847aab1e85f39aef1781 Mon Sep 17 00:00:00 2001 From: "Serge E. Hallyn" Date: Mon, 8 Feb 2010 20:35:02 -0600 Subject: x86-32: Make AT_VECTOR_SIZE_ARCH=2 Both x86-32 and x86-64 with 32-bit compat use ARCH_DLINFO_IA32, which defines two saved_auxv entries. But system.h only defines AT_VECTOR_SIZE_ARCH as 2 for CONFIG_IA32_EMULATION, not for CONFIG_X86_32. Fix that. Signed-off-by: Serge E. Hallyn LKML-Reference: <20100209023502.GA15408@us.ibm.com> Signed-off-by: H. Peter Anvin --- arch/x86/include/asm/system.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/system.h b/arch/x86/include/asm/system.h index ecb544e..e04740f 100644 --- a/arch/x86/include/asm/system.h +++ b/arch/x86/include/asm/system.h @@ -11,9 +11,9 @@ #include /* entries in ARCH_DLINFO: */ -#ifdef CONFIG_IA32_EMULATION +#if defined(CONFIG_IA32_EMULATION) || !defined(CONFIG_X86_64) # define AT_VECTOR_SIZE_ARCH 2 -#else +#else /* else it's non-compat x86-64 */ # define AT_VECTOR_SIZE_ARCH 1 #endif -- cgit v1.1 From 681ee44d40d7c93b42118320e4620d07d8704fd6 Mon Sep 17 00:00:00 2001 From: Suresh Siddha Date: Tue, 9 Feb 2010 18:01:44 -0800 Subject: x86, apic: Don't use logical-flat mode when CPU hotplug may exceed 8 CPUs We need to fall back from logical-flat APIC mode to physical-flat mode when we have more than 8 CPUs. However, in the presence of CPU hotplug(with bios listing not enabled but possible cpus as disabled cpus in MADT), we have to consider the number of possible CPUs rather than the number of current CPUs; otherwise we may cross the 8-CPU boundary when CPUs are added later. 32bit apic code can use more cleanups (like the removal of vendor checks in 32bit default_setup_apic_routing()) and more unifications with 64bit code. Yinghai has some patches in works already. This patch addresses the boot issue that is reported in the virtualization guest context. [ hpa: incorporated function annotation feedback from Yinghai Lu ] Signed-off-by: Suresh Siddha LKML-Reference: <1265767304.2833.19.camel@sbs-t61.sc.intel.com> Acked-by: Shaohui Zheng Reviewed-by: Yinghai Lu Cc: Signed-off-by: H. 
Peter Anvin --- arch/x86/kernel/acpi/boot.c | 5 ----- arch/x86/kernel/apic/apic.c | 17 ----------------- arch/x86/kernel/apic/probe_32.c | 29 +++++++++++++++++++++++++++-- arch/x86/kernel/apic/probe_64.c | 2 +- arch/x86/kernel/mpparse.c | 7 ------- arch/x86/kernel/smpboot.c | 2 -- 6 files changed, 28 insertions(+), 34 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 036d28a..0acbcdf 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1185,9 +1185,6 @@ static void __init acpi_process_madt(void) if (!error) { acpi_lapic = 1; -#ifdef CONFIG_X86_BIGSMP - generic_bigsmp_probe(); -#endif /* * Parse MADT IO-APIC entries */ @@ -1197,8 +1194,6 @@ static void __init acpi_process_madt(void) acpi_ioapic = 1; smp_found_config = 1; - if (apic->setup_apic_routing) - apic->setup_apic_routing(); } } if (error == -EINVAL) { diff --git a/arch/x86/kernel/apic/apic.c b/arch/x86/kernel/apic/apic.c index 3987e44..dfca210 100644 --- a/arch/x86/kernel/apic/apic.c +++ b/arch/x86/kernel/apic/apic.c @@ -1641,9 +1641,7 @@ int __init APIC_init_uniprocessor(void) #endif enable_IR_x2apic(); -#ifdef CONFIG_X86_64 default_setup_apic_routing(); -#endif verify_local_APIC(); connect_bsp_APIC(); @@ -1891,21 +1889,6 @@ void __cpuinit generic_processor_info(int apicid, int version) if (apicid > max_physical_apicid) max_physical_apicid = apicid; -#ifdef CONFIG_X86_32 - if (num_processors > 8) { - switch (boot_cpu_data.x86_vendor) { - case X86_VENDOR_INTEL: - if (!APIC_XAPIC(version)) { - def_to_bigsmp = 0; - break; - } - /* If P4 and above fall through */ - case X86_VENDOR_AMD: - def_to_bigsmp = 1; - } - } -#endif - #if defined(CONFIG_SMP) || defined(CONFIG_X86_64) early_per_cpu(x86_cpu_to_apicid, cpu) = apicid; early_per_cpu(x86_bios_cpu_apicid, cpu) = apicid; diff --git a/arch/x86/kernel/apic/probe_32.c b/arch/x86/kernel/apic/probe_32.c index 1a6559f..99d2fe0 100644 --- a/arch/x86/kernel/apic/probe_32.c +++ b/arch/x86/kernel/apic/probe_32.c @@ -52,7 +52,32 @@ static int __init print_ipi_mode(void) } late_initcall(print_ipi_mode); -void default_setup_apic_routing(void) +void __init default_setup_apic_routing(void) +{ + int version = apic_version[boot_cpu_physical_apicid]; + + if (num_possible_cpus() > 8) { + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_INTEL: + if (!APIC_XAPIC(version)) { + def_to_bigsmp = 0; + break; + } + /* If P4 and above fall through */ + case X86_VENDOR_AMD: + def_to_bigsmp = 1; + } + } + +#ifdef CONFIG_X86_BIGSMP + generic_bigsmp_probe(); +#endif + + if (apic->setup_apic_routing) + apic->setup_apic_routing(); +} + +static void setup_apic_flat_routing(void) { #ifdef CONFIG_X86_IO_APIC printk(KERN_INFO @@ -103,7 +128,7 @@ struct apic apic_default = { .init_apic_ldr = default_init_apic_ldr, .ioapic_phys_id_map = default_ioapic_phys_id_map, - .setup_apic_routing = default_setup_apic_routing, + .setup_apic_routing = setup_apic_flat_routing, .multi_timer_check = NULL, .apicid_to_node = default_apicid_to_node, .cpu_to_logical_apicid = default_cpu_to_logical_apicid, diff --git a/arch/x86/kernel/apic/probe_64.c b/arch/x86/kernel/apic/probe_64.c index 450fe20..83e9be4 100644 --- a/arch/x86/kernel/apic/probe_64.c +++ b/arch/x86/kernel/apic/probe_64.c @@ -67,7 +67,7 @@ void __init default_setup_apic_routing(void) } #endif - if (apic == &apic_flat && num_processors > 8) + if (apic == &apic_flat && num_possible_cpus() > 8) apic = &apic_physflat; printk(KERN_INFO "Setting APIC routing to %s\n", apic->name); diff --git 
a/arch/x86/kernel/mpparse.c b/arch/x86/kernel/mpparse.c index 40b54ce..a2c1edd 100644 --- a/arch/x86/kernel/mpparse.c +++ b/arch/x86/kernel/mpparse.c @@ -359,13 +359,6 @@ static int __init smp_read_mpc(struct mpc_table *mpc, unsigned early) x86_init.mpparse.mpc_record(1); } -#ifdef CONFIG_X86_BIGSMP - generic_bigsmp_probe(); -#endif - - if (apic->setup_apic_routing) - apic->setup_apic_routing(); - if (!num_processors) printk(KERN_ERR "MPTABLE: no processors registered!\n"); return num_processors; diff --git a/arch/x86/kernel/smpboot.c b/arch/x86/kernel/smpboot.c index 678d0b8..b4e870c 100644 --- a/arch/x86/kernel/smpboot.c +++ b/arch/x86/kernel/smpboot.c @@ -1083,9 +1083,7 @@ void __init native_smp_prepare_cpus(unsigned int max_cpus) set_cpu_sibling_map(0); enable_IR_x2apic(); -#ifdef CONFIG_X86_64 default_setup_apic_routing(); -#endif if (smp_sanity_check(max_cpus) < 0) { printk(KERN_INFO "SMP disabled\n"); -- cgit v1.1 From 97c169d39b6846a564dc8d883832e7fef9bdb77d Mon Sep 17 00:00:00 2001 From: Len Brown Date: Tue, 16 Feb 2010 03:30:06 -0500 Subject: ACPI: remove Asus P2B-DS from acpi=ht blacklist We realized when we broke acpi=ht http://bugzilla.kernel.org/show_bug.cgi?id=14886 that acpi=ht is not needed on this box and folks have been using acpi=force on it anyway. Signed-off-by: Len Brown --- arch/x86/kernel/acpi/boot.c | 8 -------- 1 file changed, 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/acpi/boot.c b/arch/x86/kernel/acpi/boot.c index 0acbcdf..af1c583 100644 --- a/arch/x86/kernel/acpi/boot.c +++ b/arch/x86/kernel/acpi/boot.c @@ -1344,14 +1344,6 @@ static struct dmi_system_id __initdata acpi_dmi_table[] = { }, { .callback = force_acpi_ht, - .ident = "ASUS P2B-DS", - .matches = { - DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), - DMI_MATCH(DMI_BOARD_NAME, "P2B-DS"), - }, - }, - { - .callback = force_acpi_ht, .ident = "ASUS CUR-DLS", .matches = { DMI_MATCH(DMI_BOARD_VENDOR, "ASUSTeK Computer INC."), -- cgit v1.1 From 1252f238db48ec419f40c1bdf30fda649860eed9 Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 16 Feb 2010 15:02:13 +0100 Subject: x86: set_personality_ia32() misses force_personality32 05d43ed8a "x86: get rid of the insane TIF_ABI_PENDING bit" forgot about force_personality32. Fix. Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- arch/x86/kernel/process_64.c | 1 + 1 file changed, 1 insertion(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index 41a26a8..126f0b4 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -527,6 +527,7 @@ void set_personality_ia32(void) /* Make sure to be in 32bit mode */ set_thread_flag(TIF_IA32); + current->personality |= force_personality32; /* Prepare the first "return" to user space */ current_thread_info()->status |= TS_COMPAT; -- cgit v1.1 From 11557b24fdec13cb1c3d5f681688401a651ed54e Mon Sep 17 00:00:00 2001 From: Oleg Nesterov Date: Tue, 16 Feb 2010 15:24:01 +0100 Subject: x86: ELF_PLAT_INIT() shouldn't worry about TIF_IA32 The 64-bit version of ELF_PLAT_INIT() clears TIF_IA32, but at this point it has already been cleared by SET_PERSONALITY == set_personality_64bit. 
Signed-off-by: Oleg Nesterov Signed-off-by: Linus Torvalds --- arch/x86/include/asm/elf.h | 5 +---- 1 file changed, 1 insertion(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/elf.h b/arch/x86/include/asm/elf.h index 1994d3f..f2ad216 100644 --- a/arch/x86/include/asm/elf.h +++ b/arch/x86/include/asm/elf.h @@ -170,10 +170,7 @@ static inline void elf_common_init(struct thread_struct *t, } #define ELF_PLAT_INIT(_r, load_addr) \ -do { \ - elf_common_init(¤t->thread, _r, 0); \ - clear_thread_flag(TIF_IA32); \ -} while (0) + elf_common_init(¤t->thread, _r, 0) #define COMPAT_ELF_PLAT_INIT(regs, load_addr) \ elf_common_init(¤t->thread, regs, __USER_DS) -- cgit v1.1 From d76a0812ac4139ceb54daab3cc70e1bd8bd9d43a Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 8 Feb 2010 17:06:01 +0200 Subject: perf_events: Add new start/stop PMU callbacks In certain situations, the kernel may need to stop and start the same event rapidly. The current PMU callbacks do not distinguish between stop and release (i.e., stop + free the resource). Thus, a counter may be released, then it will be immediately re-acquired. Event scheduling will again take place with no guarantee to assign the same counter. On some processors, this may event yield to failure to assign the event back due to competion between cores. This patch is adding a new pair of callback to stop and restart a counter without actually release the underlying counter resource. On stop, the counter is stopped, its values saved and that's it. On start, the value is reloaded and counter is restarted (on x86, actual restart is delayed until perf_enable()). Signed-off-by: Stephane Eranian [ added fallback to ->enable/->disable for all other PMUs fixed x86_pmu_start() to call x86_pmu.enable() merged __x86_pmu_disable into x86_pmu_stop() ] Signed-off-by: Peter Zijlstra LKML-Reference: <4b703875.0a04d00a.7896.ffffb824@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 24 ++++++++++++++++++++---- 1 file changed, 20 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index a920f17..9173ea9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1495,7 +1495,7 @@ static inline int match_prev_assignment(struct hw_perf_event *hwc, hwc->last_tag == cpuc->tags[i]; } -static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc); +static void x86_pmu_stop(struct perf_event *event); void hw_perf_enable(void) { @@ -1533,7 +1533,7 @@ void hw_perf_enable(void) match_prev_assignment(hwc, cpuc, i)) continue; - __x86_pmu_disable(event, cpuc); + x86_pmu_stop(event); hwc->idx = -1; } @@ -1801,6 +1801,19 @@ static int x86_pmu_enable(struct perf_event *event) return 0; } +static int x86_pmu_start(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + if (hwc->idx == -1) + return -EAGAIN; + + x86_perf_event_set_period(event, hwc, hwc->idx); + x86_pmu.enable(hwc, hwc->idx); + + return 0; +} + static void x86_pmu_unthrottle(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1924,8 +1937,9 @@ static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) event->pending_kill = POLL_IN; } -static void __x86_pmu_disable(struct perf_event *event, struct cpu_hw_events *cpuc) +static void x86_pmu_stop(struct perf_event *event) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; 
int idx = hwc->idx; @@ -1954,7 +1968,7 @@ static void x86_pmu_disable(struct perf_event *event) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int i; - __x86_pmu_disable(event, cpuc); + x86_pmu_stop(event); for (i = 0; i < cpuc->n_events; i++) { if (event == cpuc->event_list[i]) { @@ -2667,6 +2681,8 @@ static inline void x86_pmu_read(struct perf_event *event) static const struct pmu pmu = { .enable = x86_pmu_enable, .disable = x86_pmu_disable, + .start = x86_pmu_start, + .stop = x86_pmu_stop, .read = x86_pmu_read, .unthrottle = x86_pmu_unthrottle, }; -- cgit v1.1 From 38331f62c20456454eed9ebea2525f072c6f1d2e Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Mon, 8 Feb 2010 17:17:01 +0200 Subject: perf_events, x86: AMD event scheduling This patch adds correct AMD NorthBridge event scheduling. NB events are events measuring L3 cache, Hypertransport traffic. They are identified by an event code >= 0xe0. They measure events on the Northbride which is shared by all cores on a package. NB events are counted on a shared set of counters. When a NB event is programmed in a counter, the data actually comes from a shared counter. Thus, access to those counters needs to be synchronized. We implement the synchronization such that no two cores can be measuring NB events using the same counters. Thus, we maintain a per-NB allocation table. The available slot is propagated using the event_constraint structure. Signed-off-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <4b703957.0702d00a.6bf2.7b7d@mx.google.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 265 ++++++++++++++++++++++++++++++++++++++- 1 file changed, 262 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9173ea9..aa12f36 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -80,6 +80,13 @@ struct event_constraint { int weight; }; +struct amd_nb { + int nb_id; /* NorthBridge id */ + int refcnt; /* reference count */ + struct perf_event *owners[X86_PMC_IDX_MAX]; + struct event_constraint event_constraints[X86_PMC_IDX_MAX]; +}; + struct cpu_hw_events { struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; @@ -92,6 +99,7 @@ struct cpu_hw_events { int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ u64 tags[X86_PMC_IDX_MAX]; struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ + struct amd_nb *amd_nb; }; #define __EVENT_CONSTRAINT(c, n, m, w) {\ @@ -153,6 +161,8 @@ struct x86_pmu { static struct x86_pmu x86_pmu __read_mostly; +static raw_spinlock_t amd_nb_lock; + static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -802,7 +812,7 @@ static u64 amd_pmu_event_map(int hw_event) static u64 amd_pmu_raw_event(u64 hw_event) { -#define K7_EVNTSEL_EVENT_MASK 0x7000000FFULL +#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL #define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL #define K7_EVNTSEL_EDGE_MASK 0x000040000ULL #define K7_EVNTSEL_INV_MASK 0x000800000ULL @@ -2210,6 +2220,7 @@ perf_event_nmi_handler(struct notifier_block *self, } static struct event_constraint unconstrained; +static struct event_constraint emptyconstraint; static struct event_constraint bts_constraint = EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); @@ -2249,10 +2260,146 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event return &unconstrained; } +/* + 
* AMD64 events are detected based on their event codes. + */ +static inline int amd_is_nb_event(struct hw_perf_event *hwc) +{ + return (hwc->config & 0xe0) == 0xe0; +} + +static void amd_put_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct amd_nb *nb = cpuc->amd_nb; + int i; + + /* + * only care about NB events + */ + if (!(nb && amd_is_nb_event(hwc))) + return; + + /* + * need to scan whole list because event may not have + * been assigned during scheduling + * + * no race condition possible because event can only + * be removed on one CPU at a time AND PMU is disabled + * when we come here + */ + for (i = 0; i < x86_pmu.num_events; i++) { + if (nb->owners[i] == event) { + cmpxchg(nb->owners+i, event, NULL); + break; + } + } +} + + /* + * AMD64 NorthBridge events need special treatment because + * counter access needs to be synchronized across all cores + * of a package. Refer to BKDG section 3.12 + * + * NB events are events measuring L3 cache, Hypertransport + * traffic. They are identified by an event code >= 0xe00. + * They measure events on the NorthBride which is shared + * by all cores on a package. NB events are counted on a + * shared set of counters. When a NB event is programmed + * in a counter, the data actually comes from a shared + * counter. Thus, access to those counters needs to be + * synchronized. + * + * We implement the synchronization such that no two cores + * can be measuring NB events using the same counters. Thus, + * we maintain a per-NB allocation table. The available slot + * is propagated using the event_constraint structure. + * + * We provide only one choice for each NB event based on + * the fact that only NB events have restrictions. Consequently, + * if a counter is available, there is a guarantee the NB event + * will be assigned to it. If no slot is available, an empty + * constraint is returned and scheduling will eventually fail + * for this event. + * + * Note that all cores attached the same NB compete for the same + * counters to host NB events, this is why we use atomic ops. Some + * multi-chip CPUs may have more than one NB. + * + * Given that resources are allocated (cmpxchg), they must be + * eventually freed for others to use. This is accomplished by + * calling amd_put_event_constraints(). + * + * Non NB events are not impacted by this restriction. 
+ */ static struct event_constraint * amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { - return &unconstrained; + struct hw_perf_event *hwc = &event->hw; + struct amd_nb *nb = cpuc->amd_nb; + struct perf_event *old = NULL; + int max = x86_pmu.num_events; + int i, j, k = -1; + + /* + * if not NB event or no NB, then no constraints + */ + if (!(nb && amd_is_nb_event(hwc))) + return &unconstrained; + + /* + * detect if already present, if so reuse + * + * cannot merge with actual allocation + * because of possible holes + * + * event can already be present yet not assigned (in hwc->idx) + * because of successive calls to x86_schedule_events() from + * hw_perf_group_sched_in() without hw_perf_enable() + */ + for (i = 0; i < max; i++) { + /* + * keep track of first free slot + */ + if (k == -1 && !nb->owners[i]) + k = i; + + /* already present, reuse */ + if (nb->owners[i] == event) + goto done; + } + /* + * not present, so grab a new slot + * starting either at: + */ + if (hwc->idx != -1) { + /* previous assignment */ + i = hwc->idx; + } else if (k != -1) { + /* start from free slot found */ + i = k; + } else { + /* + * event not found, no slot found in + * first pass, try again from the + * beginning + */ + i = 0; + } + j = i; + do { + old = cmpxchg(nb->owners+i, NULL, event); + if (!old) + break; + if (++i == max) + i = 0; + } while (i != j); +done: + if (!old) + return &nb->event_constraints[i]; + + return &emptyconstraint; } static int x86_event_sched_in(struct perf_event *event, @@ -2465,7 +2612,8 @@ static __initconst struct x86_pmu amd_pmu = { .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, - .get_event_constraints = amd_get_event_constraints + .get_event_constraints = amd_get_event_constraints, + .put_event_constraints = amd_put_event_constraints }; static __init int p6_pmu_init(void) @@ -2589,6 +2737,91 @@ static __init int intel_pmu_init(void) return 0; } +static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) +{ + struct amd_nb *nb; + int i; + + nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); + if (!nb) + return NULL; + + memset(nb, 0, sizeof(*nb)); + nb->nb_id = nb_id; + + /* + * initialize all possible NB constraints + */ + for (i = 0; i < x86_pmu.num_events; i++) { + set_bit(i, nb->event_constraints[i].idxmsk); + nb->event_constraints[i].weight = 1; + } + return nb; +} + +static void amd_pmu_cpu_online(int cpu) +{ + struct cpu_hw_events *cpu1, *cpu2; + struct amd_nb *nb = NULL; + int i, nb_id; + + if (boot_cpu_data.x86_max_cores < 2) + return; + + /* + * function may be called too early in the + * boot process, in which case nb_id is bogus + */ + nb_id = amd_get_nb_id(cpu); + if (nb_id == BAD_APICID) + return; + + cpu1 = &per_cpu(cpu_hw_events, cpu); + cpu1->amd_nb = NULL; + + raw_spin_lock(&amd_nb_lock); + + for_each_online_cpu(i) { + cpu2 = &per_cpu(cpu_hw_events, i); + nb = cpu2->amd_nb; + if (!nb) + continue; + if (nb->nb_id == nb_id) + goto found; + } + + nb = amd_alloc_nb(cpu, nb_id); + if (!nb) { + pr_err("perf_events: failed NB allocation for CPU%d\n", cpu); + raw_spin_unlock(&amd_nb_lock); + return; + } +found: + nb->refcnt++; + cpu1->amd_nb = nb; + + raw_spin_unlock(&amd_nb_lock); +} + +static void amd_pmu_cpu_offline(int cpu) +{ + struct cpu_hw_events *cpuhw; + + if (boot_cpu_data.x86_max_cores < 2) + return; + + cpuhw = &per_cpu(cpu_hw_events, cpu); + + raw_spin_lock(&amd_nb_lock); + + if (--cpuhw->amd_nb->refcnt == 0) + kfree(cpuhw->amd_nb); + + cpuhw->amd_nb = NULL; + + 
raw_spin_unlock(&amd_nb_lock); +} + static __init int amd_pmu_init(void) { /* Performance-monitoring supported from K7 and later: */ @@ -2601,6 +2834,11 @@ static __init int amd_pmu_init(void) memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + /* + * explicitly initialize the boot cpu, other cpus will get + * the cpu hotplug callbacks from smp_init() + */ + amd_pmu_cpu_online(smp_processor_id()); return 0; } @@ -2934,4 +3172,25 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) void hw_perf_event_setup_online(int cpu) { init_debug_store_on_cpu(cpu); + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + amd_pmu_cpu_online(cpu); + break; + default: + return; + } +} + +void hw_perf_event_setup_offline(int cpu) +{ + init_debug_store_on_cpu(cpu); + + switch (boot_cpu_data.x86_vendor) { + case X86_VENDOR_AMD: + amd_pmu_cpu_offline(cpu); + break; + default: + return; + } } -- cgit v1.1 From 6e37738a2fac964583debe91099bc3248554f6e5 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 11 Feb 2010 13:21:58 +0100 Subject: perf_events: Simplify code by removing cpu argument to hw_perf_group_sched_in() Since the cpu argument to hw_perf_group_sched_in() is always smp_processor_id(), simplify the code a little by removing this argument and using the current cpu where needed. Signed-off-by: Peter Zijlstra Cc: David Miller Cc: Paul Mackerras Cc: Benjamin Herrenschmidt Cc: Frederic Weisbecker LKML-Reference: <1265890918.5396.3.camel@laptop> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index aa12f36..ad09656 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2403,12 +2403,12 @@ done: } static int x86_event_sched_in(struct perf_event *event, - struct perf_cpu_context *cpuctx, int cpu) + struct perf_cpu_context *cpuctx) { int ret = 0; event->state = PERF_EVENT_STATE_ACTIVE; - event->oncpu = cpu; + event->oncpu = smp_processor_id(); event->tstamp_running += event->ctx->time - event->tstamp_stopped; if (!is_x86_event(event)) @@ -2424,7 +2424,7 @@ static int x86_event_sched_in(struct perf_event *event, } static void x86_event_sched_out(struct perf_event *event, - struct perf_cpu_context *cpuctx, int cpu) + struct perf_cpu_context *cpuctx) { event->state = PERF_EVENT_STATE_INACTIVE; event->oncpu = -1; @@ -2452,9 +2452,9 @@ static void x86_event_sched_out(struct perf_event *event, */ int hw_perf_group_sched_in(struct perf_event *leader, struct perf_cpu_context *cpuctx, - struct perf_event_context *ctx, int cpu) + struct perf_event_context *ctx) { - struct cpu_hw_events *cpuc = &per_cpu(cpu_hw_events, cpu); + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *sub; int assign[X86_PMC_IDX_MAX]; int n0, n1, ret; @@ -2468,14 +2468,14 @@ int hw_perf_group_sched_in(struct perf_event *leader, if (ret) return ret; - ret = x86_event_sched_in(leader, cpuctx, cpu); + ret = x86_event_sched_in(leader, cpuctx); if (ret) return ret; n1 = 1; list_for_each_entry(sub, &leader->sibling_list, group_entry) { if (sub->state > PERF_EVENT_STATE_OFF) { - ret = x86_event_sched_in(sub, cpuctx, cpu); + ret = x86_event_sched_in(sub, cpuctx); if (ret) goto undo; ++n1; @@ -2500,11 +2500,11 @@ int hw_perf_group_sched_in(struct perf_event *leader, */ return 1; undo: - x86_event_sched_out(leader, cpuctx, cpu); + 
x86_event_sched_out(leader, cpuctx); n0 = 1; list_for_each_entry(sub, &leader->sibling_list, group_entry) { if (sub->state == PERF_EVENT_STATE_ACTIVE) { - x86_event_sched_out(sub, cpuctx, cpu); + x86_event_sched_out(sub, cpuctx); if (++n0 == n1) break; } -- cgit v1.1 From 6667661df4bc76083edf1e08831c20f64429709d Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 10 Feb 2010 16:10:48 +0100 Subject: perf_events, x86: Remove superflous MSR writes We re-program the event control register every time we reset the count, this appears to be superflous, hence remove it. Signed-off-by: Peter Zijlstra Cc: Arjan van de Ven LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 --- 1 file changed, 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index ad09656..dd09ccc 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -2009,9 +2009,6 @@ static int intel_pmu_save_and_restart(struct perf_event *event) x86_perf_event_update(event, hwc, idx); ret = x86_perf_event_set_period(event, hwc, idx); - if (event->state == PERF_EVENT_STATE_ACTIVE) - intel_pmu_enable_event(hwc, idx); - return ret; } -- cgit v1.1 From 013cfc50672bbb638796545231683231647edb07 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 28 Jan 2010 18:05:26 +0100 Subject: oprofile/x86: remove OPROFILE_IBS config option OProfile support for IBS is now for several versions in the kernel. The feature is stable now and the code can be activated permanently. As a side effect IBS now works also on nosmp configs. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 31 +------------------------------ 1 file changed, 1 insertion(+), 30 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 39686c2..2b9c68d 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -22,6 +22,7 @@ #include #include #include +#include #include "op_x86_model.h" #include "op_counter.h" @@ -43,8 +44,6 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; -#ifdef CONFIG_OPROFILE_IBS - /* IbsFetchCtl bits/masks */ #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) @@ -72,8 +71,6 @@ struct op_ibs_config { static struct op_ibs_config ibs_config; -#endif - #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX static void op_mux_fill_in_addresses(struct op_msrs * const msrs) @@ -185,8 +182,6 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, } } -#ifdef CONFIG_OPROFILE_IBS - static inline void op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) @@ -272,15 +267,6 @@ static void op_amd_stop_ibs(void) wrmsrl(MSR_AMD64_IBSOPCTL, 0); } -#else - -static inline void op_amd_handle_ibs(struct pt_regs * const regs, - struct op_msrs const * const msrs) { } -static inline void op_amd_start_ibs(void) { } -static inline void op_amd_stop_ibs(void) { } - -#endif - static int op_amd_check_ctrs(struct pt_regs * const regs, struct op_msrs const * const msrs) { @@ -355,8 +341,6 @@ static void op_amd_shutdown(struct op_msrs const * const msrs) } } -#ifdef CONFIG_OPROFILE_IBS - static u8 ibs_eilvt_off; static inline void apic_init_ibs_nmi_per_cpu(void *arg) @@ -507,19 +491,6 @@ static void op_amd_exit(void) ibs_exit(); } -#else - -/* no IBS support */ - -static int op_amd_init(struct oprofile_operations *ops) -{ - return 0; -} - -static void op_amd_exit(void) {} - -#endif /* 
CONFIG_OPROFILE_IBS */ - struct op_x86_model_spec op_amd_spec = { .num_counters = NUM_COUNTERS, .num_controls = NUM_CONTROLS, -- cgit v1.1 From 89baaaa98a10cad5cc8516c7208b02d9fc711890 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 28 Jan 2010 16:50:45 +0100 Subject: oprofile/x86: remove node check in AMD IBS initialization Standard AMD systems have the same number of nodes as there are northbridge devices. However, there may kernel configurations (especially for 32 bit) or system setups exist, where the node number is different or it can not be detected properly. Thus the check is not reliable and may fail though IBS setup was fine. For this reason it is better to remove the check. Cc: stable Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 10 ---------- 1 file changed, 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 2b9c68d..4eb3071 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -389,16 +389,6 @@ static int init_ibs_nmi(void) return 1; } -#ifdef CONFIG_NUMA - /* Sanity check */ - /* Works only for 64bit with proper numa implementation. */ - if (nodes != num_possible_nodes()) { - printk(KERN_DEBUG "Failed to setup CPU node(s) for IBS, " - "found: %d, expected %d", - nodes, num_possible_nodes()); - return 1; - } -#endif return 0; } -- cgit v1.1 From 64683da6643e8c6c93f1f99548399b08c029fd13 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 4 Feb 2010 10:57:23 +0100 Subject: oprofile/x86: implement IBS cpuid feature detection This patch adds IBS feature detection using cpuid flags. An IBS capability mask is introduced to test for certain IBS features. The bit mask is the same as for IBS cpuid feature flags (Fn8000_001B_EAX), but bit 0 is used to indicate the existence of IBS. The patch also changes the handling of the IbsOpCntCtl bit (periodic op counter count control). The oprofilefs file for this feature (ibs_op/dispatched_ops) will be only exposed if the feature is available, also the default for the bit is set to count clock cycles. In general, the userland can detect the availability of a feature by checking for the corresponding file in oprofilefs. If it exists, the feature also exists. This may lead to a dynamic file layout depending on the cpu type with that the userland has to deal with. Current opcontrol is compatible. Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 80 +++++++++++++++++++++++++++++++--------- 1 file changed, 63 insertions(+), 17 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 4eb3071..6557683 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -23,6 +23,8 @@ #include #include #include +#include +#include #include "op_x86_model.h" #include "op_counter.h" @@ -58,7 +60,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; #define IBS_FETCH_SIZE 6 #define IBS_OP_SIZE 12 -static int has_ibs; /* AMD Family10h and later */ +static u32 ibs_caps; struct op_ibs_config { unsigned long op_enabled; @@ -71,6 +73,40 @@ struct op_ibs_config { static struct op_ibs_config ibs_config; +/* + * IBS cpuid feature detection + */ + +#define IBS_CPUID_FEATURES 0x8000001b + +/* + * Same bit mask as for IBS cpuid feature flags (Fn8000_001B_EAX), but + * bit 0 is used to indicate the existence of IBS. 
+ */ +#define IBS_CAPS_AVAIL (1LL<<0) +#define IBS_CAPS_OPCNT (1LL<<4) + +static u32 get_ibs_caps(void) +{ + u32 ibs_caps; + unsigned int max_level; + + if (!boot_cpu_has(X86_FEATURE_IBS)) + return 0; + + /* check IBS cpuid feature flags */ + max_level = cpuid_eax(0x80000000); + if (max_level < IBS_CPUID_FEATURES) + return IBS_CAPS_AVAIL; + + ibs_caps = cpuid_eax(IBS_CPUID_FEATURES); + if (!(ibs_caps & IBS_CAPS_AVAIL)) + /* cpuid flags not valid */ + return IBS_CAPS_AVAIL; + + return ibs_caps; +} + #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX static void op_mux_fill_in_addresses(struct op_msrs * const msrs) @@ -189,7 +225,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, u64 val, ctl; struct op_entry entry; - if (!has_ibs) + if (!ibs_caps) return; if (ibs_config.fetch_enabled) { @@ -241,16 +277,21 @@ op_amd_handle_ibs(struct pt_regs * const regs, static inline void op_amd_start_ibs(void) { u64 val; - if (has_ibs && ibs_config.fetch_enabled) { + + if (!ibs_caps) + return; + + if (ibs_config.fetch_enabled) { val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; val |= IBS_FETCH_ENABLE; wrmsrl(MSR_AMD64_IBSFETCHCTL, val); } - if (has_ibs && ibs_config.op_enabled) { + if (ibs_config.op_enabled) { val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; - val |= ibs_config.dispatched_ops ? IBS_OP_CNT_CTL : 0; + if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) + val |= IBS_OP_CNT_CTL; val |= IBS_OP_ENABLE; wrmsrl(MSR_AMD64_IBSOPCTL, val); } @@ -258,11 +299,14 @@ static inline void op_amd_start_ibs(void) static void op_amd_stop_ibs(void) { - if (has_ibs && ibs_config.fetch_enabled) + if (!ibs_caps) + return; + + if (ibs_config.fetch_enabled) /* clear max count and enable */ wrmsrl(MSR_AMD64_IBSFETCHCTL, 0); - if (has_ibs && ibs_config.op_enabled) + if (ibs_config.op_enabled) /* clear max count and enable */ wrmsrl(MSR_AMD64_IBSOPCTL, 0); } @@ -395,29 +439,30 @@ static int init_ibs_nmi(void) /* uninitialize the APIC for the IBS interrupts if needed */ static void clear_ibs_nmi(void) { - if (has_ibs) + if (ibs_caps) on_each_cpu(apic_clear_ibs_nmi_per_cpu, NULL, 1); } /* initialize the APIC for the IBS interrupts if available */ static void ibs_init(void) { - has_ibs = boot_cpu_has(X86_FEATURE_IBS); + ibs_caps = get_ibs_caps(); - if (!has_ibs) + if (!ibs_caps) return; if (init_ibs_nmi()) { - has_ibs = 0; + ibs_caps = 0; return; } - printk(KERN_INFO "oprofile: AMD IBS detected\n"); + printk(KERN_INFO "oprofile: AMD IBS detected (0x%08x)\n", + (unsigned)ibs_caps); } static void ibs_exit(void) { - if (!has_ibs) + if (!ibs_caps) return; clear_ibs_nmi(); @@ -437,7 +482,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) if (ret) return ret; - if (!has_ibs) + if (!ibs_caps) return ret; /* model specific files */ @@ -447,7 +492,7 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) ibs_config.fetch_enabled = 0; ibs_config.max_cnt_op = 250000; ibs_config.op_enabled = 0; - ibs_config.dispatched_ops = 1; + ibs_config.dispatched_ops = 0; dir = oprofilefs_mkdir(sb, root, "ibs_fetch"); oprofilefs_create_ulong(sb, dir, "enable", @@ -462,8 +507,9 @@ static int setup_ibs_files(struct super_block *sb, struct dentry *root) &ibs_config.op_enabled); oprofilefs_create_ulong(sb, dir, "max_count", &ibs_config.max_cnt_op); - oprofilefs_create_ulong(sb, dir, "dispatched_ops", - &ibs_config.dispatched_ops); + if (ibs_caps & IBS_CAPS_OPCNT) + oprofilefs_create_ulong(sb, dir, "dispatched_ops", + &ibs_config.dispatched_ops); return 0; } -- 
cgit v1.1 From f125be1469303f7b9324447f251d74a0da24952f Mon Sep 17 00:00:00 2001 From: Suravee Suthikulpanit Date: Mon, 18 Jan 2010 11:25:45 -0600 Subject: oprofile/x86: implement lsfr pseudo-random number generator for IBS This patch implements a linear feedback shift register (LFSR) for pseudo-random number generation for IBS. For IBS measurements it would be good to minimize memory traffic in the interrupt handler since every access pollutes the data caches. Computing a maximal period LFSR just needs shifts and ORs. The LFSR method is good enough to randomize the ops at low overhead. 16 pseudo-random bits are enough for the implementation and it doesn't matter that the pattern repeats with a fairly short cycle. It only needs to break up (hard) periodic sampling behavior. The logic was designed by Paul Drongowski. Signed-off-by: Suravee Suthikulpanit Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 23 +++++++++++++++++++++++ 1 file changed, 23 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 6557683..97c84eb 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -218,6 +218,29 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, } } +/* + * 16-bit Linear Feedback Shift Register (LFSR) + * + * 16 14 13 11 + * Feedback polynomial = X + X + X + X + 1 + */ +static unsigned int lfsr_random(void) +{ + static unsigned int lfsr_value = 0xF00D; + unsigned int bit; + + /* Compute next bit to shift in */ + bit = ((lfsr_value >> 0) ^ + (lfsr_value >> 2) ^ + (lfsr_value >> 3) ^ + (lfsr_value >> 5)) & 0x0001; + + /* Advance to next register value */ + lfsr_value = (lfsr_value >> 1) | (bit << 15); + + return lfsr_value; +} + static inline void op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) -- cgit v1.1 From ba52078e1917c5116c0802298d88ad0e54a6728b Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 23 Feb 2010 15:46:49 +0100 Subject: oprofile/x86: implement randomization for IBS periodic op counter IBS selects an op (execution operation) for sampling by counting either cycles or dispatched ops. Better statistical samples can be produced by adding a software generated random offset to the periodic op counter value with each sample. This patch adds software randomization to the IBS periodic op counter. The lower 12 bits of the 20 bit counter are randomized. IbsOpCurCnt is initialized with a 12 bit random value. There is a work around if the hw can not write to IbsOpCurCnt. Then the lower 8 bits of the 16 bit IbsOpMaxCnt [15:0] value are randomized in the range of -128 to +127 by adding/subtracting an offset to the maximum count (IbsOpMaxCnt). The linear feedback shift register (LFSR) algorithm is used for pseudo-random number generation to have low impact to the memory system. 
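As a rough illustration of the randomization scheme described above, the stand-alone user-space sketch below models the 16-bit LFSR (feedback polynomial x^16 + x^14 + x^13 + x^11 + 1) and the fallback path that perturbs the maximum count by a signed 8-bit offset in the range -128 to +127. It is not the kernel code itself; the names lfsr16(), randomize_maxcnt() and the 0x1000 sample count are invented for this demo.

/*
 * Illustrative sketch only (not the kernel implementation).
 * lfsr16() mirrors the x^16 + x^14 + x^13 + x^11 + 1 polynomial;
 * randomize_maxcnt() models the IbsOpMaxCnt fallback that adds a
 * signed 8-bit offset derived from the pseudo-random value.
 */
#include <stdio.h>
#include <stdint.h>

static uint16_t lfsr16(uint16_t v)
{
	/* taps at bits 0, 2, 3, 5 == polynomial x^16 + x^14 + x^13 + x^11 + 1 */
	uint16_t bit = ((v >> 0) ^ (v >> 2) ^ (v >> 3) ^ (v >> 5)) & 1;

	return (v >> 1) | (bit << 15);
}

static uint64_t randomize_maxcnt(uint64_t max_cnt, uint16_t random)
{
	/* add a signed offset in [-128, +127], as in the fallback path */
	return max_cnt + (int8_t)(random >> 4);
}

int main(void)
{
	uint16_t v = 0xF00D;	/* same seed value as in the patch */
	int i;

	for (i = 0; i < 8; i++) {
		v = lfsr16(v);
		printf("lfsr=0x%04x  maxcnt=0x%llx\n", v,
		       (unsigned long long)randomize_maxcnt(0x1000, v));
	}
	return 0;
}

Running this shows the short, cheap pseudo-random sequence spreading the effective maximum count around its nominal value, which is all the sampling needs to break up periodic behavior.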
Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 69 ++++++++++++++++++++++++++++++++++++---- 1 file changed, 63 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 97c84eb..a9d1947 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -52,7 +52,7 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; #define IBS_FETCH_ENABLE (1ULL<<48) #define IBS_FETCH_CNT_MASK 0xFFFF0000ULL -/*IbsOpCtl bits */ +/* IbsOpCtl bits */ #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) @@ -72,6 +72,7 @@ struct op_ibs_config { }; static struct op_ibs_config ibs_config; +static u64 ibs_op_ctl; /* * IBS cpuid feature detection @@ -84,8 +85,16 @@ static struct op_ibs_config ibs_config; * bit 0 is used to indicate the existence of IBS. */ #define IBS_CAPS_AVAIL (1LL<<0) +#define IBS_CAPS_RDWROPCNT (1LL<<3) #define IBS_CAPS_OPCNT (1LL<<4) +/* + * IBS randomization macros + */ +#define IBS_RANDOM_BITS 12 +#define IBS_RANDOM_MASK ((1ULL << IBS_RANDOM_BITS) - 1) +#define IBS_RANDOM_MAXCNT_OFFSET (1ULL << (IBS_RANDOM_BITS - 5)) + static u32 get_ibs_caps(void) { u32 ibs_caps; @@ -241,6 +250,38 @@ static unsigned int lfsr_random(void) return lfsr_value; } +/* + * IBS software randomization + * + * The IBS periodic op counter is randomized in software. The lower 12 + * bits of the 20 bit counter are randomized. IbsOpCurCnt is + * initialized with a 12 bit random value. + */ +static inline u64 op_amd_randomize_ibs_op(u64 val) +{ + unsigned int random = lfsr_random(); + + if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) + /* + * Work around if the hw can not write to IbsOpCurCnt + * + * Randomize the lower 8 bits of the 16 bit + * IbsOpMaxCnt [15:0] value in the range of -128 to + * +127 by adding/subtracting an offset to the + * maximum count (IbsOpMaxCnt). + * + * To avoid over or underflows and protect upper bits + * starting at bit 16, the initial value for + * IbsOpMaxCnt must fit in the range from 0x0081 to + * 0xff80. + */ + val += (s8)(random >> 4); + else + val |= (u64)(random & IBS_RANDOM_MASK) << 32; + + return val; +} + static inline void op_amd_handle_ibs(struct pt_regs * const regs, struct op_msrs const * const msrs) @@ -290,8 +331,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, oprofile_write_commit(&entry); /* reenable the IRQ */ - ctl &= ~IBS_OP_VAL & 0xFFFFFFFF; - ctl |= IBS_OP_ENABLE; + ctl = op_amd_randomize_ibs_op(ibs_op_ctl); wrmsrl(MSR_AMD64_IBSOPCTL, ctl); } } @@ -312,10 +352,27 @@ static inline void op_amd_start_ibs(void) } if (ibs_config.op_enabled) { - val = (ibs_config.max_cnt_op >> 4) & 0xFFFF; + ibs_op_ctl = ibs_config.max_cnt_op >> 4; + if (!(ibs_caps & IBS_CAPS_RDWROPCNT)) { + /* + * IbsOpCurCnt not supported. See + * op_amd_randomize_ibs_op() for details. + */ + ibs_op_ctl = clamp(ibs_op_ctl, 0x0081ULL, 0xFF80ULL); + } else { + /* + * The start value is randomized with a + * positive offset, we need to compensate it + * with the half of the randomized range. Also + * avoid underflows. 
+ */ + ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, + 0xFFFFULL); + } if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) - val |= IBS_OP_CNT_CTL; - val |= IBS_OP_ENABLE; + ibs_op_ctl |= IBS_OP_CNT_CTL; + ibs_op_ctl |= IBS_OP_ENABLE; + val = op_amd_randomize_ibs_op(ibs_op_ctl); wrmsrl(MSR_AMD64_IBSOPCTL, val); } } -- cgit v1.1 From 98a2e73a0690b3610f049a64154d8145e5771713 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 23 Feb 2010 18:14:58 +0100 Subject: oprofile/x86: warn user if a counter is already active This patch generates a warning if a counter is already active. Implemented for AMD and P6 models. P4 is not supported. Cc: Naga Chumbalkar Cc: Shashi Belur Cc: Tony Jones Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 11 ++++++++++- arch/x86/oprofile/op_model_ppro.c | 11 ++++++++++- arch/x86/oprofile/op_x86_model.h | 11 +++++++++++ 3 files changed, 31 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index a9d1947..ef9d735 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -194,9 +194,18 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* clear all counters */ for (i = 0; i < NUM_CONTROLS; ++i) { - if (unlikely(!msrs->controls[i].addr)) + if (unlikely(!msrs->controls[i].addr)) { + if (counter_config[i].enabled && !smp_processor_id()) + /* + * counter is reserved, this is on all + * cpus, so report only for cpu #0 + */ + op_x86_warn_reserved(i); continue; + } rdmsrl(msrs->controls[i].addr, val); + if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) + op_x86_warn_in_use(i); val &= model->reserved; wrmsrl(msrs->controls[i].addr, val); } diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 8eb0587..c344525 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -82,9 +82,18 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, /* clear all counters */ for (i = 0; i < num_counters; ++i) { - if (unlikely(!msrs->controls[i].addr)) + if (unlikely(!msrs->controls[i].addr)) { + if (counter_config[i].enabled && !smp_processor_id()) + /* + * counter is reserved, this is on all + * cpus, so report only for cpu #0 + */ + op_x86_warn_reserved(i); continue; + } rdmsrl(msrs->controls[i].addr, val); + if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) + op_x86_warn_in_use(i); val &= model->reserved; wrmsrl(msrs->controls[i].addr, val); } diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 7b8e75d..59fa2bd 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -57,6 +57,17 @@ struct op_x86_model_spec { struct op_counter_config; +static inline void op_x86_warn_in_use(int counter) +{ + pr_warning("oprofile: counter #%d on cpu #%d may already be used\n", + counter, smp_processor_id()); +} + +static inline void op_x86_warn_reserved(int counter) +{ + pr_warning("oprofile: counter #%d is already reserved\n", counter); +} + extern u64 op_x86_get_ctrl(struct op_x86_model_spec const *model, struct op_counter_config *counter_config); extern int op_x86_phys_to_virt(int phys); -- cgit v1.1 From 8588d1067147e14d1dd521fbadd1d2564f8cc794 Mon Sep 17 00:00:00 2001 From: Naga Chumbalkar Date: Tue, 23 Feb 2010 18:14:58 +0100 Subject: oprofile/x86: add comment to counter-in-use warning Currently, oprofile fails silently on platforms where a non-OS entity such as the system firmware "enables" and uses a 
performance counter. There is a warning in the code for this case. The warning indicates an already running counter. If oprofile doesn't collect data, then try using a different performance counter on your platform to monitor the desired event. Delete the counter from the desired event by editing the /usr/share/oprofile///events file. If the event cannot be monitored by any other counter, contact your hardware or BIOS vendor. Cc: Shashi Belur Cc: Tony Jones Signed-off-by: Naga Chumbalkar Signed-off-by: Robert Richter --- arch/x86/oprofile/op_x86_model.h | 9 +++++++++ 1 file changed, 9 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_x86_model.h b/arch/x86/oprofile/op_x86_model.h index 59fa2bd..ff82a75 100644 --- a/arch/x86/oprofile/op_x86_model.h +++ b/arch/x86/oprofile/op_x86_model.h @@ -59,6 +59,15 @@ struct op_counter_config; static inline void op_x86_warn_in_use(int counter) { + /* + * The warning indicates an already running counter. If + * oprofile doesn't collect data, then try using a different + * performance counter on your platform to monitor the desired + * event. Delete counter #%d from the desired event by editing + * the /usr/share/oprofile/%s//events file. If the event + * cannot be monitored by any other counter, contact your + * hardware or BIOS vendor. + */ pr_warning("oprofile: counter #%d on cpu #%d may already be used\n", counter, smp_processor_id()); } -- cgit v1.1 From 68dc819ce829f7e7977a56524e710473bdb55115 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 25 Feb 2010 19:16:46 +0100 Subject: oprofile/x86: fix perfctr nmi reservation for mulitplexing Multiple virtual counters share one physical counter. The reservation of virtual counters fails due to duplicate allocation of the same counter. The counters are already reserved. Thus, virtual counter reservation may removed at all. This also makes the code easier. 
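The duplicate-allocation failure comes from the reservation being a simple per-counter ownership bitmask: the first reservation of a physical counter succeeds and any later attempt to reserve the same counter fails. The fragment below is a simplified user-space model of that behavior, with invented names (demo_reserve(), demo_release(), owner_mask); the kernel's actual helpers are reserve_perfctr_nmi()/release_perfctr_nmi().

/*
 * Simplified model of perfctr NMI reservation: ownership is a bitmask,
 * so reserving the same physical counter twice fails.  Illustrative
 * names only; not the kernel API.
 */
#include <stdio.h>

static unsigned long owner_mask;	/* bit i set => counter i is reserved */

static int demo_reserve(int counter)
{
	if (owner_mask & (1UL << counter))
		return 0;		/* already owned: reservation fails */
	owner_mask |= 1UL << counter;
	return 1;
}

static void demo_release(int counter)
{
	owner_mask &= ~(1UL << counter);
}

int main(void)
{
	/* first pass (physical counter setup) succeeds */
	printf("reserve #0: %d\n", demo_reserve(0));		/* prints 1 */
	/* second pass for a multiplexed virtual counter fails */
	printf("reserve #0 again: %d\n", demo_reserve(0));	/* prints 0 */
	demo_release(0);
	return 0;
}

This is why the multiplexing path can simply reuse the MSR addresses already reserved for the physical counters instead of reserving them a second time, which is what the patch below does.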
Cc: stable@kernel.org Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 11 ++++++----- arch/x86/oprofile/op_model_amd.c | 19 ------------------- 2 files changed, 6 insertions(+), 24 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 3347f69..7170d1e2 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -179,7 +179,6 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) if (counter_config[i].enabled) { multiplex[i].saved = -(u64)counter_config[i].count; } else { - multiplex[i].addr = 0; multiplex[i].saved = 0; } } @@ -189,25 +188,27 @@ static void nmi_cpu_setup_mux(int cpu, struct op_msrs const * const msrs) static void nmi_cpu_save_mpx_registers(struct op_msrs *msrs) { + struct op_msr *counters = msrs->counters; struct op_msr *multiplex = msrs->multiplex; int i; for (i = 0; i < model->num_counters; ++i) { int virt = op_x86_phys_to_virt(i); - if (multiplex[virt].addr) - rdmsrl(multiplex[virt].addr, multiplex[virt].saved); + if (counters[i].addr) + rdmsrl(counters[i].addr, multiplex[virt].saved); } } static void nmi_cpu_restore_mpx_registers(struct op_msrs *msrs) { + struct op_msr *counters = msrs->counters; struct op_msr *multiplex = msrs->multiplex; int i; for (i = 0; i < model->num_counters; ++i) { int virt = op_x86_phys_to_virt(i); - if (multiplex[virt].addr) - wrmsrl(multiplex[virt].addr, multiplex[virt].saved); + if (counters[i].addr) + wrmsrl(counters[i].addr, multiplex[virt].saved); } } diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index ef9d735..2aab018 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -118,19 +118,6 @@ static u32 get_ibs_caps(void) #ifdef CONFIG_OPROFILE_EVENT_MULTIPLEX -static void op_mux_fill_in_addresses(struct op_msrs * const msrs) -{ - int i; - - for (i = 0; i < NUM_VIRT_COUNTERS; i++) { - int hw_counter = op_x86_virt_to_phys(i); - if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) - msrs->multiplex[i].addr = MSR_K7_PERFCTR0 + hw_counter; - else - msrs->multiplex[i].addr = 0; - } -} - static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, struct op_msrs const * const msrs) { @@ -149,10 +136,6 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, } } -#else - -static inline void op_mux_fill_in_addresses(struct op_msrs * const msrs) { } - #endif /* functions for op_amd_spec */ @@ -174,8 +157,6 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) else msrs->controls[i].addr = 0; } - - op_mux_fill_in_addresses(msrs); } static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, -- cgit v1.1 From c17c8fbf349482e89b57d1b800e83e9f4cf40c47 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 25 Feb 2010 20:20:25 +0100 Subject: oprofile/x86: use kzalloc() instead of kmalloc() Cc: stable@kernel.org Signed-off-by: Robert Richter --- arch/x86/oprofile/nmi_int.c | 6 +++--- arch/x86/oprofile/op_model_amd.c | 4 ---- arch/x86/oprofile/op_model_p4.c | 6 ------ arch/x86/oprofile/op_model_ppro.c | 6 +----- 4 files changed, 4 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/nmi_int.c b/arch/x86/oprofile/nmi_int.c index 7170d1e2..2c505ee 100644 --- a/arch/x86/oprofile/nmi_int.c +++ b/arch/x86/oprofile/nmi_int.c @@ -159,7 +159,7 @@ static int nmi_setup_mux(void) for_each_possible_cpu(i) { per_cpu(cpu_msrs, i).multiplex = - kmalloc(multiplex_size, GFP_KERNEL); + kzalloc(multiplex_size, 
GFP_KERNEL); if (!per_cpu(cpu_msrs, i).multiplex) return 0; } @@ -304,11 +304,11 @@ static int allocate_msrs(void) int i; for_each_possible_cpu(i) { - per_cpu(cpu_msrs, i).counters = kmalloc(counters_size, + per_cpu(cpu_msrs, i).counters = kzalloc(counters_size, GFP_KERNEL); if (!per_cpu(cpu_msrs, i).counters) return 0; - per_cpu(cpu_msrs, i).controls = kmalloc(controls_size, + per_cpu(cpu_msrs, i).controls = kzalloc(controls_size, GFP_KERNEL); if (!per_cpu(cpu_msrs, i).controls) return 0; diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 2aab018..f4ebc45 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -147,15 +147,11 @@ static void op_amd_fill_in_addresses(struct op_msrs * const msrs) for (i = 0; i < NUM_COUNTERS; i++) { if (reserve_perfctr_nmi(MSR_K7_PERFCTR0 + i)) msrs->counters[i].addr = MSR_K7_PERFCTR0 + i; - else - msrs->counters[i].addr = 0; } for (i = 0; i < NUM_CONTROLS; i++) { if (reserve_evntsel_nmi(MSR_K7_EVNTSEL0 + i)) msrs->controls[i].addr = MSR_K7_EVNTSEL0 + i; - else - msrs->controls[i].addr = 0; } } diff --git a/arch/x86/oprofile/op_model_p4.c b/arch/x86/oprofile/op_model_p4.c index ac6b354..e6a160a 100644 --- a/arch/x86/oprofile/op_model_p4.c +++ b/arch/x86/oprofile/op_model_p4.c @@ -394,12 +394,6 @@ static void p4_fill_in_addresses(struct op_msrs * const msrs) setup_num_counters(); stag = get_stagger(); - /* initialize some registers */ - for (i = 0; i < num_counters; ++i) - msrs->counters[i].addr = 0; - for (i = 0; i < num_controls; ++i) - msrs->controls[i].addr = 0; - /* the counter & cccr registers we pay attention to */ for (i = 0; i < num_counters; ++i) { addr = p4_counters[VIRT_CTR(stag, i)].counter_address; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index c344525..5d1727b 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -37,15 +37,11 @@ static void ppro_fill_in_addresses(struct op_msrs * const msrs) for (i = 0; i < num_counters; i++) { if (reserve_perfctr_nmi(MSR_P6_PERFCTR0 + i)) msrs->counters[i].addr = MSR_P6_PERFCTR0 + i; - else - msrs->counters[i].addr = 0; } for (i = 0; i < num_counters; i++) { if (reserve_evntsel_nmi(MSR_P6_EVNTSEL0 + i)) msrs->controls[i].addr = MSR_P6_EVNTSEL0 + i; - else - msrs->controls[i].addr = 0; } } @@ -57,7 +53,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, int i; if (!reset_value) { - reset_value = kmalloc(sizeof(reset_value[0]) * num_counters, + reset_value = kzalloc(sizeof(reset_value[0]) * num_counters, GFP_ATOMIC); if (!reset_value) return; -- cgit v1.1 From cfc9c0b450176a077205ef39092f0dc1a04e020a Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Fri, 26 Feb 2010 13:45:24 +0100 Subject: oprofile/x86: fix msr access to reserved counters During switching virtual counters there is access to perfctr msrs. If the counter is not available this fails due to an invalid address. This patch fixes this. 
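The failure mode here is an access to a counter MSR whose address was never filled in because its reservation failed. A defensive pattern matching the fix is to key every access off the recorded address (zero meaning "not reserved") rather than off the user's enable flag. The sketch below models that guard in plain C; the msr table, demo_wrmsr() stub and demo_switch() loop are stand-ins for illustration, not the kernel's op_msrs/wrmsrl interfaces, and 0xc0010004 is only an example address.

/*
 * Sketch of the guard: only touch a counter MSR when its address was
 * actually reserved (non-zero).  Illustrative stand-ins only.
 */
#include <stdio.h>

struct demo_msr {
	unsigned long addr;		/* 0 => reservation failed, never access */
	unsigned long long saved;
};

static void demo_wrmsr(unsigned long addr, unsigned long long val)
{
	printf("wrmsr 0x%lx <- 0x%llx\n", addr, val);
}

static void demo_switch(struct demo_msr *counters, int n)
{
	int i;

	for (i = 0; i < n; i++) {
		if (!counters[i].addr)	/* skip counters that were never reserved */
			continue;
		demo_wrmsr(counters[i].addr, counters[i].saved);
	}
}

int main(void)
{
	struct demo_msr counters[2] = {
		{ .addr = 0xc0010004, .saved = 0x123 },	/* reserved */
		{ .addr = 0,          .saved = 0x456 },	/* reservation failed */
	};

	demo_switch(counters, 2);	/* only the first counter is written */
	return 0;
}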
Cc: stable@kernel.org Signed-off-by: Robert Richter --- arch/x86/oprofile/op_model_amd.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index f4ebc45..6a58256 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -127,7 +127,7 @@ static void op_mux_switch_ctrl(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { int virt = op_x86_phys_to_virt(i); - if (!counter_config[virt].enabled) + if (!reset_value[virt]) continue; rdmsrl(msrs->controls[i].addr, val); val &= model->reserved; @@ -163,7 +163,8 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* setup reset_value */ for (i = 0; i < NUM_VIRT_COUNTERS; ++i) { - if (counter_config[i].enabled) + if (counter_config[i].enabled + && msrs->counters[op_x86_virt_to_phys(i)].addr) reset_value[i] = counter_config[i].count; else reset_value[i] = 0; @@ -197,9 +198,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, /* enable active counters */ for (i = 0; i < NUM_COUNTERS; ++i) { int virt = op_x86_phys_to_virt(i); - if (!counter_config[virt].enabled) - continue; - if (!msrs->counters[i].addr) + if (!reset_value[virt]) continue; /* setup counter registers */ -- cgit v1.1 From f22f54f4491acd987a6c5a92de52b60ca8b58b61 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Feb 2010 12:05:05 +0100 Subject: perf_events, x86: Split PMU definitions into separate files Split amd,p6,intel into separate files so that we can easily deal with CONFIG_CPU_SUP_* things, needed to make things build now that perf_event.c relies on symbols from amd.c Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 1524 +------------------------------- arch/x86/kernel/cpu/perf_event_amd.c | 416 +++++++++ arch/x86/kernel/cpu/perf_event_intel.c | 971 ++++++++++++++++++++ arch/x86/kernel/cpu/perf_event_p6.c | 157 ++++ 4 files changed, 1554 insertions(+), 1514 deletions(-) create mode 100644 arch/x86/kernel/cpu/perf_event_amd.c create mode 100644 arch/x86/kernel/cpu/perf_event_intel.c create mode 100644 arch/x86/kernel/cpu/perf_event_p6.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index dd09ccc..641ccb9 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -161,8 +161,6 @@ struct x86_pmu { static struct x86_pmu x86_pmu __read_mostly; -static raw_spinlock_t amd_nb_lock; - static DEFINE_PER_CPU(struct cpu_hw_events, cpu_hw_events) = { .enabled = 1, }; @@ -171,140 +169,6 @@ static int x86_perf_event_set_period(struct perf_event *event, struct hw_perf_event *hwc, int idx); /* - * Not sure about some of these - */ -static const u64 p6_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, -}; - -static u64 p6_pmu_event_map(int hw_event) -{ - return p6_perfmon_event_map[hw_event]; -} - -/* - * Event setting that is specified not to count anything. - * We use this to effectively disable a counter. - * - * L2_RQSTS with 0 MESI unit mask. 
- */ -#define P6_NOP_EVENT 0x0000002EULL - -static u64 p6_pmu_raw_event(u64 hw_event) -{ -#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL -#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL -#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL -#define P6_EVNTSEL_INV_MASK 0x00800000ULL -#define P6_EVNTSEL_REG_MASK 0xFF000000ULL - -#define P6_EVNTSEL_MASK \ - (P6_EVNTSEL_EVENT_MASK | \ - P6_EVNTSEL_UNIT_MASK | \ - P6_EVNTSEL_EDGE_MASK | \ - P6_EVNTSEL_INV_MASK | \ - P6_EVNTSEL_REG_MASK) - - return hw_event & P6_EVNTSEL_MASK; -} - -static struct event_constraint intel_p6_event_constraints[] = -{ - INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ - INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ - INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ - EVENT_CONSTRAINT_END -}; - -/* - * Intel PerfMon v3. Used on Core2 and later. - */ -static const u64 intel_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, - [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, - [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, -}; - -static struct event_constraint intel_core_event_constraints[] = -{ - INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ - INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ - INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ - INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ - EVENT_CONSTRAINT_END -}; - -static struct event_constraint intel_core2_event_constraints[] = -{ - FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ - FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ - INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ - INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ - INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ - INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ - INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ - INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ - INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ - INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ - INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ - EVENT_CONSTRAINT_END -}; - -static struct event_constraint intel_nehalem_event_constraints[] = -{ - FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ - FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ - INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ - INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ - INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ - INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ - INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ - INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ - INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ - INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ - EVENT_CONSTRAINT_END -}; - -static struct event_constraint intel_westmere_event_constraints[] = -{ - FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ - FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ - INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ - INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING 
*/ - INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ - EVENT_CONSTRAINT_END -}; - -static struct event_constraint intel_gen_event_constraints[] = -{ - FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ - FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ - EVENT_CONSTRAINT_END -}; - -static u64 intel_pmu_event_map(int hw_event) -{ - return intel_perfmon_event_map[hw_event]; -} - -/* * Generalized hw caching related hw_event table, filled * in on a per model basis. A value of 0 means * 'not supported', -1 means 'hw_event makes no sense on @@ -319,515 +183,6 @@ static u64 __read_mostly hw_cache_event_ids [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX]; -static __initconst u64 westmere_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ - [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ - [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ - [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ - [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ - [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static __initconst u64 nehalem_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ - [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ - [ C(RESULT_MISS) ] 
= 0x0141, /* L1D_CACHE_ST.I_STATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ - [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ - [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ - [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0x0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ - [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static __initconst u64 core2_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ - [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ - [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0808, /* 
DTLB_MISSES.MISS_ST */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static __initconst u64 atom_hw_cache_event_ids - [PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ - [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ - [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ - [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ - [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ - [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ - [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -static u64 intel_pmu_raw_event(u64 hw_event) -{ -#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL -#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL -#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL -#define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL - -#define CORE_EVNTSEL_MASK \ - (INTEL_ARCH_EVTSEL_MASK | \ - INTEL_ARCH_UNIT_MASK | \ - INTEL_ARCH_EDGE_MASK | \ - INTEL_ARCH_INV_MASK | \ - INTEL_ARCH_CNT_MASK) - - return hw_event & CORE_EVNTSEL_MASK; -} - -static __initconst u64 amd_hw_cache_event_ids - 
[PERF_COUNT_HW_CACHE_MAX] - [PERF_COUNT_HW_CACHE_OP_MAX] - [PERF_COUNT_HW_CACHE_RESULT_MAX] = -{ - [ C(L1D) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ - [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ - }, - }, - [ C(L1I ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ - [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(LL ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ - [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(DTLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ - [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = 0, - [ C(RESULT_MISS) ] = 0, - }, - }, - [ C(ITLB) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ - [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, - [ C(BPU ) ] = { - [ C(OP_READ) ] = { - [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ - [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ - }, - [ C(OP_WRITE) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - [ C(OP_PREFETCH) ] = { - [ C(RESULT_ACCESS) ] = -1, - [ C(RESULT_MISS) ] = -1, - }, - }, -}; - -/* - * AMD Performance Monitor K7 and later. - */ -static const u64 amd_perfmon_event_map[] = -{ - [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, - [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, - [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, - [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, - [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, -}; - -static u64 amd_pmu_event_map(int hw_event) -{ - return amd_perfmon_event_map[hw_event]; -} - -static u64 amd_pmu_raw_event(u64 hw_event) -{ -#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL -#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL -#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL -#define K7_EVNTSEL_INV_MASK 0x000800000ULL -#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL - -#define K7_EVNTSEL_MASK \ - (K7_EVNTSEL_EVENT_MASK | \ - K7_EVNTSEL_UNIT_MASK | \ - K7_EVNTSEL_EDGE_MASK | \ - K7_EVNTSEL_INV_MASK | \ - K7_EVNTSEL_REG_MASK) - - return hw_event & K7_EVNTSEL_MASK; -} - /* * Propagate event elapsed time into the generic event. * Can only be executed on the CPU where the event is active. 
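
The per-model hw_cache_event_ids tables shuffled around by this patch all share one shape: a raw event code indexed by (cache, op, result), where 0 marks "not supported on this model" and -1 marks "combination makes no sense", and the set_ext_hw_attr() path touched in the next hunk resolves generic cache events against whichever table was installed at init. The following is a minimal user-space sketch of that lookup, not the kernel code: the enum names, the return values and the helper cache_event_to_config() are invented for illustration, and the two sample codes are simply borrowed from the L1D rows of the tables above.

#include <stdio.h>

/* Simplified stand-ins for the kernel's perf cache enums (assumed names). */
enum { CACHE_L1D, CACHE_L1I, CACHE_LL, CACHE_DTLB, CACHE_ITLB, CACHE_BPU, CACHE_MAX };
enum { OP_READ, OP_WRITE, OP_PREFETCH, OP_MAX };
enum { RES_ACCESS, RES_MISS, RES_MAX };

/* Tiny example table: 0 = not supported, -1 = combination makes no sense. */
static long long cache_event_ids[CACHE_MAX][OP_MAX][RES_MAX] = {
	[CACHE_L1D] = {
		[OP_READ]  = { [RES_ACCESS] = 0x0f40, [RES_MISS] = 0x0140 },
		[OP_WRITE] = { [RES_ACCESS] = 0x0f41, [RES_MISS] = 0x0141 },
	},
	[CACHE_L1I] = {
		[OP_WRITE] = { [RES_ACCESS] = -1, [RES_MISS] = -1 },
	},
};

/* Resolve a (cache, op, result) triple into a raw event config. */
static int cache_event_to_config(int cache, int op, int result,
				 unsigned long long *config)
{
	long long val;

	if (cache < 0 || cache >= CACHE_MAX ||
	    op < 0 || op >= OP_MAX ||
	    result < 0 || result >= RES_MAX)
		return -1;	/* out of range: reject */

	val = cache_event_ids[cache][op][result];
	if (val == 0)
		return -2;	/* not supported on this model */
	if (val == -1)
		return -1;	/* combination makes no sense */

	*config = (unsigned long long)val;
	return 0;
}

int main(void)
{
	unsigned long long config;

	if (cache_event_to_config(CACHE_L1D, OP_READ, RES_MISS, &config) == 0)
		printf("L1D read miss -> raw config 0x%llx\n", config);

	if (cache_event_to_config(CACHE_L1I, OP_WRITE, RES_ACCESS, &config) != 0)
		printf("L1I write access is rejected, as expected\n");

	return 0;
}

The split performed by this commit does not change that lookup; it only moves each vendor's table next to the rest of that vendor's PMU code, so perf_event.c keeps a single generic resolver while perf_event_intel.c, perf_event_amd.c and perf_event_p6.c each install their own table.
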
@@ -1079,42 +434,6 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) return 0; } -static void intel_pmu_enable_bts(u64 config) -{ - unsigned long debugctlmsr; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr |= X86_DEBUGCTL_TR; - debugctlmsr |= X86_DEBUGCTL_BTS; - debugctlmsr |= X86_DEBUGCTL_BTINT; - - if (!(config & ARCH_PERFMON_EVENTSEL_OS)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; - - if (!(config & ARCH_PERFMON_EVENTSEL_USR)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; - - update_debugctlmsr(debugctlmsr); -} - -static void intel_pmu_disable_bts(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - unsigned long debugctlmsr; - - if (!cpuc->ds) - return; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr &= - ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | - X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); - - update_debugctlmsr(debugctlmsr); -} - /* * Setup the hardware configuration for a given attr_type */ @@ -1223,26 +542,6 @@ static int __hw_perf_event_init(struct perf_event *event) return 0; } -static void p6_pmu_disable_all(void) -{ - u64 val; - - /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); -} - -static void intel_pmu_disable_all(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); - - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) - intel_pmu_disable_bts(); -} - static void x86_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1278,33 +577,6 @@ void hw_perf_disable(void) x86_pmu.disable_all(); } -static void p6_pmu_enable_all(void) -{ - unsigned long val; - - /* p6 only has one enable register */ - rdmsrl(MSR_P6_EVNTSEL0, val); - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - wrmsrl(MSR_P6_EVNTSEL0, val); -} - -static void intel_pmu_enable_all(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - - wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); - - if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { - struct perf_event *event = - cpuc->events[X86_PMC_IDX_FIXED_BTS]; - - if (WARN_ON_ONCE(!event)) - return; - - intel_pmu_enable_bts(event->hw.config); - } -} - static void x86_pmu_enable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1578,20 +850,6 @@ void hw_perf_enable(void) x86_pmu.enable_all(); } -static inline u64 intel_pmu_get_status(void) -{ - u64 status; - - rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); - - return status; -} - -static inline void intel_pmu_ack_status(u64 ack) -{ - wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); -} - static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) { (void)checking_wrmsrl(hwc->config_base + idx, @@ -1603,47 +861,6 @@ static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) (void)checking_wrmsrl(hwc->config_base + idx, hwc->config); } -static inline void -intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) -{ - int idx = __idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, mask; - - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - (void)checking_wrmsrl(hwc->config_base, ctrl_val); -} - -static inline void -p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - u64 val = P6_NOP_EVENT; - - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - - 
(void)checking_wrmsrl(hwc->config_base + idx, val); -} - -static inline void -intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) -{ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { - intel_pmu_disable_bts(); - return; - } - - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_disable_fixed(hwc, idx); - return; - } - - x86_pmu_disable_event(hwc, idx); -} - static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); /* @@ -1702,70 +919,6 @@ x86_perf_event_set_period(struct perf_event *event, return ret; } -static inline void -intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) -{ - int idx = __idx - X86_PMC_IDX_FIXED; - u64 ctrl_val, bits, mask; - int err; - - /* - * Enable IRQ generation (0x8), - * and enable ring-3 counting (0x2) and ring-0 counting (0x1) - * if requested: - */ - bits = 0x8ULL; - if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) - bits |= 0x2; - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) - bits |= 0x1; - - /* - * ANY bit is supported in v3 and up - */ - if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) - bits |= 0x4; - - bits <<= (idx * 4); - mask = 0xfULL << (idx * 4); - - rdmsrl(hwc->config_base, ctrl_val); - ctrl_val &= ~mask; - ctrl_val |= bits; - err = checking_wrmsrl(hwc->config_base, ctrl_val); -} - -static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - u64 val; - - val = hwc->config; - if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; - - (void)checking_wrmsrl(hwc->config_base + idx, val); -} - - -static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) -{ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { - if (!__get_cpu_var(cpu_hw_events).enabled) - return; - - intel_pmu_enable_bts(hwc->config); - return; - } - - if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { - intel_pmu_enable_fixed(hwc, idx); - return; - } - - __x86_pmu_enable_event(hwc, idx); -} - static void x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1887,66 +1040,6 @@ void perf_event_print_debug(void) local_irq_restore(flags); } -static void intel_pmu_drain_bts_buffer(struct cpu_hw_events *cpuc) -{ - struct debug_store *ds = cpuc->ds; - struct bts_record { - u64 from; - u64 to; - u64 flags; - }; - struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; - struct bts_record *at, *top; - struct perf_output_handle handle; - struct perf_event_header header; - struct perf_sample_data data; - struct pt_regs regs; - - if (!event) - return; - - if (!ds) - return; - - at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; - top = (struct bts_record *)(unsigned long)ds->bts_index; - - if (top <= at) - return; - - ds->bts_index = ds->bts_buffer_base; - - - data.period = event->hw.last_period; - data.addr = 0; - data.raw = NULL; - regs.ip = 0; - - /* - * Prepare a generic sample, i.e. fill in the invariant fields. - * We will overwrite the from and to address before we output - * the sample. - */ - perf_prepare_sample(&header, &data, event, ®s); - - if (perf_output_begin(&handle, event, - header.size * (top - at), 1, 1)) - return; - - for (; at < top; at++) { - data.ip = at->from; - data.addr = at->to; - - perf_output_sample(&handle, &header, &data, event); - } - - perf_output_end(&handle); - - /* There's new data available. 
*/ - event->hw.interrupts++; - event->pending_kill = POLL_IN; -} - static void x86_pmu_stop(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -1966,10 +1059,6 @@ static void x86_pmu_stop(struct perf_event *event) */ x86_perf_event_update(event, hwc, idx); - /* Drain the remaining BTS records. */ - if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) - intel_pmu_drain_bts_buffer(cpuc); - cpuc->events[idx] = NULL; } @@ -1996,114 +1085,6 @@ static void x86_pmu_disable(struct perf_event *event) perf_event_update_userpage(event); } -/* - * Save and restart an expired event. Called by NMI contexts, - * so it has to be careful about preempting normal event ops: - */ -static int intel_pmu_save_and_restart(struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - int idx = hwc->idx; - int ret; - - x86_perf_event_update(event, hwc, idx); - ret = x86_perf_event_set_period(event, hwc, idx); - - return ret; -} - -static void intel_pmu_reset(void) -{ - struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; - unsigned long flags; - int idx; - - if (!x86_pmu.num_events) - return; - - local_irq_save(flags); - - printk("clearing PMU state on CPU#%d\n", smp_processor_id()); - - for (idx = 0; idx < x86_pmu.num_events; idx++) { - checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); - checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); - } - for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { - checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); - } - if (ds) - ds->bts_index = ds->bts_buffer_base; - - local_irq_restore(flags); -} - -/* - * This handler is triggered by the local APIC, so the APIC IRQ handling - * rules apply: - */ -static int intel_pmu_handle_irq(struct pt_regs *regs) -{ - struct perf_sample_data data; - struct cpu_hw_events *cpuc; - int bit, loops; - u64 ack, status; - - data.addr = 0; - data.raw = NULL; - - cpuc = &__get_cpu_var(cpu_hw_events); - - perf_disable(); - intel_pmu_drain_bts_buffer(cpuc); - status = intel_pmu_get_status(); - if (!status) { - perf_enable(); - return 0; - } - - loops = 0; -again: - if (++loops > 100) { - WARN_ONCE(1, "perfevents: irq loop stuck!\n"); - perf_event_print_debug(); - intel_pmu_reset(); - perf_enable(); - return 1; - } - - inc_irq_stat(apic_perf_irqs); - ack = status; - for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { - struct perf_event *event = cpuc->events[bit]; - - clear_bit(bit, (unsigned long *) &status); - if (!test_bit(bit, cpuc->active_mask)) - continue; - - if (!intel_pmu_save_and_restart(event)) - continue; - - data.period = event->hw.last_period; - - if (perf_event_overflow(event, 1, &data, regs)) - intel_pmu_disable_event(&event->hw, bit); - } - - intel_pmu_ack_status(ack); - - /* - * Repeat if there is more work to be done: - */ - status = intel_pmu_get_status(); - if (status) - goto again; - - perf_enable(); - - return 1; -} - static int x86_pmu_handle_irq(struct pt_regs *regs) { struct perf_sample_data data; @@ -2216,37 +1197,20 @@ perf_event_nmi_handler(struct notifier_block *self, return NOTIFY_STOP; } +static __read_mostly struct notifier_block perf_event_nmi_notifier = { + .notifier_call = perf_event_nmi_handler, + .next = NULL, + .priority = 1 +}; + static struct event_constraint unconstrained; static struct event_constraint emptyconstraint; -static struct event_constraint bts_constraint = - EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); - -static struct event_constraint * -intel_special_constraints(struct perf_event *event) -{ - unsigned int hw_event; - - hw_event = 
event->hw.config & INTEL_ARCH_EVENT_MASK; - - if (unlikely((hw_event == - x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && - (event->hw.sample_period == 1))) { - - return &bts_constraint; - } - return NULL; -} - static struct event_constraint * -intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +x86_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) { struct event_constraint *c; - c = intel_special_constraints(event); - if (c) - return c; - if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { if ((event->hw.config & c->cmask) == c->code) @@ -2257,148 +1221,6 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event return &unconstrained; } -/* - * AMD64 events are detected based on their event codes. - */ -static inline int amd_is_nb_event(struct hw_perf_event *hwc) -{ - return (hwc->config & 0xe0) == 0xe0; -} - -static void amd_put_event_constraints(struct cpu_hw_events *cpuc, - struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct amd_nb *nb = cpuc->amd_nb; - int i; - - /* - * only care about NB events - */ - if (!(nb && amd_is_nb_event(hwc))) - return; - - /* - * need to scan whole list because event may not have - * been assigned during scheduling - * - * no race condition possible because event can only - * be removed on one CPU at a time AND PMU is disabled - * when we come here - */ - for (i = 0; i < x86_pmu.num_events; i++) { - if (nb->owners[i] == event) { - cmpxchg(nb->owners+i, event, NULL); - break; - } - } -} - - /* - * AMD64 NorthBridge events need special treatment because - * counter access needs to be synchronized across all cores - * of a package. Refer to BKDG section 3.12 - * - * NB events are events measuring L3 cache, Hypertransport - * traffic. They are identified by an event code >= 0xe00. - * They measure events on the NorthBride which is shared - * by all cores on a package. NB events are counted on a - * shared set of counters. When a NB event is programmed - * in a counter, the data actually comes from a shared - * counter. Thus, access to those counters needs to be - * synchronized. - * - * We implement the synchronization such that no two cores - * can be measuring NB events using the same counters. Thus, - * we maintain a per-NB allocation table. The available slot - * is propagated using the event_constraint structure. - * - * We provide only one choice for each NB event based on - * the fact that only NB events have restrictions. Consequently, - * if a counter is available, there is a guarantee the NB event - * will be assigned to it. If no slot is available, an empty - * constraint is returned and scheduling will eventually fail - * for this event. - * - * Note that all cores attached the same NB compete for the same - * counters to host NB events, this is why we use atomic ops. Some - * multi-chip CPUs may have more than one NB. - * - * Given that resources are allocated (cmpxchg), they must be - * eventually freed for others to use. This is accomplished by - * calling amd_put_event_constraints(). - * - * Non NB events are not impacted by this restriction. 
- */ -static struct event_constraint * -amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) -{ - struct hw_perf_event *hwc = &event->hw; - struct amd_nb *nb = cpuc->amd_nb; - struct perf_event *old = NULL; - int max = x86_pmu.num_events; - int i, j, k = -1; - - /* - * if not NB event or no NB, then no constraints - */ - if (!(nb && amd_is_nb_event(hwc))) - return &unconstrained; - - /* - * detect if already present, if so reuse - * - * cannot merge with actual allocation - * because of possible holes - * - * event can already be present yet not assigned (in hwc->idx) - * because of successive calls to x86_schedule_events() from - * hw_perf_group_sched_in() without hw_perf_enable() - */ - for (i = 0; i < max; i++) { - /* - * keep track of first free slot - */ - if (k == -1 && !nb->owners[i]) - k = i; - - /* already present, reuse */ - if (nb->owners[i] == event) - goto done; - } - /* - * not present, so grab a new slot - * starting either at: - */ - if (hwc->idx != -1) { - /* previous assignment */ - i = hwc->idx; - } else if (k != -1) { - /* start from free slot found */ - i = k; - } else { - /* - * event not found, no slot found in - * first pass, try again from the - * beginning - */ - i = 0; - } - j = i; - do { - old = cmpxchg(nb->owners+i, NULL, event); - if (!old) - break; - if (++i == max) - i = 0; - } while (i != j); -done: - if (!old) - return &nb->event_constraints[i]; - - return &emptyconstraint; -} - static int x86_event_sched_in(struct perf_event *event, struct perf_cpu_context *cpuctx) { @@ -2509,335 +1331,9 @@ undo: return ret; } -static __read_mostly struct notifier_block perf_event_nmi_notifier = { - .notifier_call = perf_event_nmi_handler, - .next = NULL, - .priority = 1 -}; - -static __initconst struct x86_pmu p6_pmu = { - .name = "p6", - .handle_irq = x86_pmu_handle_irq, - .disable_all = p6_pmu_disable_all, - .enable_all = p6_pmu_enable_all, - .enable = p6_pmu_enable_event, - .disable = p6_pmu_disable_event, - .eventsel = MSR_P6_EVNTSEL0, - .perfctr = MSR_P6_PERFCTR0, - .event_map = p6_pmu_event_map, - .raw_event = p6_pmu_raw_event, - .max_events = ARRAY_SIZE(p6_perfmon_event_map), - .apic = 1, - .max_period = (1ULL << 31) - 1, - .version = 0, - .num_events = 2, - /* - * Events have 40 bits implemented. However they are designed such - * that bits [32-39] are sign extensions of bit 31. As such the - * effective width of a event for P6-like PMU is 32 bits only. 
- * - * See IA-32 Intel Architecture Software developer manual Vol 3B - */ - .event_bits = 32, - .event_mask = (1ULL << 32) - 1, - .get_event_constraints = intel_get_event_constraints, - .event_constraints = intel_p6_event_constraints -}; - -static __initconst struct x86_pmu core_pmu = { - .name = "core", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = x86_pmu_enable_all, - .enable = x86_pmu_enable_event, - .disable = x86_pmu_disable_event, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .raw_event = intel_pmu_raw_event, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic event period: - */ - .max_period = (1ULL << 31) - 1, - .get_event_constraints = intel_get_event_constraints, - .event_constraints = intel_core_event_constraints, -}; - -static __initconst struct x86_pmu intel_pmu = { - .name = "Intel", - .handle_irq = intel_pmu_handle_irq, - .disable_all = intel_pmu_disable_all, - .enable_all = intel_pmu_enable_all, - .enable = intel_pmu_enable_event, - .disable = intel_pmu_disable_event, - .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, - .perfctr = MSR_ARCH_PERFMON_PERFCTR0, - .event_map = intel_pmu_event_map, - .raw_event = intel_pmu_raw_event, - .max_events = ARRAY_SIZE(intel_perfmon_event_map), - .apic = 1, - /* - * Intel PMCs cannot be accessed sanely above 32 bit width, - * so we install an artificial 1<<31 period regardless of - * the generic event period: - */ - .max_period = (1ULL << 31) - 1, - .enable_bts = intel_pmu_enable_bts, - .disable_bts = intel_pmu_disable_bts, - .get_event_constraints = intel_get_event_constraints -}; - -static __initconst struct x86_pmu amd_pmu = { - .name = "AMD", - .handle_irq = x86_pmu_handle_irq, - .disable_all = x86_pmu_disable_all, - .enable_all = x86_pmu_enable_all, - .enable = x86_pmu_enable_event, - .disable = x86_pmu_disable_event, - .eventsel = MSR_K7_EVNTSEL0, - .perfctr = MSR_K7_PERFCTR0, - .event_map = amd_pmu_event_map, - .raw_event = amd_pmu_raw_event, - .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_events = 4, - .event_bits = 48, - .event_mask = (1ULL << 48) - 1, - .apic = 1, - /* use highest bit to detect overflow */ - .max_period = (1ULL << 47) - 1, - .get_event_constraints = amd_get_event_constraints, - .put_event_constraints = amd_put_event_constraints -}; - -static __init int p6_pmu_init(void) -{ - switch (boot_cpu_data.x86_model) { - case 1: - case 3: /* Pentium Pro */ - case 5: - case 6: /* Pentium II */ - case 7: - case 8: - case 11: /* Pentium III */ - case 9: - case 13: - /* Pentium M */ - break; - default: - pr_cont("unsupported p6 CPU model %d ", - boot_cpu_data.x86_model); - return -ENODEV; - } - - x86_pmu = p6_pmu; - - return 0; -} - -static __init int intel_pmu_init(void) -{ - union cpuid10_edx edx; - union cpuid10_eax eax; - unsigned int unused; - unsigned int ebx; - int version; - - if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - /* check for P6 processor family */ - if (boot_cpu_data.x86 == 6) { - return p6_pmu_init(); - } else { - return -ENODEV; - } - } - - /* - * Check whether the Architectural PerfMon supports - * Branch Misses Retired hw_event or not. 
- */ - cpuid(10, &eax.full, &ebx, &unused, &edx.full); - if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) - return -ENODEV; - - version = eax.split.version_id; - if (version < 2) - x86_pmu = core_pmu; - else - x86_pmu = intel_pmu; - - x86_pmu.version = version; - x86_pmu.num_events = eax.split.num_events; - x86_pmu.event_bits = eax.split.bit_width; - x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; - - /* - * Quirk: v2 perfmon does not report fixed-purpose events, so - * assume at least 3 events: - */ - if (version > 1) - x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); - - /* - * Install the hw-cache-events table: - */ - switch (boot_cpu_data.x86_model) { - case 14: /* 65 nm core solo/duo, "Yonah" */ - pr_cont("Core events, "); - break; - - case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ - case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ - case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ - case 29: /* six-core 45 nm xeon "Dunnington" */ - memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - x86_pmu.event_constraints = intel_core2_event_constraints; - pr_cont("Core2 events, "); - break; - - case 26: /* 45 nm nehalem, "Bloomfield" */ - case 30: /* 45 nm nehalem, "Lynnfield" */ - memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - x86_pmu.event_constraints = intel_nehalem_event_constraints; - pr_cont("Nehalem/Corei7 events, "); - break; - case 28: - memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - x86_pmu.event_constraints = intel_gen_event_constraints; - pr_cont("Atom events, "); - break; - - case 37: /* 32 nm nehalem, "Clarkdale" */ - case 44: /* 32 nm nehalem, "Gulftown" */ - memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - x86_pmu.event_constraints = intel_westmere_event_constraints; - pr_cont("Westmere events, "); - break; - default: - /* - * default constraints for v2 and up - */ - x86_pmu.event_constraints = intel_gen_event_constraints; - pr_cont("generic architected perfmon, "); - } - return 0; -} - -static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) -{ - struct amd_nb *nb; - int i; - - nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); - if (!nb) - return NULL; - - memset(nb, 0, sizeof(*nb)); - nb->nb_id = nb_id; - - /* - * initialize all possible NB constraints - */ - for (i = 0; i < x86_pmu.num_events; i++) { - set_bit(i, nb->event_constraints[i].idxmsk); - nb->event_constraints[i].weight = 1; - } - return nb; -} - -static void amd_pmu_cpu_online(int cpu) -{ - struct cpu_hw_events *cpu1, *cpu2; - struct amd_nb *nb = NULL; - int i, nb_id; - - if (boot_cpu_data.x86_max_cores < 2) - return; - - /* - * function may be called too early in the - * boot process, in which case nb_id is bogus - */ - nb_id = amd_get_nb_id(cpu); - if (nb_id == BAD_APICID) - return; - - cpu1 = &per_cpu(cpu_hw_events, cpu); - cpu1->amd_nb = NULL; - - raw_spin_lock(&amd_nb_lock); - - for_each_online_cpu(i) { - cpu2 = &per_cpu(cpu_hw_events, i); - nb = cpu2->amd_nb; - if (!nb) - continue; - if (nb->nb_id == nb_id) - goto found; - } - - nb = amd_alloc_nb(cpu, nb_id); - if (!nb) { - pr_err("perf_events: failed NB allocation for CPU%d\n", cpu); - raw_spin_unlock(&amd_nb_lock); - return; - } -found: - nb->refcnt++; - cpu1->amd_nb = nb; - - raw_spin_unlock(&amd_nb_lock); -} - -static void amd_pmu_cpu_offline(int cpu) -{ - struct cpu_hw_events *cpuhw; - - 
if (boot_cpu_data.x86_max_cores < 2) - return; - - cpuhw = &per_cpu(cpu_hw_events, cpu); - - raw_spin_lock(&amd_nb_lock); - - if (--cpuhw->amd_nb->refcnt == 0) - kfree(cpuhw->amd_nb); - - cpuhw->amd_nb = NULL; - - raw_spin_unlock(&amd_nb_lock); -} - -static __init int amd_pmu_init(void) -{ - /* Performance-monitoring supported from K7 and later: */ - if (boot_cpu_data.x86 < 6) - return -ENODEV; - - x86_pmu = amd_pmu; - - /* Events are common for all AMDs */ - memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); - - /* - * explicitly initialize the boot cpu, other cpus will get - * the cpu hotplug callbacks from smp_init() - */ - amd_pmu_cpu_online(smp_processor_id()); - return 0; -} +#include "perf_event_amd.c" +#include "perf_event_p6.c" +#include "perf_event_intel.c" static void __init pmu_check_apic(void) { diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c new file mode 100644 index 0000000..6d28e08 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -0,0 +1,416 @@ +#ifdef CONFIG_CPU_SUP_AMD + +static raw_spinlock_t amd_nb_lock; + +static __initconst u64 amd_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0041, /* Data Cache Misses */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0142, /* Data Cache Refills :system */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0267, /* Data Prefetcher :attempts */ + [ C(RESULT_MISS) ] = 0x0167, /* Data Prefetcher :cancelled */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction cache fetches */ + [ C(RESULT_MISS) ] = 0x0081, /* Instruction cache misses */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014B, /* Prefetch Instructions :Load */ + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x037D, /* Requests to L2 Cache :IC+DC */ + [ C(RESULT_MISS) ] = 0x037E, /* L2 Cache Misses : IC+DC */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x017F, /* L2 Fill/Writeback */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0040, /* Data Cache Accesses */ + [ C(RESULT_MISS) ] = 0x0046, /* L1 DTLB and L2 DLTB Miss */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* Instruction fecthes */ + [ C(RESULT_MISS) ] = 0x0085, /* Instr. fetch ITLB misses */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c2, /* Retired Branch Instr. */ + [ C(RESULT_MISS) ] = 0x00c3, /* Retired Mispredicted BI */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +/* + * AMD Performance Monitor K7 and later. 
+ */ +static const u64 amd_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0076, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0080, + [PERF_COUNT_HW_CACHE_MISSES] = 0x0081, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, +}; + +static u64 amd_pmu_event_map(int hw_event) +{ + return amd_perfmon_event_map[hw_event]; +} + +static u64 amd_pmu_raw_event(u64 hw_event) +{ +#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL +#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL +#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL +#define K7_EVNTSEL_INV_MASK 0x000800000ULL +#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL + +#define K7_EVNTSEL_MASK \ + (K7_EVNTSEL_EVENT_MASK | \ + K7_EVNTSEL_UNIT_MASK | \ + K7_EVNTSEL_EDGE_MASK | \ + K7_EVNTSEL_INV_MASK | \ + K7_EVNTSEL_REG_MASK) + + return hw_event & K7_EVNTSEL_MASK; +} + +/* + * AMD64 events are detected based on their event codes. + */ +static inline int amd_is_nb_event(struct hw_perf_event *hwc) +{ + return (hwc->config & 0xe0) == 0xe0; +} + +static void amd_put_event_constraints(struct cpu_hw_events *cpuc, + struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct amd_nb *nb = cpuc->amd_nb; + int i; + + /* + * only care about NB events + */ + if (!(nb && amd_is_nb_event(hwc))) + return; + + /* + * need to scan whole list because event may not have + * been assigned during scheduling + * + * no race condition possible because event can only + * be removed on one CPU at a time AND PMU is disabled + * when we come here + */ + for (i = 0; i < x86_pmu.num_events; i++) { + if (nb->owners[i] == event) { + cmpxchg(nb->owners+i, event, NULL); + break; + } + } +} + + /* + * AMD64 NorthBridge events need special treatment because + * counter access needs to be synchronized across all cores + * of a package. Refer to BKDG section 3.12 + * + * NB events are events measuring L3 cache, Hypertransport + * traffic. They are identified by an event code >= 0xe00. + * They measure events on the NorthBride which is shared + * by all cores on a package. NB events are counted on a + * shared set of counters. When a NB event is programmed + * in a counter, the data actually comes from a shared + * counter. Thus, access to those counters needs to be + * synchronized. + * + * We implement the synchronization such that no two cores + * can be measuring NB events using the same counters. Thus, + * we maintain a per-NB allocation table. The available slot + * is propagated using the event_constraint structure. + * + * We provide only one choice for each NB event based on + * the fact that only NB events have restrictions. Consequently, + * if a counter is available, there is a guarantee the NB event + * will be assigned to it. If no slot is available, an empty + * constraint is returned and scheduling will eventually fail + * for this event. + * + * Note that all cores attached the same NB compete for the same + * counters to host NB events, this is why we use atomic ops. Some + * multi-chip CPUs may have more than one NB. + * + * Given that resources are allocated (cmpxchg), they must be + * eventually freed for others to use. This is accomplished by + * calling amd_put_event_constraints(). + * + * Non NB events are not impacted by this restriction. 
+ */ +static struct event_constraint * +amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + struct amd_nb *nb = cpuc->amd_nb; + struct perf_event *old = NULL; + int max = x86_pmu.num_events; + int i, j, k = -1; + + /* + * if not NB event or no NB, then no constraints + */ + if (!(nb && amd_is_nb_event(hwc))) + return &unconstrained; + + /* + * detect if already present, if so reuse + * + * cannot merge with actual allocation + * because of possible holes + * + * event can already be present yet not assigned (in hwc->idx) + * because of successive calls to x86_schedule_events() from + * hw_perf_group_sched_in() without hw_perf_enable() + */ + for (i = 0; i < max; i++) { + /* + * keep track of first free slot + */ + if (k == -1 && !nb->owners[i]) + k = i; + + /* already present, reuse */ + if (nb->owners[i] == event) + goto done; + } + /* + * not present, so grab a new slot + * starting either at: + */ + if (hwc->idx != -1) { + /* previous assignment */ + i = hwc->idx; + } else if (k != -1) { + /* start from free slot found */ + i = k; + } else { + /* + * event not found, no slot found in + * first pass, try again from the + * beginning + */ + i = 0; + } + j = i; + do { + old = cmpxchg(nb->owners+i, NULL, event); + if (!old) + break; + if (++i == max) + i = 0; + } while (i != j); +done: + if (!old) + return &nb->event_constraints[i]; + + return &emptyconstraint; +} + +static __initconst struct x86_pmu amd_pmu = { + .name = "AMD", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, + .eventsel = MSR_K7_EVNTSEL0, + .perfctr = MSR_K7_PERFCTR0, + .event_map = amd_pmu_event_map, + .raw_event = amd_pmu_raw_event, + .max_events = ARRAY_SIZE(amd_perfmon_event_map), + .num_events = 4, + .event_bits = 48, + .event_mask = (1ULL << 48) - 1, + .apic = 1, + /* use highest bit to detect overflow */ + .max_period = (1ULL << 47) - 1, + .get_event_constraints = amd_get_event_constraints, + .put_event_constraints = amd_put_event_constraints +}; + +static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) +{ + struct amd_nb *nb; + int i; + + nb = kmalloc(sizeof(struct amd_nb), GFP_KERNEL); + if (!nb) + return NULL; + + memset(nb, 0, sizeof(*nb)); + nb->nb_id = nb_id; + + /* + * initialize all possible NB constraints + */ + for (i = 0; i < x86_pmu.num_events; i++) { + set_bit(i, nb->event_constraints[i].idxmsk); + nb->event_constraints[i].weight = 1; + } + return nb; +} + +static void amd_pmu_cpu_online(int cpu) +{ + struct cpu_hw_events *cpu1, *cpu2; + struct amd_nb *nb = NULL; + int i, nb_id; + + if (boot_cpu_data.x86_max_cores < 2) + return; + + /* + * function may be called too early in the + * boot process, in which case nb_id is bogus + */ + nb_id = amd_get_nb_id(cpu); + if (nb_id == BAD_APICID) + return; + + cpu1 = &per_cpu(cpu_hw_events, cpu); + cpu1->amd_nb = NULL; + + raw_spin_lock(&amd_nb_lock); + + for_each_online_cpu(i) { + cpu2 = &per_cpu(cpu_hw_events, i); + nb = cpu2->amd_nb; + if (!nb) + continue; + if (nb->nb_id == nb_id) + goto found; + } + + nb = amd_alloc_nb(cpu, nb_id); + if (!nb) { + pr_err("perf_events: failed NB allocation for CPU%d\n", cpu); + raw_spin_unlock(&amd_nb_lock); + return; + } +found: + nb->refcnt++; + cpu1->amd_nb = nb; + + raw_spin_unlock(&amd_nb_lock); +} + +static void amd_pmu_cpu_offline(int cpu) +{ + struct cpu_hw_events *cpuhw; + + if (boot_cpu_data.x86_max_cores < 2) + 
return; + + cpuhw = &per_cpu(cpu_hw_events, cpu); + + raw_spin_lock(&amd_nb_lock); + + if (--cpuhw->amd_nb->refcnt == 0) + kfree(cpuhw->amd_nb); + + cpuhw->amd_nb = NULL; + + raw_spin_unlock(&amd_nb_lock); +} + +static __init int amd_pmu_init(void) +{ + /* Performance-monitoring supported from K7 and later: */ + if (boot_cpu_data.x86 < 6) + return -ENODEV; + + x86_pmu = amd_pmu; + + /* Events are common for all AMDs */ + memcpy(hw_cache_event_ids, amd_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + /* + * explicitly initialize the boot cpu, other cpus will get + * the cpu hotplug callbacks from smp_init() + */ + amd_pmu_cpu_online(smp_processor_id()); + return 0; +} + +#else /* CONFIG_CPU_SUP_AMD */ + +static int amd_pmu_init(void) +{ + return 0; +} + +static void amd_pmu_cpu_online(int cpu) +{ +} + +static void amd_pmu_cpu_offline(int cpu) +{ +} + +#endif diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c new file mode 100644 index 0000000..cf6590c --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -0,0 +1,971 @@ +#ifdef CONFIG_CPU_SUP_INTEL + +/* + * Intel PerfMon v3. Used on Core2 and later. + */ +static const u64 intel_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x003c, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x4f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x412e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x013c, +}; + +static struct event_constraint intel_core_event_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FP_COMP_INSTR_RET */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_core2_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ + INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ + INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_nehalem_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ + INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ + INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ + INTEL_EVENT_CONSTRAINT(0x43, 0x3), /* L1D_ALL_REF */ + INTEL_EVENT_CONSTRAINT(0x48, 0x3), /* L1D_PEND_MISS */ + INTEL_EVENT_CONSTRAINT(0x4e, 0x3), /* L1D_PREFETCH */ + INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_westmere_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* 
INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ + INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ + INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_gen_event_constraints[] = +{ + FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ + FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + EVENT_CONSTRAINT_END +}; + +static u64 intel_pmu_event_map(int hw_event) +{ + return intel_perfmon_event_map[hw_event]; +} + +static __initconst u64 westmere_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0151, /* L1D.REPL */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ + [ C(RESULT_MISS) ] = 0x0251, /* L1D.M_REPL */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ + [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ + [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ + [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x010b, /* MEM_INST_RETIRED.LOADS */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x020b, /* MEM_INST_RETURED.STORES */ + [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0185, /* ITLB_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static __initconst u64 nehalem_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ + [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ + [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ + }, + [ C(OP_PREFETCH) ] = { + [ 
C(RESULT_ACCESS) ] = 0x014e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0x024e, /* L1D_PREFETCH.MISS */ + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0324, /* L2_RQSTS.LOADS */ + [ C(RESULT_MISS) ] = 0x0224, /* L2_RQSTS.LD_MISS */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0c24, /* L2_RQSTS.RFOS */ + [ C(RESULT_MISS) ] = 0x0824, /* L2_RQSTS.RFO_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2e, /* LLC Reference */ + [ C(RESULT_MISS) ] = 0x412e, /* LLC Misses */ + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0108, /* DTLB_LOAD_MISSES.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x010c, /* MEM_STORE_RETIRED.DTLB_MISS */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0x0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x01c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x20c8, /* ITLB_MISS_RETIRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ALL_BRANCHES */ + [ C(RESULT_MISS) ] = 0x03e8, /* BPU_CLEARS.ANY */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static __initconst u64 core2_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI */ + [ C(RESULT_MISS) ] = 0x0140, /* L1D_CACHE_LD.I_STATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI */ + [ C(RESULT_MISS) ] = 0x0141, /* L1D_CACHE_ST.I_STATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x104e, /* L1D_PREFETCH.REQUESTS */ + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0080, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0081, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ + [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ + [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0f40, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0208, /* DTLB_MISSES.MISS_LD */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0f41, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0808, /* DTLB_MISSES.MISS_ST */ + }, + [ C(OP_PREFETCH) ] = { + [ 
C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x1282, /* ITLBMISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static __initconst u64 atom_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE.LD */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE.ST */ + [ C(RESULT_MISS) ] = 0, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(L1I ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0380, /* L1I.READS */ + [ C(RESULT_MISS) ] = 0x0280, /* L1I.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x4f29, /* L2_LD.MESI */ + [ C(RESULT_MISS) ] = 0x4129, /* L2_LD.ISTATE */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x4f2A, /* L2_ST.MESI */ + [ C(RESULT_MISS) ] = 0x412A, /* L2_ST.ISTATE */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x2140, /* L1D_CACHE_LD.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0508, /* DTLB_MISSES.MISS_LD */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x2240, /* L1D_CACHE_ST.MESI (alias) */ + [ C(RESULT_MISS) ] = 0x0608, /* DTLB_MISSES.MISS_ST */ + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = 0, + [ C(RESULT_MISS) ] = 0, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c0, /* INST_RETIRED.ANY_P */ + [ C(RESULT_MISS) ] = 0x0282, /* ITLB.MISSES */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, + [ C(BPU ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x00c4, /* BR_INST_RETIRED.ANY */ + [ C(RESULT_MISS) ] = 0x00c5, /* BP_INST_RETIRED.MISPRED */ + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + +static u64 intel_pmu_raw_event(u64 hw_event) +{ +#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL +#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL +#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL +#define CORE_EVNTSEL_INV_MASK 0x00800000ULL +#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL + +#define CORE_EVNTSEL_MASK \ + (INTEL_ARCH_EVTSEL_MASK | \ + INTEL_ARCH_UNIT_MASK | \ + INTEL_ARCH_EDGE_MASK | \ + INTEL_ARCH_INV_MASK | \ + INTEL_ARCH_CNT_MASK) + + return hw_event & CORE_EVNTSEL_MASK; +} + +static void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + 
debugctlmsr |= X86_DEBUGCTL_TR; + debugctlmsr |= X86_DEBUGCTL_BTS; + debugctlmsr |= X86_DEBUGCTL_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_bts(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | + X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) + intel_pmu_disable_bts(); +} + +static void intel_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); + + if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { + struct perf_event *event = + cpuc->events[X86_PMC_IDX_FIXED_BTS]; + + if (WARN_ON_ONCE(!event)) + return; + + intel_pmu_enable_bts(event->hw.config); + } +} + +static inline u64 intel_pmu_get_status(void) +{ + u64 status; + + rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); + + return status; +} + +static inline void intel_pmu_ack_status(u64 ack) +{ + wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); +} + +static inline void +intel_pmu_disable_fixed(struct hw_perf_event *hwc, int __idx) +{ + int idx = __idx - X86_PMC_IDX_FIXED; + u64 ctrl_val, mask; + + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + (void)checking_wrmsrl(hwc->config_base, ctrl_val); +} + +static void intel_pmu_drain_bts_buffer(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; + struct bts_record *at, *top; + struct perf_output_handle handle; + struct perf_event_header header; + struct perf_sample_data data; + struct pt_regs regs; + + if (!event) + return; + + if (!ds) + return; + + at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; + + if (top <= at) + return; + + ds->bts_index = ds->bts_buffer_base; + + + data.period = event->hw.last_period; + data.addr = 0; + data.raw = NULL; + regs.ip = 0; + + /* + * Prepare a generic sample, i.e. fill in the invariant fields. + * We will overwrite the from and to address before we output + * the sample. + */ + perf_prepare_sample(&header, &data, event, ®s); + + if (perf_output_begin(&handle, event, + header.size * (top - at), 1, 1)) + return; + + for (; at < top; at++) { + data.ip = at->from; + data.addr = at->to; + + perf_output_sample(&handle, &header, &data, event); + } + + perf_output_end(&handle); + + /* There's new data available. 
*/ + event->hw.interrupts++; + event->pending_kill = POLL_IN; +} + +static inline void +intel_pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + intel_pmu_disable_bts(); + intel_pmu_drain_bts_buffer(); + return; + } + + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + intel_pmu_disable_fixed(hwc, idx); + return; + } + + x86_pmu_disable_event(hwc, idx); +} + +static inline void +intel_pmu_enable_fixed(struct hw_perf_event *hwc, int __idx) +{ + int idx = __idx - X86_PMC_IDX_FIXED; + u64 ctrl_val, bits, mask; + int err; + + /* + * Enable IRQ generation (0x8), + * and enable ring-3 counting (0x2) and ring-0 counting (0x1) + * if requested: + */ + bits = 0x8ULL; + if (hwc->config & ARCH_PERFMON_EVENTSEL_USR) + bits |= 0x2; + if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + bits |= 0x1; + + /* + * ANY bit is supported in v3 and up + */ + if (x86_pmu.version > 2 && hwc->config & ARCH_PERFMON_EVENTSEL_ANY) + bits |= 0x4; + + bits <<= (idx * 4); + mask = 0xfULL << (idx * 4); + + rdmsrl(hwc->config_base, ctrl_val); + ctrl_val &= ~mask; + ctrl_val |= bits; + err = checking_wrmsrl(hwc->config_base, ctrl_val); +} + +static void intel_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + if (unlikely(idx == X86_PMC_IDX_FIXED_BTS)) { + if (!__get_cpu_var(cpu_hw_events).enabled) + return; + + intel_pmu_enable_bts(hwc->config); + return; + } + + if (unlikely(hwc->config_base == MSR_ARCH_PERFMON_FIXED_CTR_CTRL)) { + intel_pmu_enable_fixed(hwc, idx); + return; + } + + __x86_pmu_enable_event(hwc, idx); +} + +/* + * Save and restart an expired event. Called by NMI contexts, + * so it has to be careful about preempting normal event ops: + */ +static int intel_pmu_save_and_restart(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int idx = hwc->idx; + int ret; + + x86_perf_event_update(event, hwc, idx); + ret = x86_perf_event_set_period(event, hwc, idx); + + return ret; +} + +static void intel_pmu_reset(void) +{ + struct debug_store *ds = __get_cpu_var(cpu_hw_events).ds; + unsigned long flags; + int idx; + + if (!x86_pmu.num_events) + return; + + local_irq_save(flags); + + printk("clearing PMU state on CPU#%d\n", smp_processor_id()); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); + checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); + } + for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { + checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); + } + if (ds) + ds->bts_index = ds->bts_buffer_base; + + local_irq_restore(flags); +} + +/* + * This handler is triggered by the local APIC, so the APIC IRQ handling + * rules apply: + */ +static int intel_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + int bit, loops; + u64 ack, status; + + data.addr = 0; + data.raw = NULL; + + cpuc = &__get_cpu_var(cpu_hw_events); + + perf_disable(); + intel_pmu_drain_bts_buffer(); + status = intel_pmu_get_status(); + if (!status) { + perf_enable(); + return 0; + } + + loops = 0; +again: + if (++loops > 100) { + WARN_ONCE(1, "perfevents: irq loop stuck!\n"); + perf_event_print_debug(); + intel_pmu_reset(); + perf_enable(); + return 1; + } + + inc_irq_stat(apic_perf_irqs); + ack = status; + for_each_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { + struct perf_event *event = cpuc->events[bit]; + + clear_bit(bit, (unsigned long *) &status); + if (!test_bit(bit, cpuc->active_mask)) + continue; + + if 
(!intel_pmu_save_and_restart(event)) + continue; + + data.period = event->hw.last_period; + + if (perf_event_overflow(event, 1, &data, regs)) + intel_pmu_disable_event(&event->hw, bit); + } + + intel_pmu_ack_status(ack); + + /* + * Repeat if there is more work to be done: + */ + status = intel_pmu_get_status(); + if (status) + goto again; + + perf_enable(); + + return 1; +} + +static struct event_constraint bts_constraint = + EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); + +static struct event_constraint * +intel_special_constraints(struct perf_event *event) +{ + unsigned int hw_event; + + hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; + + if (unlikely((hw_event == + x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && + (event->hw.sample_period == 1))) { + + return &bts_constraint; + } + return NULL; +} + +static struct event_constraint * +intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) +{ + struct event_constraint *c; + + c = intel_special_constraints(event); + if (c) + return c; + + return x86_get_event_constraints(cpuc, event); +} + +static __initconst struct x86_pmu core_pmu = { + .name = "core", + .handle_irq = x86_pmu_handle_irq, + .disable_all = x86_pmu_disable_all, + .enable_all = x86_pmu_enable_all, + .enable = x86_pmu_enable_event, + .disable = x86_pmu_disable_event, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .raw_event = intel_pmu_raw_event, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32 bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL << 31) - 1, + .get_event_constraints = intel_get_event_constraints, + .event_constraints = intel_core_event_constraints, +}; + +static __initconst struct x86_pmu intel_pmu = { + .name = "Intel", + .handle_irq = intel_pmu_handle_irq, + .disable_all = intel_pmu_disable_all, + .enable_all = intel_pmu_enable_all, + .enable = intel_pmu_enable_event, + .disable = intel_pmu_disable_event, + .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, + .perfctr = MSR_ARCH_PERFMON_PERFCTR0, + .event_map = intel_pmu_event_map, + .raw_event = intel_pmu_raw_event, + .max_events = ARRAY_SIZE(intel_perfmon_event_map), + .apic = 1, + /* + * Intel PMCs cannot be accessed sanely above 32 bit width, + * so we install an artificial 1<<31 period regardless of + * the generic event period: + */ + .max_period = (1ULL << 31) - 1, + .enable_bts = intel_pmu_enable_bts, + .disable_bts = intel_pmu_disable_bts, + .get_event_constraints = intel_get_event_constraints +}; + +static __init int intel_pmu_init(void) +{ + union cpuid10_edx edx; + union cpuid10_eax eax; + unsigned int unused; + unsigned int ebx; + int version; + + if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { + /* check for P6 processor family */ + if (boot_cpu_data.x86 == 6) { + return p6_pmu_init(); + } else { + return -ENODEV; + } + } + + /* + * Check whether the Architectural PerfMon supports + * Branch Misses Retired hw_event or not. 
+ */ + cpuid(10, &eax.full, &ebx, &unused, &edx.full); + if (eax.split.mask_length <= ARCH_PERFMON_BRANCH_MISSES_RETIRED) + return -ENODEV; + + version = eax.split.version_id; + if (version < 2) + x86_pmu = core_pmu; + else + x86_pmu = intel_pmu; + + x86_pmu.version = version; + x86_pmu.num_events = eax.split.num_events; + x86_pmu.event_bits = eax.split.bit_width; + x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; + + /* + * Quirk: v2 perfmon does not report fixed-purpose events, so + * assume at least 3 events: + */ + if (version > 1) + x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); + + /* + * Install the hw-cache-events table: + */ + switch (boot_cpu_data.x86_model) { + case 14: /* 65 nm core solo/duo, "Yonah" */ + pr_cont("Core events, "); + break; + + case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ + case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ + case 29: /* six-core 45 nm xeon "Dunnington" */ + memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + x86_pmu.event_constraints = intel_core2_event_constraints; + pr_cont("Core2 events, "); + break; + + case 26: /* 45 nm nehalem, "Bloomfield" */ + case 30: /* 45 nm nehalem, "Lynnfield" */ + memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + x86_pmu.event_constraints = intel_nehalem_event_constraints; + pr_cont("Nehalem/Corei7 events, "); + break; + case 28: + memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + x86_pmu.event_constraints = intel_gen_event_constraints; + pr_cont("Atom events, "); + break; + + case 37: /* 32 nm nehalem, "Clarkdale" */ + case 44: /* 32 nm nehalem, "Gulftown" */ + memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + + x86_pmu.event_constraints = intel_westmere_event_constraints; + pr_cont("Westmere events, "); + break; + default: + /* + * default constraints for v2 and up + */ + x86_pmu.event_constraints = intel_gen_event_constraints; + pr_cont("generic architected perfmon, "); + } + return 0; +} + +#else /* CONFIG_CPU_SUP_INTEL */ + +static int intel_pmu_init(void) +{ + return 0; +} + +#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c new file mode 100644 index 0000000..1ca5ba0 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -0,0 +1,157 @@ +#ifdef CONFIG_CPU_SUP_INTEL + +/* + * Not sure about some of these + */ +static const u64 p6_perfmon_event_map[] = +{ + [PERF_COUNT_HW_CPU_CYCLES] = 0x0079, + [PERF_COUNT_HW_INSTRUCTIONS] = 0x00c0, + [PERF_COUNT_HW_CACHE_REFERENCES] = 0x0f2e, + [PERF_COUNT_HW_CACHE_MISSES] = 0x012e, + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = 0x00c4, + [PERF_COUNT_HW_BRANCH_MISSES] = 0x00c5, + [PERF_COUNT_HW_BUS_CYCLES] = 0x0062, +}; + +static u64 p6_pmu_event_map(int hw_event) +{ + return p6_perfmon_event_map[hw_event]; +} + +/* + * Event setting that is specified not to count anything. + * We use this to effectively disable a counter. + * + * L2_RQSTS with 0 MESI unit mask. 
+ */ +#define P6_NOP_EVENT 0x0000002EULL + +static u64 p6_pmu_raw_event(u64 hw_event) +{ +#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL +#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL +#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL +#define P6_EVNTSEL_INV_MASK 0x00800000ULL +#define P6_EVNTSEL_REG_MASK 0xFF000000ULL + +#define P6_EVNTSEL_MASK \ + (P6_EVNTSEL_EVENT_MASK | \ + P6_EVNTSEL_UNIT_MASK | \ + P6_EVNTSEL_EDGE_MASK | \ + P6_EVNTSEL_INV_MASK | \ + P6_EVNTSEL_REG_MASK) + + return hw_event & P6_EVNTSEL_MASK; +} + +static struct event_constraint p6_event_constraints[] = +{ + INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ + INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ + INTEL_EVENT_CONSTRAINT(0x11, 0x1), /* FP_ASSIST */ + INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ + INTEL_EVENT_CONSTRAINT(0x13, 0x2), /* DIV */ + INTEL_EVENT_CONSTRAINT(0x14, 0x1), /* CYCLES_DIV_BUSY */ + EVENT_CONSTRAINT_END +}; + +static void p6_pmu_disable_all(void) +{ + u64 val; + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + +static void p6_pmu_enable_all(void) +{ + unsigned long val; + + /* p6 only has one enable register */ + rdmsrl(MSR_P6_EVNTSEL0, val); + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + wrmsrl(MSR_P6_EVNTSEL0, val); +} + +static inline void +p6_pmu_disable_event(struct hw_perf_event *hwc, int idx) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val = P6_NOP_EVENT; + + if (cpuc->enabled) + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + + (void)checking_wrmsrl(hwc->config_base + idx, val); +} + +static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val; + + val = hwc->config; + if (cpuc->enabled) + val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + + (void)checking_wrmsrl(hwc->config_base + idx, val); +} + +static __initconst struct x86_pmu p6_pmu = { + .name = "p6", + .handle_irq = x86_pmu_handle_irq, + .disable_all = p6_pmu_disable_all, + .enable_all = p6_pmu_enable_all, + .enable = p6_pmu_enable_event, + .disable = p6_pmu_disable_event, + .eventsel = MSR_P6_EVNTSEL0, + .perfctr = MSR_P6_PERFCTR0, + .event_map = p6_pmu_event_map, + .raw_event = p6_pmu_raw_event, + .max_events = ARRAY_SIZE(p6_perfmon_event_map), + .apic = 1, + .max_period = (1ULL << 31) - 1, + .version = 0, + .num_events = 2, + /* + * Events have 40 bits implemented. However they are designed such + * that bits [32-39] are sign extensions of bit 31. As such the + * effective width of a event for P6-like PMU is 32 bits only. 
+ * + * See IA-32 Intel Architecture Software developer manual Vol 3B + */ + .event_bits = 32, + .event_mask = (1ULL << 32) - 1, + .get_event_constraints = x86_get_event_constraints, + .event_constraints = p6_event_constraints, +}; + +static __init int p6_pmu_init(void) +{ + switch (boot_cpu_data.x86_model) { + case 1: + case 3: /* Pentium Pro */ + case 5: + case 6: /* Pentium II */ + case 7: + case 8: + case 11: /* Pentium III */ + case 9: + case 13: + /* Pentium M */ + break; + default: + pr_cont("unsupported p6 CPU model %d ", + boot_cpu_data.x86_model); + return -ENODEV; + } + + x86_pmu = p6_pmu; + + return 0; +} + +#endif /* CONFIG_CPU_SUP_INTEL */ -- cgit v1.1 From 1dd2980d990068e20045b90c424518cc7f3657ff Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Feb 2010 17:07:35 +0100 Subject: perf_event, amd: Fix spinlock initialization Avoid kernels from exploding on AMD machines when they have any lock debugging bits enabled. Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 6d28e08..8f3dbfda 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -1,6 +1,6 @@ #ifdef CONFIG_CPU_SUP_AMD -static raw_spinlock_t amd_nb_lock; +static DEFINE_RAW_SPINLOCK(amd_nb_lock); static __initconst u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] -- cgit v1.1 From 3d083407a16698de86b42aee0da2ffb280b5cb7e Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Sat, 27 Feb 2010 17:24:15 +0100 Subject: x86/hw-breakpoints: Remove the name field Remove the name field from the arch_hw_breakpoint. We never deal with target symbols in the arch level, neither do we need to ever store it. It's a legacy for the previous version of the x86 breakpoint backend. Let's remove it. Signed-off-by: Frederic Weisbecker Cc: K.Prasad Cc: Linus Torvalds --- arch/x86/include/asm/hw_breakpoint.h | 1 - arch/x86/kernel/hw_breakpoint.c | 7 ------- 2 files changed, 8 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/hw_breakpoint.h b/arch/x86/include/asm/hw_breakpoint.h index 0675a7c..2a1bd8f 100644 --- a/arch/x86/include/asm/hw_breakpoint.h +++ b/arch/x86/include/asm/hw_breakpoint.h @@ -10,7 +10,6 @@ * (display/resolving) */ struct arch_hw_breakpoint { - char *name; /* Contains name of the symbol to set bkpt */ unsigned long address; u8 len; u8 type; diff --git a/arch/x86/kernel/hw_breakpoint.c b/arch/x86/kernel/hw_breakpoint.c index dca2802..41e08df 100644 --- a/arch/x86/kernel/hw_breakpoint.c +++ b/arch/x86/kernel/hw_breakpoint.c @@ -344,13 +344,6 @@ int arch_validate_hwbkpt_settings(struct perf_event *bp, } /* - * For kernel-addresses, either the address or symbol name can be - * specified. - */ - if (info->name) - info->address = (unsigned long) - kallsyms_lookup_name(info->name); - /* * Check that the low-order bits of the address are appropriate * for the alignment implied by len. */ -- cgit v1.1 From 1d6040f17d12a65b9f7ab4cb9fd6d721206b79ec Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 25 Feb 2010 19:40:46 +0100 Subject: perf, x86: make IBS macros available in perf_event.h This patch moves code from oprofile to perf_event.h to make it also available for usage by perf. 
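For readers unfamiliar with these bits: the IbsFetchCtl definitions being moved here are normally OR-ed together to build the fetch-control MSR value. The following minimal sketch, assuming a hypothetical sampling period and randomization flag, just computes that value in plain C; the macro values mirror the ones this patch adds to perf_event.h in the diff below, and the kernel's actual MSR write (wrmsrl to MSR_AMD64_IBSFETCHCTL) is shown only as a comment.

/*
 * Minimal sketch, not from the patch: composing an IbsFetchCtl value from
 * the shared macros. The macro values mirror the ones this patch adds to
 * perf_event.h; max_cnt_fetch and rand_en are hypothetical configuration
 * inputs, and the kernel's MSR write is only shown as a comment.
 */
#include <stdio.h>
#include <stdint.h>

#define IBS_FETCH_RAND_EN	(1ULL << 57)
#define IBS_FETCH_ENABLE	(1ULL << 48)

int main(void)
{
	uint64_t max_cnt_fetch = 0x10000;	/* assumed sampling period (fetches) */
	int rand_en = 1;			/* assumed: randomize the period */
	uint64_t val;

	/* The low 16 bits hold the maximum fetch count in units of 16. */
	val  = (max_cnt_fetch >> 4) & 0xFFFF;
	val |= rand_en ? IBS_FETCH_RAND_EN : 0;
	val |= IBS_FETCH_ENABLE;

	/* In the kernel this would be: wrmsrl(MSR_AMD64_IBSFETCHCTL, val); */
	printf("IbsFetchCtl = 0x%016llx\n", (unsigned long long)val);
	return 0;
}
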
Signed-off-by: Robert Richter --- arch/x86/include/asm/perf_event.h | 10 ++++++++++ arch/x86/oprofile/op_model_amd.c | 11 ----------- 2 files changed, 10 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index befd172..4933ccd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -117,6 +117,16 @@ union cpuid10_edx { */ #define X86_PMC_IDX_FIXED_BTS (X86_PMC_IDX_FIXED + 16) +/* IbsFetchCtl bits/masks */ +#define IBS_FETCH_RAND_EN (1ULL<<57) +#define IBS_FETCH_VAL (1ULL<<49) +#define IBS_FETCH_ENABLE (1ULL<<48) +#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL + +/* IbsOpCtl bits */ +#define IBS_OP_CNT_CTL (1ULL<<19) +#define IBS_OP_VAL (1ULL<<18) +#define IBS_OP_ENABLE (1ULL<<17) #ifdef CONFIG_PERF_EVENTS extern void init_hw_perf_events(void); diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 6a58256..c671749 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -46,17 +46,6 @@ static unsigned long reset_value[NUM_VIRT_COUNTERS]; -/* IbsFetchCtl bits/masks */ -#define IBS_FETCH_RAND_EN (1ULL<<57) -#define IBS_FETCH_VAL (1ULL<<49) -#define IBS_FETCH_ENABLE (1ULL<<48) -#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL - -/* IbsOpCtl bits */ -#define IBS_OP_CNT_CTL (1ULL<<19) -#define IBS_OP_VAL (1ULL<<18) -#define IBS_OP_ENABLE (1ULL<<17) - #define IBS_FETCH_SIZE 6 #define IBS_OP_SIZE 12 -- cgit v1.1 From a163b1099dc7016704043c7fc572ae42519f08f7 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Thu, 25 Feb 2010 19:43:07 +0100 Subject: perf, x86: add some IBS macros to perf_event.h Signed-off-by: Robert Richter --- arch/x86/include/asm/perf_event.h | 4 +++- arch/x86/oprofile/op_model_amd.c | 6 +++--- 2 files changed, 6 insertions(+), 4 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 4933ccd..c7f60e1 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -121,12 +121,14 @@ union cpuid10_edx { #define IBS_FETCH_RAND_EN (1ULL<<57) #define IBS_FETCH_VAL (1ULL<<49) #define IBS_FETCH_ENABLE (1ULL<<48) -#define IBS_FETCH_CNT_MASK 0xFFFF0000ULL +#define IBS_FETCH_CNT 0xFFFF0000ULL +#define IBS_FETCH_MAX_CNT 0x0000FFFFULL /* IbsOpCtl bits */ #define IBS_OP_CNT_CTL (1ULL<<19) #define IBS_OP_VAL (1ULL<<18) #define IBS_OP_ENABLE (1ULL<<17) +#define IBS_OP_MAX_CNT 0x0000FFFFULL #ifdef CONFIG_PERF_EVENTS extern void init_hw_perf_events(void); diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index c671749..8ddb9fa 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -279,7 +279,7 @@ op_amd_handle_ibs(struct pt_regs * const regs, oprofile_write_commit(&entry); /* reenable the IRQ */ - ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT_MASK); + ctl &= ~(IBS_FETCH_VAL | IBS_FETCH_CNT); ctl |= IBS_FETCH_ENABLE; wrmsrl(MSR_AMD64_IBSFETCHCTL, ctl); } @@ -319,7 +319,7 @@ static inline void op_amd_start_ibs(void) return; if (ibs_config.fetch_enabled) { - val = (ibs_config.max_cnt_fetch >> 4) & 0xFFFF; + val = (ibs_config.max_cnt_fetch >> 4) & IBS_FETCH_MAX_CNT; val |= ibs_config.rand_en ? IBS_FETCH_RAND_EN : 0; val |= IBS_FETCH_ENABLE; wrmsrl(MSR_AMD64_IBSFETCHCTL, val); @@ -341,7 +341,7 @@ static inline void op_amd_start_ibs(void) * avoid underflows. 
*/ ibs_op_ctl = min(ibs_op_ctl + IBS_RANDOM_MAXCNT_OFFSET, - 0xFFFFULL); + IBS_OP_MAX_CNT); } if (ibs_caps & IBS_CAPS_OPCNT && ibs_config.dispatched_ops) ibs_op_ctl |= IBS_OP_CNT_CTL; -- cgit v1.1 From bb1165d6882f423f90fc7007a88c6c993b7c2ac4 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 1 Mar 2010 14:21:23 +0100 Subject: perf, x86: rename macro in ARCH_PERFMON_EVENTSEL_ENABLE For consistency reasons this patch renames ARCH_PERFMON_EVENTSEL0_ENABLE to ARCH_PERFMON_EVENTSEL_ENABLE. The following is performed: $ sed -i -e s/ARCH_PERFMON_EVENTSEL0_ENABLE/ARCH_PERFMON_EVENTSEL_ENABLE/g \ arch/x86/include/asm/perf_event.h arch/x86/kernel/cpu/perf_event.c \ arch/x86/kernel/cpu/perf_event_p6.c \ arch/x86/kernel/cpu/perfctr-watchdog.c \ arch/x86/oprofile/op_model_amd.c arch/x86/oprofile/op_model_ppro.c Signed-off-by: Robert Richter --- arch/x86/include/asm/perf_event.h | 2 +- arch/x86/kernel/cpu/perf_event.c | 8 ++++---- arch/x86/kernel/cpu/perf_event_p6.c | 8 ++++---- arch/x86/kernel/cpu/perfctr-watchdog.c | 2 +- arch/x86/oprofile/op_model_amd.c | 6 +++--- arch/x86/oprofile/op_model_ppro.c | 6 +++--- 6 files changed, 16 insertions(+), 16 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index c7f60e1..80e6936 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -18,7 +18,7 @@ #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 -#define ARCH_PERFMON_EVENTSEL0_ENABLE (1 << 22) +#define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22) #define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) #define ARCH_PERFMON_EVENTSEL_INT (1 << 20) #define ARCH_PERFMON_EVENTSEL_OS (1 << 17) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 641ccb9..6531b4b 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -553,9 +553,9 @@ static void x86_pmu_disable_all(void) if (!test_bit(idx, cpuc->active_mask)) continue; rdmsrl(x86_pmu.eventsel + idx, val); - if (!(val & ARCH_PERFMON_EVENTSEL0_ENABLE)) + if (!(val & ARCH_PERFMON_EVENTSEL_ENABLE)) continue; - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; wrmsrl(x86_pmu.eventsel + idx, val); } } @@ -590,7 +590,7 @@ static void x86_pmu_enable_all(void) continue; val = event->hw.config; - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + val |= ARCH_PERFMON_EVENTSEL_ENABLE; wrmsrl(x86_pmu.eventsel + idx, val); } } @@ -853,7 +853,7 @@ void hw_perf_enable(void) static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc, int idx) { (void)checking_wrmsrl(hwc->config_base + idx, - hwc->config | ARCH_PERFMON_EVENTSEL0_ENABLE); + hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); } static inline void x86_pmu_disable_event(struct hw_perf_event *hwc, int idx) diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 1ca5ba0..a4e67b9 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -62,7 +62,7 @@ static void p6_pmu_disable_all(void) /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; wrmsrl(MSR_P6_EVNTSEL0, val); } @@ -72,7 +72,7 @@ static void p6_pmu_enable_all(void) /* p6 only has one enable register */ rdmsrl(MSR_P6_EVNTSEL0, val); - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + val |= ARCH_PERFMON_EVENTSEL_ENABLE; wrmsrl(MSR_P6_EVNTSEL0, val); } @@ -83,7 +83,7 @@ p6_pmu_disable_event(struct 
hw_perf_event *hwc, int idx) u64 val = P6_NOP_EVENT; if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + val |= ARCH_PERFMON_EVENTSEL_ENABLE; (void)checking_wrmsrl(hwc->config_base + idx, val); } @@ -95,7 +95,7 @@ static void p6_pmu_enable_event(struct hw_perf_event *hwc, int idx) val = hwc->config; if (cpuc->enabled) - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + val |= ARCH_PERFMON_EVENTSEL_ENABLE; (void)checking_wrmsrl(hwc->config_base + idx, val); } diff --git a/arch/x86/kernel/cpu/perfctr-watchdog.c b/arch/x86/kernel/cpu/perfctr-watchdog.c index 74f4e85..fb329e9 100644 --- a/arch/x86/kernel/cpu/perfctr-watchdog.c +++ b/arch/x86/kernel/cpu/perfctr-watchdog.c @@ -680,7 +680,7 @@ static int setup_intel_arch_watchdog(unsigned nmi_hz) cpu_nmi_set_wd_enabled(); apic_write(APIC_LVTPC, APIC_DM_NMI); - evntsel |= ARCH_PERFMON_EVENTSEL0_ENABLE; + evntsel |= ARCH_PERFMON_EVENTSEL_ENABLE; wrmsr(evntsel_msr, evntsel, 0); intel_arch_wd_ops.checkbit = 1ULL << (eax.split.bit_width - 1); return 1; diff --git a/arch/x86/oprofile/op_model_amd.c b/arch/x86/oprofile/op_model_amd.c index 8ddb9fa..090cbbe 100644 --- a/arch/x86/oprofile/op_model_amd.c +++ b/arch/x86/oprofile/op_model_amd.c @@ -171,7 +171,7 @@ static void op_amd_setup_ctrs(struct op_x86_model_spec const *model, continue; } rdmsrl(msrs->controls[i].addr, val); - if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) + if (val & ARCH_PERFMON_EVENTSEL_ENABLE) op_x86_warn_in_use(i); val &= model->reserved; wrmsrl(msrs->controls[i].addr, val); @@ -398,7 +398,7 @@ static void op_amd_start(struct op_msrs const * const msrs) if (!reset_value[op_x86_phys_to_virt(i)]) continue; rdmsrl(msrs->controls[i].addr, val); - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + val |= ARCH_PERFMON_EVENTSEL_ENABLE; wrmsrl(msrs->controls[i].addr, val); } @@ -418,7 +418,7 @@ static void op_amd_stop(struct op_msrs const * const msrs) if (!reset_value[op_x86_phys_to_virt(i)]) continue; rdmsrl(msrs->controls[i].addr, val); - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; wrmsrl(msrs->controls[i].addr, val); } diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 5d1727b..2bf90fa 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -88,7 +88,7 @@ static void ppro_setup_ctrs(struct op_x86_model_spec const *model, continue; } rdmsrl(msrs->controls[i].addr, val); - if (val & ARCH_PERFMON_EVENTSEL0_ENABLE) + if (val & ARCH_PERFMON_EVENTSEL_ENABLE) op_x86_warn_in_use(i); val &= model->reserved; wrmsrl(msrs->controls[i].addr, val); @@ -166,7 +166,7 @@ static void ppro_start(struct op_msrs const * const msrs) for (i = 0; i < num_counters; ++i) { if (reset_value[i]) { rdmsrl(msrs->controls[i].addr, val); - val |= ARCH_PERFMON_EVENTSEL0_ENABLE; + val |= ARCH_PERFMON_EVENTSEL_ENABLE; wrmsrl(msrs->controls[i].addr, val); } } @@ -184,7 +184,7 @@ static void ppro_stop(struct op_msrs const * const msrs) if (!reset_value[i]) continue; rdmsrl(msrs->controls[i].addr, val); - val &= ~ARCH_PERFMON_EVENTSEL0_ENABLE; + val &= ~ARCH_PERFMON_EVENTSEL_ENABLE; wrmsrl(msrs->controls[i].addr, val); } } -- cgit v1.1 From 320ebf09cbb6d01954c9a060266aa8e0d27f4638 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Mar 2010 12:35:37 +0100 Subject: perf, x86: Restrict the ANY flag The ANY flag can show SMT data of another task (like 'top'), so we want to disable it when system-wide profiling is disabled. 
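To see the effect of this restriction from user space, here is a hedged sketch (not part of the patch) that requests a raw event with the ANY bit, bit 21 of the event-select, via perf_event_open(); with the check added in the diff below, an unprivileged caller should get EACCES whenever perf_event_paranoid restricts CPU-wide data. The 0x3c event code (unhalted core cycles) is only an illustrative choice.

/*
 * Userspace sketch, not part of the patch: request a raw event with the
 * ANY bit (bit 21 of the event-select) set. With this check applied, an
 * unprivileged process should see EACCES when perf_event_paranoid
 * restricts CPU-wide data; 0x003c (unhalted core cycles) is illustrative.
 */
#include <stdio.h>
#include <string.h>
#include <unistd.h>
#include <sys/types.h>
#include <sys/syscall.h>
#include <linux/perf_event.h>

static long perf_event_open(struct perf_event_attr *attr, pid_t pid,
			    int cpu, int group_fd, unsigned long flags)
{
	return syscall(__NR_perf_event_open, attr, pid, cpu, group_fd, flags);
}

int main(void)
{
	struct perf_event_attr attr;
	long fd;

	memset(&attr, 0, sizeof(attr));
	attr.type   = PERF_TYPE_RAW;
	attr.size   = sizeof(attr);
	attr.config = 0x003c | (1ULL << 21);	/* event 0x3c plus the ANY bit */

	fd = perf_event_open(&attr, 0, -1, -1, 0);
	if (fd < 0)
		perror("perf_event_open");	/* EACCES expected if unprivileged */
	else
		close((int)fd);
	return 0;
}
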
Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 6531b4b..aab2e1c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -503,6 +503,9 @@ static int __hw_perf_event_init(struct perf_event *event) */ if (attr->type == PERF_TYPE_RAW) { hwc->config |= x86_pmu.raw_event(attr->config); + if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && + perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) + return -EACCES; return 0; } -- cgit v1.1 From b622d644c7d61a5cb95b74e7b143c263bed21f0a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 1 Feb 2010 15:36:30 +0100 Subject: perf_events, x86: Fixup fixed counter constraints Patch 1da53e0230 ("perf_events, x86: Improve x86 event scheduling") lost us one of the fixed purpose counters and then ed8777fc13 ("perf_events, x86: Fix event constraint masks") broke it even further. Widen the fixed event mask to event+umask and specify the full config for each of the 3 fixed purpose counters. Then let the init code fill out the placement for the GP regs based on the cpuid info. Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 2 +- arch/x86/kernel/cpu/perf_event.c | 25 ++++++++++++++++++------- arch/x86/kernel/cpu/perf_event_intel.c | 31 +++++++++++++++++++++---------- 3 files changed, 40 insertions(+), 18 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 80e6936..db6109a 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -50,7 +50,7 @@ INTEL_ARCH_INV_MASK| \ INTEL_ARCH_EDGE_MASK|\ INTEL_ARCH_UNIT_MASK|\ - INTEL_ARCH_EVTSEL_MASK) + INTEL_ARCH_EVENT_MASK) #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index aab2e1c..bfc43fa 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -73,10 +73,10 @@ struct debug_store { struct event_constraint { union { unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - u64 idxmsk64[1]; + u64 idxmsk64; }; - int code; - int cmask; + u64 code; + u64 cmask; int weight; }; @@ -103,7 +103,7 @@ struct cpu_hw_events { }; #define __EVENT_CONSTRAINT(c, n, m, w) {\ - { .idxmsk64[0] = (n) }, \ + { .idxmsk64 = (n) }, \ .code = (c), \ .cmask = (m), \ .weight = (w), \ @@ -116,7 +116,7 @@ struct cpu_hw_events { EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) #define FIXED_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, INTEL_ARCH_FIXED_MASK) + EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) @@ -615,8 +615,8 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) bitmap_zero(used_mask, X86_PMC_IDX_MAX); for (i = 0; i < n; i++) { - constraints[i] = - x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); + c = x86_pmu.get_event_constraints(cpuc, cpuc->event_list[i]); + constraints[i] = c; } /* @@ -1350,6 +1350,7 @@ static void __init pmu_check_apic(void) void __init init_hw_perf_events(void) { + struct event_constraint *c; int err; pr_info("Performance Events: "); @@ -1398,6 +1399,16 @@ void __init init_hw_perf_events(void) 
__EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, 0, x86_pmu.num_events); + if (x86_pmu.event_constraints) { + for_each_event_constraint(c, x86_pmu.event_constraints) { + if (c->cmask != INTEL_ARCH_FIXED_MASK) + continue; + + c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; + c->weight += x86_pmu.num_events; + } + } + pr_info("... version: %d\n", x86_pmu.version); pr_info("... bit width: %d\n", x86_pmu.event_bits); pr_info("... generic registers: %d\n", x86_pmu.num_events); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index cf6590c..4fbdfe5 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -1,7 +1,7 @@ #ifdef CONFIG_CPU_SUP_INTEL /* - * Intel PerfMon v3. Used on Core2 and later. + * Intel PerfMon, used on Core and later. */ static const u64 intel_perfmon_event_map[] = { @@ -27,8 +27,14 @@ static struct event_constraint intel_core_event_constraints[] = static struct event_constraint intel_core2_event_constraints[] = { - FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ - FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + /* + * Core2 has Fixed Counter 2 listed as CPU_CLK_UNHALTED.REF and event + * 0x013c as CPU_CLK_UNHALTED.BUS and specifies there is a fixed + * ratio between these counters. + */ + /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ INTEL_EVENT_CONSTRAINT(0x10, 0x1), /* FP_COMP_OPS_EXE */ INTEL_EVENT_CONSTRAINT(0x11, 0x2), /* FP_ASSIST */ INTEL_EVENT_CONSTRAINT(0x12, 0x2), /* MUL */ @@ -37,14 +43,16 @@ static struct event_constraint intel_core2_event_constraints[] = INTEL_EVENT_CONSTRAINT(0x18, 0x1), /* IDLE_DURING_DIV */ INTEL_EVENT_CONSTRAINT(0x19, 0x2), /* DELAYED_BYPASS */ INTEL_EVENT_CONSTRAINT(0xa1, 0x1), /* RS_UOPS_DISPATCH_CYCLES */ + INTEL_EVENT_CONSTRAINT(0xc9, 0x1), /* ITLB_MISS_RETIRED (T30-9) */ INTEL_EVENT_CONSTRAINT(0xcb, 0x1), /* MEM_LOAD_RETIRED */ EVENT_CONSTRAINT_END }; static struct event_constraint intel_nehalem_event_constraints[] = { - FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ - FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ INTEL_EVENT_CONSTRAINT(0x40, 0x3), /* L1D_CACHE_LD */ INTEL_EVENT_CONSTRAINT(0x41, 0x3), /* L1D_CACHE_ST */ INTEL_EVENT_CONSTRAINT(0x42, 0x3), /* L1D_CACHE_LOCK */ @@ -58,8 +66,9 @@ static struct event_constraint intel_nehalem_event_constraints[] = static struct event_constraint intel_westmere_event_constraints[] = { - FIXED_EVENT_CONSTRAINT(0xc0, (0xf|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ - FIXED_EVENT_CONSTRAINT(0x3c, (0xf|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ INTEL_EVENT_CONSTRAINT(0x51, 0x3), /* L1D */ INTEL_EVENT_CONSTRAINT(0x60, 0x1), /* OFFCORE_REQUESTS_OUTSTANDING */ INTEL_EVENT_CONSTRAINT(0x63, 0x3), /* CACHE_LOCK_CYCLES */ @@ -68,8 +77,9 @@ static struct event_constraint intel_westmere_event_constraints[] = static struct event_constraint intel_gen_event_constraints[] = { - 
FIXED_EVENT_CONSTRAINT(0xc0, (0x3|(1ULL<<32))), /* INSTRUCTIONS_RETIRED */ - FIXED_EVENT_CONSTRAINT(0x3c, (0x3|(1ULL<<33))), /* UNHALTED_CORE_CYCLES */ + FIXED_EVENT_CONSTRAINT(0x00c0, 0), /* INST_RETIRED.ANY */ + FIXED_EVENT_CONSTRAINT(0x003c, 1), /* CPU_CLK_UNHALTED.CORE */ + /* FIXED_EVENT_CONSTRAINT(0x013c, 2), CPU_CLK_UNHALTED.REF */ EVENT_CONSTRAINT_END }; @@ -935,7 +945,7 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; - case 28: + case 28: /* Atom */ memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, sizeof(hw_cache_event_ids)); @@ -951,6 +961,7 @@ static __init int intel_pmu_init(void) x86_pmu.event_constraints = intel_westmere_event_constraints; pr_cont("Westmere events, "); break; + default: /* * default constraints for v2 and up -- cgit v1.1
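As a closing illustration of the constraint rework above, the following standalone sketch, simplified from the patch, shows what FIXED_EVENT_CONSTRAINT() now encodes (event+umask as the code, a fixed-counter bit at position 32+n in the index mask) and how the init loop widens each fixed constraint with the generic counters. The sentinel value used for INTEL_ARCH_FIXED_MASK, the counter count of 4, and the direct weight bookkeeping (instead of the kernel's HWEIGHT() helper) are assumptions made only so the example compiles on its own.

/*
 * Standalone sketch, simplified from the patch: what FIXED_EVENT_CONSTRAINT()
 * produces and how the init code widens each fixed constraint so the event can
 * also be scheduled on a generic counter. INTEL_ARCH_FIXED_MASK is an opaque
 * sentinel here, num_events (4) is an assumed counter count, and weights are
 * tracked directly rather than via the kernel's HWEIGHT() helper.
 */
#include <stdio.h>
#include <stdint.h>

#define INTEL_ARCH_FIXED_MASK	0xffffULL	/* placeholder sentinel value */

struct event_constraint {
	uint64_t idxmsk64;	/* which counters may host the event */
	uint64_t code;		/* event code (+umask for fixed events) */
	uint64_t cmask;		/* which config bits the constraint matches */
	int	 weight;	/* number of usable counters */
};

#define FIXED_EVENT_CONSTRAINT(c, n) \
	{ .idxmsk64 = (1ULL << (32 + (n))), .code = (c), \
	  .cmask = INTEL_ARCH_FIXED_MASK, .weight = 1 }

static struct event_constraint constraints[] = {
	FIXED_EVENT_CONSTRAINT(0x00c0, 0),	/* INST_RETIRED.ANY */
	FIXED_EVENT_CONSTRAINT(0x003c, 1),	/* CPU_CLK_UNHALTED.CORE */
	{ 0, 0, 0, 0 },				/* end marker */
};

int main(void)
{
	struct event_constraint *c;
	int num_events = 4;	/* assumed number of generic counters */

	/* Mirror of the init loop: let fixed events also use generic counters. */
	for (c = constraints; c->cmask; c++) {
		if (c->cmask != INTEL_ARCH_FIXED_MASK)
			continue;
		c->idxmsk64 |= (1ULL << num_events) - 1;
		c->weight += num_events;
	}

	for (c = constraints; c->cmask; c++)
		printf("event 0x%04llx: idxmsk=0x%010llx weight=%d\n",
		       (unsigned long long)c->code,
		       (unsigned long long)c->idxmsk64, c->weight);
	return 0;
}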