From ca037701a025334e724e5c61b3b1082940c8b981 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 2 Mar 2010 19:52:12 +0100 Subject: perf, x86: Add PEBS infrastructure This patch implements support for Intel Precise Event Based Sampling, which is an alternative counter mode in which the counter triggers a hardware assist to collect information on events. The hardware assist takes a trap like snapshot of a subset of the machine registers. This data is written to the Intel Debug-Store, which can be programmed with a data threshold at which to raise a PMI. With the PEBS hardware assist being trap like, the reported IP is always one instruction after the actual instruction that triggered the event. This implements a simple PEBS model that always takes a single PEBS event at a time. This is done so that the interaction with the rest of the system is as expected (freq adjust, period randomization, lbr, callchains, etc.). It adds an ABI element: perf_event_attr::precise, which indicates that we wish to use this (constrained, but precise) mode. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.392111285@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 223 +++++------- arch/x86/kernel/cpu/perf_event_intel.c | 150 ++------ arch/x86/kernel/cpu/perf_event_intel_ds.c | 557 ++++++++++++++++++++++++++++++ 3 files changed, 669 insertions(+), 261 deletions(-) create mode 100644 arch/x86/kernel/cpu/perf_event_intel_ds.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1d665a0b..0c03d5c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -31,45 +31,6 @@ static u64 perf_event_mask __read_mostly; -/* The maximal number of PEBS events: */ -#define MAX_PEBS_EVENTS 4 - -/* The size of a BTS record in bytes: */ -#define BTS_RECORD_SIZE 24 - -/* The size of a per-cpu BTS buffer in bytes: */ -#define BTS_BUFFER_SIZE (BTS_RECORD_SIZE * 2048) - -/* The BTS overflow threshold in bytes from the end of the buffer: */ -#define BTS_OVFL_TH (BTS_RECORD_SIZE * 128) - - -/* - * Bits in the debugctlmsr controlling branch tracing. - */ -#define X86_DEBUGCTL_TR (1 << 6) -#define X86_DEBUGCTL_BTS (1 << 7) -#define X86_DEBUGCTL_BTINT (1 << 8) -#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) -#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) - -/* - * A debug store configuration. - * - * We only support architectures that use 64bit fields. 
- */ -struct debug_store { - u64 bts_buffer_base; - u64 bts_index; - u64 bts_absolute_maximum; - u64 bts_interrupt_threshold; - u64 pebs_buffer_base; - u64 pebs_index; - u64 pebs_absolute_maximum; - u64 pebs_interrupt_threshold; - u64 pebs_event_reset[MAX_PEBS_EVENTS]; -}; - struct event_constraint { union { unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; @@ -88,17 +49,29 @@ struct amd_nb { }; struct cpu_hw_events { + /* + * Generic x86 PMC bits + */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long interrupts; int enabled; - struct debug_store *ds; int n_events; int n_added; int assign[X86_PMC_IDX_MAX]; /* event to counter assignment */ u64 tags[X86_PMC_IDX_MAX]; struct perf_event *event_list[X86_PMC_IDX_MAX]; /* in enabled order */ + + /* + * Intel DebugStore bits + */ + struct debug_store *ds; + u64 pebs_enabled; + + /* + * AMD specific bits + */ struct amd_nb *amd_nb; }; @@ -112,12 +85,24 @@ struct cpu_hw_events { #define EVENT_CONSTRAINT(c, n, m) \ __EVENT_CONSTRAINT(c, n, m, HWEIGHT(n)) +/* + * Constraint on the Event code. + */ #define INTEL_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) +/* + * Constraint on the Event code + UMask + fixed-mask + */ #define FIXED_EVENT_CONSTRAINT(c, n) \ EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) +/* + * Constraint on the Event code + UMask + */ +#define PEBS_EVENT_CONSTRAINT(c, n) \ + EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVENT_MASK) + #define EVENT_CONSTRAINT_END \ EVENT_CONSTRAINT(0, 0, 0) @@ -128,6 +113,9 @@ struct cpu_hw_events { * struct x86_pmu - generic x86 pmu */ struct x86_pmu { + /* + * Generic x86 PMC bits + */ const char *name; int version; int (*handle_irq)(struct pt_regs *); @@ -146,10 +134,6 @@ struct x86_pmu { u64 event_mask; int apic; u64 max_period; - u64 intel_ctrl; - void (*enable_bts)(u64 config); - void (*disable_bts)(void); - struct event_constraint * (*get_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); @@ -162,6 +146,19 @@ struct x86_pmu { void (*cpu_starting)(int cpu); void (*cpu_dying)(int cpu); void (*cpu_dead)(int cpu); + + /* + * Intel Arch Perfmon v2+ + */ + u64 intel_ctrl; + + /* + * Intel DebugStore bits + */ + int bts, pebs; + int pebs_record_size; + void (*drain_pebs)(struct pt_regs *regs); + struct event_constraint *pebs_constraints; }; static struct x86_pmu x86_pmu __read_mostly; @@ -293,110 +290,14 @@ static void release_pmc_hardware(void) #endif } -static inline bool bts_available(void) -{ - return x86_pmu.enable_bts != NULL; -} - -static void init_debug_store_on_cpu(int cpu) -{ - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, - (u32)((u64)(unsigned long)ds), - (u32)((u64)(unsigned long)ds >> 32)); -} - -static void fini_debug_store_on_cpu(int cpu) -{ - if (!per_cpu(cpu_hw_events, cpu).ds) - return; - - wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); -} - -static void release_bts_hardware(void) -{ - int cpu; - - if (!bts_available()) - return; - - get_online_cpus(); - - for_each_online_cpu(cpu) - fini_debug_store_on_cpu(cpu); - - for_each_possible_cpu(cpu) { - struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; - - if (!ds) - continue; - - per_cpu(cpu_hw_events, cpu).ds = NULL; - - kfree((void *)(unsigned long)ds->bts_buffer_base); - kfree(ds); - } - - put_online_cpus(); -} - -static int reserve_bts_hardware(void) -{ - int cpu, err = 0; - - if (!bts_available()) - return 0; - - 
get_online_cpus(); - - for_each_possible_cpu(cpu) { - struct debug_store *ds; - void *buffer; - - err = -ENOMEM; - buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); - if (unlikely(!buffer)) - break; - - ds = kzalloc(sizeof(*ds), GFP_KERNEL); - if (unlikely(!ds)) { - kfree(buffer); - break; - } - - ds->bts_buffer_base = (u64)(unsigned long)buffer; - ds->bts_index = ds->bts_buffer_base; - ds->bts_absolute_maximum = - ds->bts_buffer_base + BTS_BUFFER_SIZE; - ds->bts_interrupt_threshold = - ds->bts_absolute_maximum - BTS_OVFL_TH; - - per_cpu(cpu_hw_events, cpu).ds = ds; - err = 0; - } - - if (err) - release_bts_hardware(); - else { - for_each_online_cpu(cpu) - init_debug_store_on_cpu(cpu); - } - - put_online_cpus(); - - return err; -} +static int reserve_ds_buffers(void); +static void release_ds_buffers(void); static void hw_perf_event_destroy(struct perf_event *event) { if (atomic_dec_and_mutex_lock(&active_events, &pmc_reserve_mutex)) { release_pmc_hardware(); - release_bts_hardware(); + release_ds_buffers(); mutex_unlock(&pmc_reserve_mutex); } } @@ -459,7 +360,7 @@ static int __hw_perf_event_init(struct perf_event *event) if (!reserve_pmc_hardware()) err = -EBUSY; else - err = reserve_bts_hardware(); + err = reserve_ds_buffers(); } if (!err) atomic_inc(&active_events); @@ -537,7 +438,7 @@ static int __hw_perf_event_init(struct perf_event *event) if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) && (hwc->sample_period == 1)) { /* BTS is not supported by this architecture. */ - if (!bts_available()) + if (!x86_pmu.bts) return -EOPNOTSUPP; /* BTS is currently only allowed for user-mode. */ @@ -995,6 +896,7 @@ static void x86_pmu_unthrottle(struct perf_event *event) void perf_event_print_debug(void) { u64 ctrl, status, overflow, pmc_ctrl, pmc_count, prev_left, fixed; + u64 pebs; struct cpu_hw_events *cpuc; unsigned long flags; int cpu, idx; @@ -1012,12 +914,14 @@ void perf_event_print_debug(void) rdmsrl(MSR_CORE_PERF_GLOBAL_STATUS, status); rdmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, overflow); rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR_CTRL, fixed); + rdmsrl(MSR_IA32_PEBS_ENABLE, pebs); pr_info("\n"); pr_info("CPU#%d: ctrl: %016llx\n", cpu, ctrl); pr_info("CPU#%d: status: %016llx\n", cpu, status); pr_info("CPU#%d: overflow: %016llx\n", cpu, overflow); pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); + pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); } pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); @@ -1333,6 +1237,7 @@ undo: #include "perf_event_amd.c" #include "perf_event_p6.c" +#include "perf_event_intel_ds.c" #include "perf_event_intel.c" static int __cpuinit @@ -1465,6 +1370,32 @@ static const struct pmu pmu = { }; /* + * validate that we can schedule this event + */ +static int validate_event(struct perf_event *event) +{ + struct cpu_hw_events *fake_cpuc; + struct event_constraint *c; + int ret = 0; + + fake_cpuc = kmalloc(sizeof(*fake_cpuc), GFP_KERNEL | __GFP_ZERO); + if (!fake_cpuc) + return -ENOMEM; + + c = x86_pmu.get_event_constraints(fake_cpuc, event); + + if (!c || !c->weight) + ret = -ENOSPC; + + if (x86_pmu.put_event_constraints) + x86_pmu.put_event_constraints(fake_cpuc, event); + + kfree(fake_cpuc); + + return ret; +} + +/* * validate a single event group * * validation include: @@ -1529,6 +1460,8 @@ const struct pmu *hw_perf_event_init(struct perf_event *event) if (event->group_leader != event) err = validate_group(event); + else + err = validate_event(event); event->pmu = tmp; } diff --git a/arch/x86/kernel/cpu/perf_event_intel.c 
b/arch/x86/kernel/cpu/perf_event_intel.c index 84bfde6..1144641 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -470,42 +470,6 @@ static u64 intel_pmu_raw_event(u64 hw_event) return hw_event & CORE_EVNTSEL_MASK; } -static void intel_pmu_enable_bts(u64 config) -{ - unsigned long debugctlmsr; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr |= X86_DEBUGCTL_TR; - debugctlmsr |= X86_DEBUGCTL_BTS; - debugctlmsr |= X86_DEBUGCTL_BTINT; - - if (!(config & ARCH_PERFMON_EVENTSEL_OS)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; - - if (!(config & ARCH_PERFMON_EVENTSEL_USR)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; - - update_debugctlmsr(debugctlmsr); -} - -static void intel_pmu_disable_bts(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - unsigned long debugctlmsr; - - if (!cpuc->ds) - return; - - debugctlmsr = get_debugctlmsr(); - - debugctlmsr &= - ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | - X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); - - update_debugctlmsr(debugctlmsr); -} - static void intel_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -514,6 +478,8 @@ static void intel_pmu_disable_all(void) if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) intel_pmu_disable_bts(); + + intel_pmu_pebs_disable_all(); } static void intel_pmu_enable_all(void) @@ -531,6 +497,8 @@ static void intel_pmu_enable_all(void) intel_pmu_enable_bts(event->hw.config); } + + intel_pmu_pebs_enable_all(); } static inline u64 intel_pmu_get_status(void) @@ -547,8 +515,7 @@ static inline void intel_pmu_ack_status(u64 ack) wrmsrl(MSR_CORE_PERF_GLOBAL_OVF_CTRL, ack); } -static inline void -intel_pmu_disable_fixed(struct hw_perf_event *hwc) +static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) { int idx = hwc->idx - X86_PMC_IDX_FIXED; u64 ctrl_val, mask; @@ -560,68 +527,7 @@ intel_pmu_disable_fixed(struct hw_perf_event *hwc) (void)checking_wrmsrl(hwc->config_base, ctrl_val); } -static void intel_pmu_drain_bts_buffer(void) -{ - struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); - struct debug_store *ds = cpuc->ds; - struct bts_record { - u64 from; - u64 to; - u64 flags; - }; - struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; - struct bts_record *at, *top; - struct perf_output_handle handle; - struct perf_event_header header; - struct perf_sample_data data; - struct pt_regs regs; - - if (!event) - return; - - if (!ds) - return; - - at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; - top = (struct bts_record *)(unsigned long)ds->bts_index; - - if (top <= at) - return; - - ds->bts_index = ds->bts_buffer_base; - - perf_sample_data_init(&data, 0); - - data.period = event->hw.last_period; - regs.ip = 0; - - /* - * Prepare a generic sample, i.e. fill in the invariant fields. - * We will overwrite the from and to address before we output - * the sample. - */ - perf_prepare_sample(&header, &data, event, ®s); - - if (perf_output_begin(&handle, event, - header.size * (top - at), 1, 1)) - return; - - for (; at < top; at++) { - data.ip = at->from; - data.addr = at->to; - - perf_output_sample(&handle, &header, &data, event); - } - - perf_output_end(&handle); - - /* There's new data available. 
*/ - event->hw.interrupts++; - event->pending_kill = POLL_IN; -} - -static inline void -intel_pmu_disable_event(struct perf_event *event) +static void intel_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; @@ -637,10 +543,12 @@ intel_pmu_disable_event(struct perf_event *event) } x86_pmu_disable_event(event); + + if (unlikely(event->attr.precise)) + intel_pmu_pebs_disable(hwc); } -static inline void -intel_pmu_enable_fixed(struct hw_perf_event *hwc) +static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) { int idx = hwc->idx - X86_PMC_IDX_FIXED; u64 ctrl_val, bits, mask; @@ -689,6 +597,9 @@ static void intel_pmu_enable_event(struct perf_event *event) return; } + if (unlikely(event->attr.precise)) + intel_pmu_pebs_enable(hwc); + __x86_pmu_enable_event(hwc); } @@ -762,6 +673,13 @@ again: inc_irq_stat(apic_perf_irqs); ack = status; + + /* + * PEBS overflow sets bit 62 in the global status register + */ + if (__test_and_clear_bit(62, (unsigned long *)&status)) + x86_pmu.drain_pebs(regs); + for_each_set_bit(bit, (unsigned long *)&status, X86_PMC_IDX_MAX) { struct perf_event *event = cpuc->events[bit]; @@ -791,22 +709,18 @@ done: return 1; } -static struct event_constraint bts_constraint = - EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); - static struct event_constraint * -intel_special_constraints(struct perf_event *event) +intel_bts_constraints(struct perf_event *event) { - unsigned int hw_event; - - hw_event = event->hw.config & INTEL_ARCH_EVENT_MASK; + struct hw_perf_event *hwc = &event->hw; + unsigned int hw_event, bts_event; - if (unlikely((hw_event == - x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS)) && - (event->hw.sample_period == 1))) { + hw_event = hwc->config & INTEL_ARCH_EVENT_MASK; + bts_event = x86_pmu.event_map(PERF_COUNT_HW_BRANCH_INSTRUCTIONS); + if (unlikely(hw_event == bts_event && hwc->sample_period == 1)) return &bts_constraint; - } + return NULL; } @@ -815,7 +729,11 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event { struct event_constraint *c; - c = intel_special_constraints(event); + c = intel_bts_constraints(event); + if (c) + return c; + + c = intel_pebs_constraints(event); if (c) return c; @@ -864,8 +782,6 @@ static __initconst struct x86_pmu intel_pmu = { * the generic event period: */ .max_period = (1ULL << 31) - 1, - .enable_bts = intel_pmu_enable_bts, - .disable_bts = intel_pmu_disable_bts, .get_event_constraints = intel_get_event_constraints, .cpu_starting = init_debug_store_on_cpu, @@ -915,6 +831,8 @@ static __init int intel_pmu_init(void) if (version > 1) x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); + intel_ds_init(); + /* * Install the hw-cache-events table: */ diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c new file mode 100644 index 0000000..0d994ef --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -0,0 +1,557 @@ +#ifdef CONFIG_CPU_SUP_INTEL + +/* The maximal number of PEBS events: */ +#define MAX_PEBS_EVENTS 4 + +/* The size of a BTS record in bytes: */ +#define BTS_RECORD_SIZE 24 + +#define BTS_BUFFER_SIZE (PAGE_SIZE << 4) +#define PEBS_BUFFER_SIZE PAGE_SIZE + +/* + * pebs_record_32 for p4 and core not supported + +struct pebs_record_32 { + u32 flags, ip; + u32 ax, bc, cx, dx; + u32 si, di, bp, sp; +}; + + */ + +struct pebs_record_core { + u64 flags, ip; + u64 ax, bx, cx, dx; + u64 si, di, bp, sp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; +}; + +struct pebs_record_nhm { + 
u64 flags, ip; + u64 ax, bx, cx, dx; + u64 si, di, bp, sp; + u64 r8, r9, r10, r11; + u64 r12, r13, r14, r15; + u64 status, dla, dse, lat; +}; + +/* + * Bits in the debugctlmsr controlling branch tracing. + */ +#define X86_DEBUGCTL_TR (1 << 6) +#define X86_DEBUGCTL_BTS (1 << 7) +#define X86_DEBUGCTL_BTINT (1 << 8) +#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) +#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) + +/* + * A debug store configuration. + * + * We only support architectures that use 64bit fields. + */ +struct debug_store { + u64 bts_buffer_base; + u64 bts_index; + u64 bts_absolute_maximum; + u64 bts_interrupt_threshold; + u64 pebs_buffer_base; + u64 pebs_index; + u64 pebs_absolute_maximum; + u64 pebs_interrupt_threshold; + u64 pebs_event_reset[MAX_PEBS_EVENTS]; +}; + +static void init_debug_store_on_cpu(int cpu) +{ + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, + (u32)((u64)(unsigned long)ds), + (u32)((u64)(unsigned long)ds >> 32)); +} + +static void fini_debug_store_on_cpu(int cpu) +{ + if (!per_cpu(cpu_hw_events, cpu).ds) + return; + + wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0); +} + +static void release_ds_buffers(void) +{ + int cpu; + + if (!x86_pmu.bts && !x86_pmu.pebs) + return; + + get_online_cpus(); + + for_each_online_cpu(cpu) + fini_debug_store_on_cpu(cpu); + + for_each_possible_cpu(cpu) { + struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds; + + if (!ds) + continue; + + per_cpu(cpu_hw_events, cpu).ds = NULL; + + kfree((void *)(unsigned long)ds->pebs_buffer_base); + kfree((void *)(unsigned long)ds->bts_buffer_base); + kfree(ds); + } + + put_online_cpus(); +} + +static int reserve_ds_buffers(void) +{ + int cpu, err = 0; + + if (!x86_pmu.bts && !x86_pmu.pebs) + return 0; + + get_online_cpus(); + + for_each_possible_cpu(cpu) { + struct debug_store *ds; + void *buffer; + int max, thresh; + + err = -ENOMEM; + ds = kzalloc(sizeof(*ds), GFP_KERNEL); + if (unlikely(!ds)) { + kfree(buffer); + break; + } + per_cpu(cpu_hw_events, cpu).ds = ds; + + if (x86_pmu.bts) { + buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE; + thresh = max / 16; + + ds->bts_buffer_base = (u64)(unsigned long)buffer; + ds->bts_index = ds->bts_buffer_base; + ds->bts_absolute_maximum = ds->bts_buffer_base + + max * BTS_RECORD_SIZE; + ds->bts_interrupt_threshold = ds->bts_absolute_maximum - + thresh * BTS_RECORD_SIZE; + } + + if (x86_pmu.pebs) { + buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL); + if (unlikely(!buffer)) + break; + + max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size; + + ds->pebs_buffer_base = (u64)(unsigned long)buffer; + ds->pebs_index = ds->pebs_buffer_base; + ds->pebs_absolute_maximum = ds->pebs_buffer_base + + max * x86_pmu.pebs_record_size; + /* + * Always use single record PEBS + */ + ds->pebs_interrupt_threshold = ds->pebs_buffer_base + + x86_pmu.pebs_record_size; + } + + err = 0; + } + + if (err) + release_ds_buffers(); + else { + for_each_online_cpu(cpu) + init_debug_store_on_cpu(cpu); + } + + put_online_cpus(); + + return err; +} + +/* + * BTS + */ + +static struct event_constraint bts_constraint = + EVENT_CONSTRAINT(0, 1ULL << X86_PMC_IDX_FIXED_BTS, 0); + +static void intel_pmu_enable_bts(u64 config) +{ + unsigned long debugctlmsr; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr |= X86_DEBUGCTL_TR; + debugctlmsr |= X86_DEBUGCTL_BTS; + debugctlmsr |= X86_DEBUGCTL_BTINT; + + if (!(config & ARCH_PERFMON_EVENTSEL_OS)) + debugctlmsr |= 
X86_DEBUGCTL_BTS_OFF_OS; + + if (!(config & ARCH_PERFMON_EVENTSEL_USR)) + debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_disable_bts(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + unsigned long debugctlmsr; + + if (!cpuc->ds) + return; + + debugctlmsr = get_debugctlmsr(); + + debugctlmsr &= + ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | + X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + + update_debugctlmsr(debugctlmsr); +} + +static void intel_pmu_drain_bts_buffer(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct bts_record { + u64 from; + u64 to; + u64 flags; + }; + struct perf_event *event = cpuc->events[X86_PMC_IDX_FIXED_BTS]; + struct bts_record *at, *top; + struct perf_output_handle handle; + struct perf_event_header header; + struct perf_sample_data data; + struct pt_regs regs; + + if (!event) + return; + + if (!ds) + return; + + at = (struct bts_record *)(unsigned long)ds->bts_buffer_base; + top = (struct bts_record *)(unsigned long)ds->bts_index; + + if (top <= at) + return; + + ds->bts_index = ds->bts_buffer_base; + + perf_sample_data_init(&data, 0); + data.period = event->hw.last_period; + regs.ip = 0; + + /* + * Prepare a generic sample, i.e. fill in the invariant fields. + * We will overwrite the from and to address before we output + * the sample. + */ + perf_prepare_sample(&header, &data, event, ®s); + + if (perf_output_begin(&handle, event, header.size * (top - at), 1, 1)) + return; + + for (; at < top; at++) { + data.ip = at->from; + data.addr = at->to; + + perf_output_sample(&handle, &header, &data, event); + } + + perf_output_end(&handle); + + /* There's new data available. 
*/ + event->hw.interrupts++; + event->pending_kill = POLL_IN; +} + +/* + * PEBS + */ + +static struct event_constraint intel_core_pebs_events[] = { + PEBS_EVENT_CONSTRAINT(0x00c0, 0x1), /* INSTR_RETIRED.ANY */ + PEBS_EVENT_CONSTRAINT(0xfec1, 0x1), /* X87_OPS_RETIRED.ANY */ + PEBS_EVENT_CONSTRAINT(0x00c5, 0x1), /* BR_INST_RETIRED.MISPRED */ + PEBS_EVENT_CONSTRAINT(0x1fc7, 0x1), /* SIMD_INST_RETURED.ANY */ + PEBS_EVENT_CONSTRAINT(0x01cb, 0x1), /* MEM_LOAD_RETIRED.L1D_MISS */ + PEBS_EVENT_CONSTRAINT(0x02cb, 0x1), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ + PEBS_EVENT_CONSTRAINT(0x04cb, 0x1), /* MEM_LOAD_RETIRED.L2_MISS */ + PEBS_EVENT_CONSTRAINT(0x08cb, 0x1), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ + PEBS_EVENT_CONSTRAINT(0x10cb, 0x1), /* MEM_LOAD_RETIRED.DTLB_MISS */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint intel_nehalem_pebs_events[] = { + PEBS_EVENT_CONSTRAINT(0x00c0, 0xf), /* INSTR_RETIRED.ANY */ + PEBS_EVENT_CONSTRAINT(0xfec1, 0xf), /* X87_OPS_RETIRED.ANY */ + PEBS_EVENT_CONSTRAINT(0x00c5, 0xf), /* BR_INST_RETIRED.MISPRED */ + PEBS_EVENT_CONSTRAINT(0x1fc7, 0xf), /* SIMD_INST_RETURED.ANY */ + PEBS_EVENT_CONSTRAINT(0x01cb, 0xf), /* MEM_LOAD_RETIRED.L1D_MISS */ + PEBS_EVENT_CONSTRAINT(0x02cb, 0xf), /* MEM_LOAD_RETIRED.L1D_LINE_MISS */ + PEBS_EVENT_CONSTRAINT(0x04cb, 0xf), /* MEM_LOAD_RETIRED.L2_MISS */ + PEBS_EVENT_CONSTRAINT(0x08cb, 0xf), /* MEM_LOAD_RETIRED.L2_LINE_MISS */ + PEBS_EVENT_CONSTRAINT(0x10cb, 0xf), /* MEM_LOAD_RETIRED.DTLB_MISS */ + EVENT_CONSTRAINT_END +}; + +static struct event_constraint * +intel_pebs_constraints(struct perf_event *event) +{ + struct event_constraint *c; + + if (!event->attr.precise) + return NULL; + + if (x86_pmu.pebs_constraints) { + for_each_event_constraint(c, x86_pmu.pebs_constraints) { + if ((event->hw.config & c->cmask) == c->code) + return c; + } + } + + return &emptyconstraint; +} + +static void intel_pmu_pebs_enable(struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val = cpuc->pebs_enabled; + + hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; + + val |= 1ULL << hwc->idx; + wrmsrl(MSR_IA32_PEBS_ENABLE, val); +} + +static void intel_pmu_pebs_disable(struct hw_perf_event *hwc) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + u64 val = cpuc->pebs_enabled; + + val &= ~(1ULL << hwc->idx); + wrmsrl(MSR_IA32_PEBS_ENABLE, val); + + hwc->config |= ARCH_PERFMON_EVENTSEL_INT; +} + +static void intel_pmu_pebs_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->pebs_enabled) + wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); +} + +static void intel_pmu_pebs_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->pebs_enabled) + wrmsrl(MSR_IA32_PEBS_ENABLE, 0); +} + +static int intel_pmu_save_and_restart(struct perf_event *event); +static void intel_pmu_disable_event(struct perf_event *event); + +static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct perf_event *event = cpuc->events[0]; /* PMC0 only */ + struct pebs_record_core *at, *top; + struct perf_sample_data data; + struct pt_regs regs; + int n; + + if (!event || !ds || !x86_pmu.pebs) + return; + + intel_pmu_pebs_disable_all(); + + at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; + + if (top <= at) + goto out; + + ds->pebs_index = ds->pebs_buffer_base; 
+ + if (!intel_pmu_save_and_restart(event)) + goto out; + + perf_sample_data_init(&data, 0); + data.period = event->hw.last_period; + + n = top - at; + + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ON_ONCE(n > 1); + + /* + * We use the interrupt regs as a base because the PEBS record + * does not contain a full regs set, specifically it seems to + * lack segment descriptors, which get used by things like + * user_mode(). + * + * In the simple case fix up only the IP and BP,SP regs, for + * PERF_SAMPLE_IP and PERF_SAMPLE_CALLCHAIN to function properly. + * A possible PERF_SAMPLE_REGS will have to transfer all regs. + */ + regs = *iregs; + regs.ip = at->ip; + regs.bp = at->bp; + regs.sp = at->sp; + + if (perf_event_overflow(event, 1, &data, ®s)) + intel_pmu_disable_event(event); + +out: + intel_pmu_pebs_enable_all(); +} + +static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct debug_store *ds = cpuc->ds; + struct pebs_record_nhm *at, *top; + struct perf_sample_data data; + struct perf_event *event = NULL; + struct pt_regs regs; + int bit, n; + + if (!ds || !x86_pmu.pebs) + return; + + intel_pmu_pebs_disable_all(); + + at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; + top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; + + if (top <= at) + goto out; + + ds->pebs_index = ds->pebs_buffer_base; + + n = top - at; + + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ON_ONCE(n > MAX_PEBS_EVENTS); + + for ( ; at < top; at++) { + for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { + if (!cpuc->events[bit]->attr.precise) + continue; + + event = cpuc->events[bit]; + } + + if (!event) + continue; + + if (!intel_pmu_save_and_restart(event)) + continue; + + perf_sample_data_init(&data, 0); + data.period = event->hw.last_period; + + /* + * See the comment in intel_pmu_drain_pebs_core() + */ + regs = *iregs; + regs.ip = at->ip; + regs.bp = at->bp; + regs.sp = at->sp; + + if (perf_event_overflow(event, 1, &data, ®s)) + intel_pmu_disable_event(event); + } +out: + intel_pmu_pebs_enable_all(); +} + +/* + * BTS, PEBS probe and setup + */ + +static void intel_ds_init(void) +{ + /* + * No support for 32bit formats + */ + if (!boot_cpu_has(X86_FEATURE_DTES64)) + return; + + x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); + x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); + if (x86_pmu.pebs) { + int format = 0; + + if (x86_pmu.version > 1) { + u64 capabilities; + /* + * v2+ has a PEBS format field + */ + rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); + format = (capabilities >> 8) & 0xf; + } + + switch (format) { + case 0: + printk(KERN_CONT "PEBS v0, "); + x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); + x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; + x86_pmu.pebs_constraints = intel_core_pebs_events; + break; + + case 1: + printk(KERN_CONT "PEBS v1, "); + x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); + x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; + x86_pmu.pebs_constraints = intel_nehalem_pebs_events; + break; + + default: + printk(KERN_CONT "PEBS unknown format: %d, ", format); + x86_pmu.pebs = 0; + break; + } + } +} + +#else /* CONFIG_CPU_SUP_INTEL */ + +static int reseve_ds_buffers(void) +{ + return 0; +} + +static void release_ds_buffers(void) +{ +} + +#endif /* CONFIG_CPU_SUP_INTEL */ -- cgit v1.1 From caff2befffe899e63df5cc760b7ed01cfd902685 Mon Sep 
17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Mar 2010 12:02:30 +0100 Subject: perf, x86: Implement simple LBR support Implement simple suport Intel Last-Branch-Record, it supports all hardware that implements FREEZE_LBRS_ON_PMI, but does not (yet) implement the LBR config register. The Intel LBR is a FIFO of From,To addresses describing the last few branches the hardware took. This patch does not add perf interface to the LBR, but merely provides an interface for internal use. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.544191154@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 18 +++ arch/x86/kernel/cpu/perf_event_intel.c | 13 ++ arch/x86/kernel/cpu/perf_event_intel_lbr.c | 228 +++++++++++++++++++++++++++++ 3 files changed, 259 insertions(+) create mode 100644 arch/x86/kernel/cpu/perf_event_intel_lbr.c (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0c03d5c..1badff6 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -48,6 +48,8 @@ struct amd_nb { struct event_constraint event_constraints[X86_PMC_IDX_MAX]; }; +#define MAX_LBR_ENTRIES 16 + struct cpu_hw_events { /* * Generic x86 PMC bits @@ -70,6 +72,14 @@ struct cpu_hw_events { u64 pebs_enabled; /* + * Intel LBR bits + */ + int lbr_users; + void *lbr_context; + struct perf_branch_stack lbr_stack; + struct perf_branch_entry lbr_entries[MAX_LBR_ENTRIES]; + + /* * AMD specific bits */ struct amd_nb *amd_nb; @@ -159,6 +169,13 @@ struct x86_pmu { int pebs_record_size; void (*drain_pebs)(struct pt_regs *regs); struct event_constraint *pebs_constraints; + + /* + * Intel LBR + */ + unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ + int lbr_nr; /* hardware stack size */ + int lbr_format; /* hardware format */ }; static struct x86_pmu x86_pmu __read_mostly; @@ -1237,6 +1254,7 @@ undo: #include "perf_event_amd.c" #include "perf_event_p6.c" +#include "perf_event_intel_lbr.c" #include "perf_event_intel_ds.c" #include "perf_event_intel.c" diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 1144641..44f6ed4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -480,6 +480,7 @@ static void intel_pmu_disable_all(void) intel_pmu_disable_bts(); intel_pmu_pebs_disable_all(); + intel_pmu_lbr_disable_all(); } static void intel_pmu_enable_all(void) @@ -499,6 +500,7 @@ static void intel_pmu_enable_all(void) } intel_pmu_pebs_enable_all(); + intel_pmu_lbr_enable_all(); } static inline u64 intel_pmu_get_status(void) @@ -674,6 +676,8 @@ again: inc_irq_stat(apic_perf_irqs); ack = status; + intel_pmu_lbr_read(); + /* * PEBS overflow sets bit 62 in the global status register */ @@ -848,6 +852,8 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, core2_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_core(); + x86_pmu.event_constraints = intel_core2_event_constraints; pr_cont("Core2 events, "); break; @@ -857,13 +863,18 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, nehalem_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_nhm(); + x86_pmu.event_constraints = intel_nehalem_event_constraints; pr_cont("Nehalem/Corei7 events, "); break; + case 28: /* Atom */ memcpy(hw_cache_event_ids, atom_hw_cache_event_ids, 
sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_atom(); + x86_pmu.event_constraints = intel_gen_event_constraints; pr_cont("Atom events, "); break; @@ -873,6 +884,8 @@ static __init int intel_pmu_init(void) memcpy(hw_cache_event_ids, westmere_hw_cache_event_ids, sizeof(hw_cache_event_ids)); + intel_pmu_lbr_init_nhm(); + x86_pmu.event_constraints = intel_westmere_event_constraints; pr_cont("Westmere events, "); break; diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c new file mode 100644 index 0000000..ea3e99e --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -0,0 +1,228 @@ +#ifdef CONFIG_CPU_SUP_INTEL + +enum { + LBR_FORMAT_32 = 0x00, + LBR_FORMAT_LIP = 0x01, + LBR_FORMAT_EIP = 0x02, + LBR_FORMAT_EIP_FLAGS = 0x03, +}; + +/* + * We only support LBR implementations that have FREEZE_LBRS_ON_PMI + * otherwise it becomes near impossible to get a reliable stack. + */ + +#define X86_DEBUGCTL_LBR (1 << 0) +#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI (1 << 11) + +static void __intel_pmu_lbr_enable(void) +{ + u64 debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI); + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); +} + +static void __intel_pmu_lbr_disable(void) +{ + u64 debugctl; + + rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); + debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI); + wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); +} + +static void intel_pmu_lbr_reset_32(void) +{ + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) + wrmsrl(x86_pmu.lbr_from + i, 0); +} + +static void intel_pmu_lbr_reset_64(void) +{ + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++) { + wrmsrl(x86_pmu.lbr_from + i, 0); + wrmsrl(x86_pmu.lbr_to + i, 0); + } +} + +static void intel_pmu_lbr_reset(void) +{ + if (x86_pmu.lbr_format == LBR_FORMAT_32) + intel_pmu_lbr_reset_32(); + else + intel_pmu_lbr_reset_64(); +} + +static void intel_pmu_lbr_enable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + WARN_ON(cpuc->enabled); + + /* + * Reset the LBR stack if this is the first LBR user or + * we changed task context so as to avoid data leaks. 
+ */ + + if (!cpuc->lbr_users || + (event->ctx->task && cpuc->lbr_context != event->ctx)) { + intel_pmu_lbr_reset(); + cpuc->lbr_context = event->ctx; + } + + cpuc->lbr_users++; +} + +static void intel_pmu_lbr_disable(struct perf_event *event) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!x86_pmu.lbr_nr) + return; + + cpuc->lbr_users--; + + BUG_ON(cpuc->lbr_users < 0); + WARN_ON(cpuc->enabled); +} + +static void intel_pmu_lbr_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->lbr_users) + __intel_pmu_lbr_enable(); +} + +static void intel_pmu_lbr_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (cpuc->lbr_users) + __intel_pmu_lbr_disable(); +} + +static inline u64 intel_pmu_lbr_tos(void) +{ + u64 tos; + + rdmsrl(x86_pmu.lbr_tos, tos); + + return tos; +} + +static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) +{ + unsigned long mask = x86_pmu.lbr_nr - 1; + u64 tos = intel_pmu_lbr_tos(); + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) { + unsigned long lbr_idx = (tos - i) & mask; + union { + struct { + u32 from; + u32 to; + }; + u64 lbr; + } msr_lastbranch; + + rdmsrl(x86_pmu.lbr_from + lbr_idx, msr_lastbranch.lbr); + + cpuc->lbr_entries[i].from = msr_lastbranch.from; + cpuc->lbr_entries[i].to = msr_lastbranch.to; + cpuc->lbr_entries[i].flags = 0; + } + cpuc->lbr_stack.nr = i; +} + +#define LBR_FROM_FLAG_MISPRED (1ULL << 63) + +/* + * Due to lack of segmentation in Linux the effective address (offset) + * is the same as the linear address, allowing us to merge the LIP and EIP + * LBR formats. + */ +static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) +{ + unsigned long mask = x86_pmu.lbr_nr - 1; + u64 tos = intel_pmu_lbr_tos(); + int i; + + for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) { + unsigned long lbr_idx = (tos - i) & mask; + u64 from, to, flags = 0; + + rdmsrl(x86_pmu.lbr_from + lbr_idx, from); + rdmsrl(x86_pmu.lbr_to + lbr_idx, to); + + if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) { + flags = !!(from & LBR_FROM_FLAG_MISPRED); + from = (u64)((((s64)from) << 1) >> 1); + } + + cpuc->lbr_entries[i].from = from; + cpuc->lbr_entries[i].to = to; + cpuc->lbr_entries[i].flags = flags; + } + cpuc->lbr_stack.nr = i; +} + +static void intel_pmu_lbr_read(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + + if (!cpuc->lbr_users) + return; + + if (x86_pmu.lbr_format == LBR_FORMAT_32) + intel_pmu_lbr_read_32(cpuc); + else + intel_pmu_lbr_read_64(cpuc); +} + +static int intel_pmu_lbr_format(void) +{ + u64 capabilities; + + rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); + return capabilities & 0x1f; +} + +static void intel_pmu_lbr_init_core(void) +{ + x86_pmu.lbr_format = intel_pmu_lbr_format(); + x86_pmu.lbr_nr = 4; + x86_pmu.lbr_tos = 0x01c9; + x86_pmu.lbr_from = 0x40; + x86_pmu.lbr_to = 0x60; +} + +static void intel_pmu_lbr_init_nhm(void) +{ + x86_pmu.lbr_format = intel_pmu_lbr_format(); + x86_pmu.lbr_nr = 16; + x86_pmu.lbr_tos = 0x01c9; + x86_pmu.lbr_from = 0x680; + x86_pmu.lbr_to = 0x6c0; +} + +static void intel_pmu_lbr_init_atom(void) +{ + x86_pmu.lbr_format = intel_pmu_lbr_format(); + x86_pmu.lbr_nr = 8; + x86_pmu.lbr_tos = 0x01c9; + x86_pmu.lbr_from = 0x40; + x86_pmu.lbr_to = 0x60; +} + +#endif /* CONFIG_CPU_SUP_INTEL */ -- cgit v1.1 From ef21f683a045a79b6aa86ad81e5fdfc0d5ddd250 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Mar 2010 13:12:23 +0100 Subject: perf, x86: use LBR for PEBS IP+1 fixup Use the LBR to fix up 
the PEBS IP+1 issue. As said, PEBS reports the next instruction, here we use the LBR to find the last branch and from that construct the actual IP. If the IP matches the LBR-TO, we use LBR-FROM, otherwise we use the LBR-TO address as the beginning of the last basic block and decode forward. Once we find a match to the current IP, we use the previous location. This patch introduces a new ABI element: PERF_RECORD_MISC_EXACT, which conveys that the reported IP (PERF_SAMPLE_IP) is the exact instruction that caused the event (barring CPU errata). The fixup can fail due to various reasons: 1) LBR contains invalid data (quite possible) 2) part of the basic block got paged out 3) the reported IP isn't part of the basic block (see 1) Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: "Zhang, Yanmin" Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.619375431@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 19 +++++++ arch/x86/kernel/cpu/perf_event.c | 70 +++++++++++++------------- arch/x86/kernel/cpu/perf_event_intel.c | 4 +- arch/x86/kernel/cpu/perf_event_intel_ds.c | 84 ++++++++++++++++++++++++++++++- 4 files changed, 138 insertions(+), 39 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index db6109a..a9038c9 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -136,6 +136,25 @@ extern void perf_events_lapic_init(void); #define PERF_EVENT_INDEX_OFFSET 0 +/* + * Abuse bit 3 of the cpu eflags register to indicate proper PEBS IP fixups. + * This flag is otherwise unused and ABI specified to be 0, so nobody should + * care what we do with it. + */ +#define PERF_EFLAGS_EXACT (1UL << 3) + +#define perf_misc_flags(regs) \ +({ int misc = 0; \ + if (user_mode(regs)) \ + misc |= PERF_RECORD_MISC_USER; \ + else \ + misc |= PERF_RECORD_MISC_KERNEL; \ + if (regs->flags & PERF_EFLAGS_EXACT) \ + misc |= PERF_RECORD_MISC_EXACT; \ + misc; }) + +#define perf_instruction_pointer(regs) ((regs)->ip) + #else static inline void init_hw_perf_events(void) { } static inline void perf_events_lapic_init(void) { } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1badff6..5cb4e8d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -29,6 +29,41 @@ #include #include +/* + * best effort, GUP based copy_from_user() that assumes IRQ or NMI context + */ +static unsigned long +copy_from_user_nmi(void *to, const void __user *from, unsigned long n) +{ + unsigned long offset, addr = (unsigned long)from; + int type = in_nmi() ? 
KM_NMI : KM_IRQ0; + unsigned long size, len = 0; + struct page *page; + void *map; + int ret; + + do { + ret = __get_user_pages_fast(addr, 1, 0, &page); + if (!ret) + break; + + offset = addr & (PAGE_SIZE - 1); + size = min(PAGE_SIZE - offset, n - len); + + map = kmap_atomic(page, type); + memcpy(to, map+offset, size); + kunmap_atomic(map, type); + put_page(page); + + len += size; + to += size; + addr += size; + + } while (len < n); + + return len; +} + static u64 perf_event_mask __read_mostly; struct event_constraint { @@ -1550,41 +1585,6 @@ perf_callchain_kernel(struct pt_regs *regs, struct perf_callchain_entry *entry) dump_trace(NULL, regs, NULL, regs->bp, &backtrace_ops, entry); } -/* - * best effort, GUP based copy_from_user() that assumes IRQ or NMI context - */ -static unsigned long -copy_from_user_nmi(void *to, const void __user *from, unsigned long n) -{ - unsigned long offset, addr = (unsigned long)from; - int type = in_nmi() ? KM_NMI : KM_IRQ0; - unsigned long size, len = 0; - struct page *page; - void *map; - int ret; - - do { - ret = __get_user_pages_fast(addr, 1, 0, &page); - if (!ret) - break; - - offset = addr & (PAGE_SIZE - 1); - size = min(PAGE_SIZE - offset, n - len); - - map = kmap_atomic(page, type); - memcpy(to, map+offset, size); - kunmap_atomic(map, type); - put_page(page); - - len += size; - to += size; - addr += size; - - } while (len < n); - - return len; -} - static int copy_stack_frame(const void __user *fp, struct stack_frame *frame) { unsigned long bytes; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 44f6ed4..7eb78be 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -547,7 +547,7 @@ static void intel_pmu_disable_event(struct perf_event *event) x86_pmu_disable_event(event); if (unlikely(event->attr.precise)) - intel_pmu_pebs_disable(hwc); + intel_pmu_pebs_disable(event); } static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) @@ -600,7 +600,7 @@ static void intel_pmu_enable_event(struct perf_event *event) } if (unlikely(event->attr.precise)) - intel_pmu_pebs_enable(hwc); + intel_pmu_pebs_enable(event); __x86_pmu_enable_event(hwc); } diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 0d994ef..50e6ff3 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -331,26 +331,32 @@ intel_pebs_constraints(struct perf_event *event) return &emptyconstraint; } -static void intel_pmu_pebs_enable(struct hw_perf_event *hwc) +static void intel_pmu_pebs_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; u64 val = cpuc->pebs_enabled; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; val |= 1ULL << hwc->idx; wrmsrl(MSR_IA32_PEBS_ENABLE, val); + + intel_pmu_lbr_enable(event); } -static void intel_pmu_pebs_disable(struct hw_perf_event *hwc) +static void intel_pmu_pebs_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + struct hw_perf_event *hwc = &event->hw; u64 val = cpuc->pebs_enabled; val &= ~(1ULL << hwc->idx); wrmsrl(MSR_IA32_PEBS_ENABLE, val); hwc->config |= ARCH_PERFMON_EVENTSEL_INT; + + intel_pmu_lbr_disable(event); } static void intel_pmu_pebs_enable_all(void) @@ -369,6 +375,70 @@ static void intel_pmu_pebs_disable_all(void) wrmsrl(MSR_IA32_PEBS_ENABLE, 0); } +#include + +#define MAX_INSN_SIZE 16 + +static inline bool kernel_ip(unsigned 
long ip) +{ +#ifdef CONFIG_X86_32 + return ip > PAGE_OFFSET; +#else + return (long)ip < 0; +#endif +} + +static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + unsigned long from = cpuc->lbr_entries[0].from; + unsigned long old_to, to = cpuc->lbr_entries[0].to; + unsigned long ip = regs->ip; + + if (!cpuc->lbr_stack.nr || !from || !to) + return 0; + + if (ip < to) + return 0; + + /* + * We sampled a branch insn, rewind using the LBR stack + */ + if (ip == to) { + regs->ip = from; + return 1; + } + + do { + struct insn insn; + u8 buf[MAX_INSN_SIZE]; + void *kaddr; + + old_to = to; + if (!kernel_ip(ip)) { + int bytes, size = min_t(int, MAX_INSN_SIZE, ip - to); + + bytes = copy_from_user_nmi(buf, (void __user *)to, size); + if (bytes != size) + return 0; + + kaddr = buf; + } else + kaddr = (void *)to; + + kernel_insn_init(&insn, kaddr); + insn_get_length(&insn); + to += insn.length; + } while (to < ip); + + if (to == ip) { + regs->ip = old_to; + return 1; + } + + return 0; +} + static int intel_pmu_save_and_restart(struct perf_event *event); static void intel_pmu_disable_event(struct perf_event *event); @@ -424,6 +494,11 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) regs.bp = at->bp; regs.sp = at->sp; + if (intel_pmu_pebs_fixup_ip(®s)) + regs.flags |= PERF_EFLAGS_EXACT; + else + regs.flags &= ~PERF_EFLAGS_EXACT; + if (perf_event_overflow(event, 1, &data, ®s)) intel_pmu_disable_event(event); @@ -487,6 +562,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) regs.bp = at->bp; regs.sp = at->sp; + if (intel_pmu_pebs_fixup_ip(®s)) + regs.flags |= PERF_EFLAGS_EXACT; + else + regs.flags &= ~PERF_EFLAGS_EXACT; + if (perf_event_overflow(event, 1, &data, ®s)) intel_pmu_disable_event(event); } -- cgit v1.1 From 8db909a7e3c888b5d45aef7650d74ccebe3ce725 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Wed, 3 Mar 2010 17:07:40 +0100 Subject: perf, x86: Clean up IA32_PERF_CAPABILITIES usage Saner PERF_CAPABILITIES support, which also exposes pebs_trap. Use that latter to make PEBS's use of LBR conditional since a fault-like pebs should already report the correct IP. 
( As of this writing there is no known hardware that implements !pebs_trap ) Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.770650663@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 15 +++++++++++++-- arch/x86/kernel/cpu/perf_event_intel.c | 10 ++++++++++ arch/x86/kernel/cpu/perf_event_intel_ds.c | 30 +++++++++++++++--------------- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 18 ++++-------------- 4 files changed, 42 insertions(+), 31 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5cb4e8d..7b5430b 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -154,6 +154,17 @@ struct cpu_hw_events { #define for_each_event_constraint(e, c) \ for ((e) = (c); (e)->cmask; (e)++) +union perf_capabilities { + struct { + u64 lbr_format : 6; + u64 pebs_trap : 1; + u64 pebs_arch_reg : 1; + u64 pebs_format : 4; + u64 smm_freeze : 1; + }; + u64 capabilities; +}; + /* * struct x86_pmu - generic x86 pmu */ @@ -195,7 +206,8 @@ struct x86_pmu { /* * Intel Arch Perfmon v2+ */ - u64 intel_ctrl; + u64 intel_ctrl; + union perf_capabilities intel_cap; /* * Intel DebugStore bits @@ -210,7 +222,6 @@ struct x86_pmu { */ unsigned long lbr_tos, lbr_from, lbr_to; /* MSR base regs */ int lbr_nr; /* hardware stack size */ - int lbr_format; /* hardware format */ }; static struct x86_pmu x86_pmu __read_mostly; diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 7eb78be..246c072 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -835,6 +835,16 @@ static __init int intel_pmu_init(void) if (version > 1) x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); + /* + * v2 and above have a perf capabilities MSR + */ + if (version > 1) { + u64 capabilities; + + rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); + x86_pmu.intel_cap.capabilities = capabilities; + } + intel_ds_init(); /* diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 50e6ff3..5e40294 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -342,7 +342,8 @@ static void intel_pmu_pebs_enable(struct perf_event *event) val |= 1ULL << hwc->idx; wrmsrl(MSR_IA32_PEBS_ENABLE, val); - intel_pmu_lbr_enable(event); + if (x86_pmu.intel_cap.pebs_trap) + intel_pmu_lbr_enable(event); } static void intel_pmu_pebs_disable(struct perf_event *event) @@ -356,7 +357,8 @@ static void intel_pmu_pebs_disable(struct perf_event *event) hwc->config |= ARCH_PERFMON_EVENTSEL_INT; - intel_pmu_lbr_disable(event); + if (x86_pmu.intel_cap.pebs_trap) + intel_pmu_lbr_disable(event); } static void intel_pmu_pebs_enable_all(void) @@ -395,6 +397,12 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) unsigned long old_to, to = cpuc->lbr_entries[0].to; unsigned long ip = regs->ip; + /* + * We don't need to fixup if the PEBS assist is fault like + */ + if (!x86_pmu.intel_cap.pebs_trap) + return 1; + if (!cpuc->lbr_stack.nr || !from || !to) return 0; @@ -589,34 +597,26 @@ static void intel_ds_init(void) x86_pmu.bts = boot_cpu_has(X86_FEATURE_BTS); x86_pmu.pebs = boot_cpu_has(X86_FEATURE_PEBS); if (x86_pmu.pebs) { - int format = 0; - - if (x86_pmu.version > 1) { - u64 capabilities; - /* - * v2+ has a PEBS format field - */ 
- rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); - format = (capabilities >> 8) & 0xf; - } + char pebs_type = x86_pmu.intel_cap.pebs_trap ? '+' : '-'; + int format = x86_pmu.intel_cap.pebs_format; switch (format) { case 0: - printk(KERN_CONT "PEBS v0, "); + printk(KERN_CONT "PEBS fmt0%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_core); x86_pmu.drain_pebs = intel_pmu_drain_pebs_core; x86_pmu.pebs_constraints = intel_core_pebs_events; break; case 1: - printk(KERN_CONT "PEBS v1, "); + printk(KERN_CONT "PEBS fmt1%c, ", pebs_type); x86_pmu.pebs_record_size = sizeof(struct pebs_record_nhm); x86_pmu.drain_pebs = intel_pmu_drain_pebs_nhm; x86_pmu.pebs_constraints = intel_nehalem_pebs_events; break; default: - printk(KERN_CONT "PEBS unknown format: %d, ", format); + printk(KERN_CONT "no PEBS fmt%d%c, ", format, pebs_type); x86_pmu.pebs = 0; break; } diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index ea3e99e..4f3a124 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -53,7 +53,7 @@ static void intel_pmu_lbr_reset_64(void) static void intel_pmu_lbr_reset(void) { - if (x86_pmu.lbr_format == LBR_FORMAT_32) + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) intel_pmu_lbr_reset_32(); else intel_pmu_lbr_reset_64(); @@ -155,6 +155,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) { unsigned long mask = x86_pmu.lbr_nr - 1; + int lbr_format = x86_pmu.intel_cap.lbr_format; u64 tos = intel_pmu_lbr_tos(); int i; @@ -165,7 +166,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) rdmsrl(x86_pmu.lbr_from + lbr_idx, from); rdmsrl(x86_pmu.lbr_to + lbr_idx, to); - if (x86_pmu.lbr_format == LBR_FORMAT_EIP_FLAGS) { + if (lbr_format == LBR_FORMAT_EIP_FLAGS) { flags = !!(from & LBR_FROM_FLAG_MISPRED); from = (u64)((((s64)from) << 1) >> 1); } @@ -184,23 +185,14 @@ static void intel_pmu_lbr_read(void) if (!cpuc->lbr_users) return; - if (x86_pmu.lbr_format == LBR_FORMAT_32) + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) intel_pmu_lbr_read_32(cpuc); else intel_pmu_lbr_read_64(cpuc); } -static int intel_pmu_lbr_format(void) -{ - u64 capabilities; - - rdmsrl(MSR_IA32_PERF_CAPABILITIES, capabilities); - return capabilities & 0x1f; -} - static void intel_pmu_lbr_init_core(void) { - x86_pmu.lbr_format = intel_pmu_lbr_format(); x86_pmu.lbr_nr = 4; x86_pmu.lbr_tos = 0x01c9; x86_pmu.lbr_from = 0x40; @@ -209,7 +201,6 @@ static void intel_pmu_lbr_init_core(void) static void intel_pmu_lbr_init_nhm(void) { - x86_pmu.lbr_format = intel_pmu_lbr_format(); x86_pmu.lbr_nr = 16; x86_pmu.lbr_tos = 0x01c9; x86_pmu.lbr_from = 0x680; @@ -218,7 +209,6 @@ static void intel_pmu_lbr_init_nhm(void) static void intel_pmu_lbr_init_atom(void) { - x86_pmu.lbr_format = intel_pmu_lbr_format(); x86_pmu.lbr_nr = 8; x86_pmu.lbr_tos = 0x01c9; x86_pmu.lbr_from = 0x40; -- cgit v1.1 From 7e1a40dda619b0483fbe0740494ed2c2a1f05289 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Mar 2010 12:38:03 +0100 Subject: perf, x86: Expose the full PEBS record using PERF_SAMPLE_RAW Expose the full PEBS record using PERF_SAMPLE_RAW Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.847218224@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 14 ++++++++++++++ 1 
file changed, 14 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 5e40294..ef56f05 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -457,6 +457,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) struct perf_event *event = cpuc->events[0]; /* PMC0 only */ struct pebs_record_core *at, *top; struct perf_sample_data data; + struct perf_raw_record raw; struct pt_regs regs; int n; @@ -479,6 +480,12 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) perf_sample_data_init(&data, 0); data.period = event->hw.last_period; + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.size = x86_pmu.pebs_record_size; + raw.data = at; + data.raw = &raw; + } + n = top - at; /* @@ -521,6 +528,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) struct pebs_record_nhm *at, *top; struct perf_sample_data data; struct perf_event *event = NULL; + struct perf_raw_record raw; struct pt_regs regs; int bit, n; @@ -562,6 +570,12 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) perf_sample_data_init(&data, 0); data.period = event->hw.last_period; + if (event->attr.sample_type & PERF_SAMPLE_RAW) { + raw.size = x86_pmu.pebs_record_size; + raw.data = at; + data.raw = &raw; + } + /* * See the comment in intel_pmu_drain_pebs_core() */ -- cgit v1.1 From 30a813ae035d3e220a89609adce878e045c49547 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Mar 2010 13:49:21 +0100 Subject: x86: Move MAX_INSN_SIZE into asm/insn.h Since there's now two users for this, place it in a common header. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Masami Hiramatsu Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100304140100.923774125@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/insn.h | 2 ++ arch/x86/include/asm/kprobes.h | 2 +- arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 -- 3 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/insn.h b/arch/x86/include/asm/insn.h index 96c2e0a..88c765e 100644 --- a/arch/x86/include/asm/insn.h +++ b/arch/x86/include/asm/insn.h @@ -68,6 +68,8 @@ struct insn { const insn_byte_t *next_byte; }; +#define MAX_INSN_SIZE 16 + #define X86_MODRM_MOD(modrm) (((modrm) & 0xc0) >> 6) #define X86_MODRM_REG(modrm) (((modrm) & 0x38) >> 3) #define X86_MODRM_RM(modrm) ((modrm) & 0x07) diff --git a/arch/x86/include/asm/kprobes.h b/arch/x86/include/asm/kprobes.h index 4ffa345..5478825 100644 --- a/arch/x86/include/asm/kprobes.h +++ b/arch/x86/include/asm/kprobes.h @@ -24,6 +24,7 @@ #include #include #include +#include #define __ARCH_WANT_KPROBES_INSN_SLOT @@ -36,7 +37,6 @@ typedef u8 kprobe_opcode_t; #define RELATIVEJUMP_SIZE 5 #define RELATIVECALL_OPCODE 0xe8 #define RELATIVE_ADDR_SIZE 4 -#define MAX_INSN_SIZE 16 #define MAX_STACK_SIZE 64 #define MIN_STACK_SIZE(ADDR) \ (((MAX_STACK_SIZE) < (((unsigned long)current_thread_info()) + \ diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index ef56f05..72453ac 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -379,8 +379,6 @@ static void intel_pmu_pebs_disable_all(void) #include -#define MAX_INSN_SIZE 16 - static inline bool kernel_ip(unsigned long ip) { #ifdef CONFIG_X86_32 -- cgit v1.1 From 
3adaebd69557615c1bf0365ce5e32d93ac7d82af Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Mar 2010 12:09:29 +0100 Subject: perf, x86: Fix silly bug in data store buffer allocation Fix up the ds allocation error path, where we could free @buffer before we used it. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100305154128.813452402@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 72453ac..a67fff1 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -127,10 +127,8 @@ static int reserve_ds_buffers(void) err = -ENOMEM; ds = kzalloc(sizeof(*ds), GFP_KERNEL); - if (unlikely(!ds)) { - kfree(buffer); + if (unlikely(!ds)) break; - } per_cpu(cpu_hw_events, cpu).ds = ds; if (x86_pmu.bts) { -- cgit v1.1 From 3c44780b220e876b01e39d4028cd6f4205fbf5d6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 4 Mar 2010 21:49:01 +0100 Subject: perf, x86: Disable PEBS on clovertown chips This CPU has just too many handycaps to be really useful. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100305154128.890278662@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 4 ++++ arch/x86/kernel/cpu/perf_event_intel.c | 27 +++++++++++++++++++++++++++ 2 files changed, 31 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 7b5430b..335ee1d 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -197,6 +197,7 @@ struct x86_pmu { void (*put_event_constraints)(struct cpu_hw_events *cpuc, struct perf_event *event); struct event_constraint *event_constraints; + void (*quirks)(void); void (*cpu_prepare)(int cpu); void (*cpu_starting)(int cpu); @@ -1373,6 +1374,9 @@ void __init init_hw_perf_events(void) pr_cont("%s PMU driver.\n", x86_pmu.name); + if (x86_pmu.quirks) + x86_pmu.quirks(); + if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", x86_pmu.num_events, X86_PMC_MAX_GENERIC); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 246c072..224c952 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -792,6 +792,32 @@ static __initconst struct x86_pmu intel_pmu = { .cpu_dying = fini_debug_store_on_cpu, }; +static void intel_clovertown_quirks(void) +{ + /* + * PEBS is unreliable due to: + * + * AJ67 - PEBS may experience CPL leaks + * AJ68 - PEBS PMI may be delayed by one event + * AJ69 - GLOBAL_STATUS[62] will only be set when DEBUGCTL[12] + * AJ106 - FREEZE_LBRS_ON_PMI doesn't work in combination with PEBS + * + * AJ67 could be worked around by restricting the OS/USR flags. + * AJ69 could be worked around by setting PMU_FREEZE_ON_PMI. + * + * AJ106 could possibly be worked around by not allowing LBR + * usage from PEBS, including the fixup. + * AJ68 could possibly be worked around by always programming + * a pebs_event_reset[0] value and coping with the lost events. 
+ * + * But taken together it might just make sense to not enable PEBS on + * these chips. + */ + printk(KERN_WARNING "PEBS disabled due to CPU errata.\n"); + x86_pmu.pebs = 0; + x86_pmu.pebs_constraints = NULL; +} + static __init int intel_pmu_init(void) { union cpuid10_edx edx; @@ -856,6 +882,7 @@ static __init int intel_pmu_init(void) break; case 15: /* original 65 nm celeron/pentium/core2/xeon, "Merom"/"Conroe" */ + x86_pmu.quirks = intel_clovertown_quirks; case 22: /* single-core 65 nm celeron/core2solo "Merom-L"/"Conroe-L" */ case 23: /* current 45 nm celeron/core2/xeon "Penryn"/"Wolfdale" */ case 29: /* six-core 45 nm xeon "Dunnington" */ -- cgit v1.1 From 74846d35b24b6efd61bb88a0a750b6bb257e6e78 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Mar 2010 13:49:35 +0100 Subject: perf, x86: Clear the LBRs on init Some CPUs have errata where the LBR is not cleared on Power-On. So always clear the LBRs before use. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100305154128.966563424@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 18 ++++++++++++++++-- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 3 +++ 2 files changed, 19 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 224c952..c135ed7 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -767,6 +767,20 @@ static __initconst struct x86_pmu core_pmu = { .event_constraints = intel_core_event_constraints, }; +static void intel_pmu_cpu_starting(int cpu) +{ + init_debug_store_on_cpu(cpu); + /* + * Deal with CPUs that don't clear their LBRs on power-up. + */ + intel_pmu_lbr_reset(); +} + +static void intel_pmu_cpu_dying(int cpu) +{ + fini_debug_store_on_cpu(cpu); +} + static __initconst struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, @@ -788,8 +802,8 @@ static __initconst struct x86_pmu intel_pmu = { .max_period = (1ULL << 31) - 1, .get_event_constraints = intel_get_event_constraints, - .cpu_starting = init_debug_store_on_cpu, - .cpu_dying = fini_debug_store_on_cpu, + .cpu_starting = intel_pmu_cpu_starting, + .cpu_dying = intel_pmu_cpu_dying, }; static void intel_clovertown_quirks(void) diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 4f3a124..dcec765 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -53,6 +53,9 @@ static void intel_pmu_lbr_reset_64(void) static void intel_pmu_lbr_reset(void) { + if (!x86_pmu.lbr_nr) + return; + if (x86_pmu.intel_cap.lbr_format == LBR_FORMAT_32) intel_pmu_lbr_reset_32(); else -- cgit v1.1 From a562b1871f7f7d2f3a835c3c1e07fa58d473cfb7 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 5 Mar 2010 16:29:14 +0100 Subject: perf, x86: Robustify PEBS fixup It turns out the LBR is massively unreliable on certain CPUs, so code the fixup a little more defensive to avoid crashing the kernel. 
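The idea of the fixup itself stays the same: start at the last LBR branch target and decode forward, one instruction at a time, until an instruction ends exactly at the trapped IP; that previous instruction is the one that really caused the PEBS event, and if the stream never lines up, give up rather than report a bogus precise IP. As a rough userspace sketch of that walk (editor's illustration only; insn_len() is a made-up stub standing in for the kernel's instruction decoder, and the page-size cap mirrors the sanity check added here):

        #include <stdio.h>

        /* hypothetical stub; the real code uses the x86 insn decoder */
        static int insn_len(unsigned long ip)
        {
                (void)ip;
                return 4;               /* pretend fixed-size instructions */
        }

        /*
         * Walk from the LBR branch target 'to' up to the trapped 'ip'.
         * The instruction ending exactly at 'ip' is the one that caused
         * the PEBS event; return 0 if the stream never lines up.
         */
        static unsigned long rewind_one_insn(unsigned long to, unsigned long ip)
        {
                unsigned long prev = to;

                if (ip - to > 4096)     /* sanity: block larger than a page */
                        return 0;

                while (to < ip) {
                        prev = to;
                        to += insn_len(to);
                }
                return to == ip ? prev : 0;
        }

        int main(void)
        {
                printf("%#lx\n", rewind_one_insn(0x1000, 0x100c)); /* 0x1008 */
                printf("%#lx\n", rewind_one_insn(0x1000, 0x100e)); /* 0, mismatch */
                return 0;
        }

The real code additionally fetches user-space text with copy_from_user_nmi() and refuses to cross the user/kernel boundary, as the hunks below show.
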
Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: <20100305154129.042271287@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 21 +++++++++++++++++++-- 1 file changed, 19 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index a67fff1..e7ac517 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -399,10 +399,23 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) if (!x86_pmu.intel_cap.pebs_trap) return 1; + /* + * No LBR entry, no basic block, no rewinding + */ if (!cpuc->lbr_stack.nr || !from || !to) return 0; - if (ip < to) + /* + * Basic blocks should never cross user/kernel boundaries + */ + if (kernel_ip(ip) != kernel_ip(to)) + return 0; + + /* + * unsigned math, either ip is before the start (impossible) or + * the basic block is larger than 1 page (sanity) + */ + if ((ip - to) > PAGE_SIZE) return 0; /* @@ -420,7 +433,7 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) old_to = to; if (!kernel_ip(ip)) { - int bytes, size = min_t(int, MAX_INSN_SIZE, ip - to); + int bytes, size = MAX_INSN_SIZE; bytes = copy_from_user_nmi(buf, (void __user *)to, size); if (bytes != size) @@ -440,6 +453,10 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) return 1; } + /* + * Even though we decoded the basic block, the instruction stream + * never matched the given IP, either the TO or the IP got corrupted. + */ return 0; } -- cgit v1.1 From cc7f00820b2f3be656569c41158d9323e425bcfe Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Mar 2010 17:51:33 +0100 Subject: perf, x86: Avoid double disable on throttle vs ioctl(PERF_IOC_DISABLE) Calling ioctl(PERF_EVENT_IOC_DISABLE) on a thottled counter would result in a double disable, cure this by using x86_pmu_{start,stop} for throttle/unthrottle and teach x86_pmu_stop() to check ->active_mask. 
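In other words, stopping an event becomes idempotent: whichever path gets there first clears the event's bit in ->active_mask, and a second stop sees the bit already clear and backs off. A toy userspace sketch of that pattern (editor's illustration, made-up names, not the actual perf code):

        #include <stdbool.h>
        #include <stdio.h>

        /* toy per-cpu state; names are illustrative only */
        static bool active_mask[8];

        static void hw_disable(int idx)
        {
                printf("counter %d disabled\n", idx);
        }

        static void pmu_stop(int idx)
        {
                if (!active_mask[idx])          /* already stopped: no-op */
                        return;
                active_mask[idx] = false;
                hw_disable(idx);
        }

        int main(void)
        {
                active_mask[1] = true;
                pmu_stop(1);    /* throttle path stops the event        */
                pmu_stop(1);    /* ioctl(PERF_EVENT_IOC_DISABLE): no-op */
                return 0;
        }
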
Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index e7ac517..a7401e4 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -461,7 +461,6 @@ static int intel_pmu_pebs_fixup_ip(struct pt_regs *regs) } static int intel_pmu_save_and_restart(struct perf_event *event); -static void intel_pmu_disable_event(struct perf_event *event); static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) { @@ -528,7 +527,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) regs.flags &= ~PERF_EFLAGS_EXACT; if (perf_event_overflow(event, 1, &data, ®s)) - intel_pmu_disable_event(event); + x86_pmu_stop(event); out: intel_pmu_pebs_enable_all(); @@ -603,7 +602,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) regs.flags &= ~PERF_EFLAGS_EXACT; if (perf_event_overflow(event, 1, &data, ®s)) - intel_pmu_disable_event(event); + x86_pmu_stop(event); } out: intel_pmu_pebs_enable_all(); -- cgit v1.1 From 8f4aebd2be9892bf8fb79a2d8576d3f3ee7f00f6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 Mar 2010 13:26:11 +0100 Subject: perf, x86: Fix pebs drains I overlooked the perf_disable()/perf_enable() calls in intel_pmu_handle_irq(), (pointed out by Markus) so we should not explicitly disable_all/enable_all pebs counters in the drain functions, these are already disabled and enabling them early is confusing. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 15 +++------------ 1 file changed, 3 insertions(+), 12 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index a7401e4..66c6962 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -476,18 +476,16 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) if (!event || !ds || !x86_pmu.pebs) return; - intel_pmu_pebs_disable_all(); - at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; if (top <= at) - goto out; + return; ds->pebs_index = ds->pebs_buffer_base; if (!intel_pmu_save_and_restart(event)) - goto out; + return; perf_sample_data_init(&data, 0); data.period = event->hw.last_period; @@ -528,9 +526,6 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) if (perf_event_overflow(event, 1, &data, ®s)) x86_pmu_stop(event); - -out: - intel_pmu_pebs_enable_all(); } static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) @@ -547,13 +542,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) if (!ds || !x86_pmu.pebs) return; - intel_pmu_pebs_disable_all(); - at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; if (top <= at) - goto out; + return; ds->pebs_index = ds->pebs_buffer_base; @@ -604,8 +597,6 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) if (perf_event_overflow(event, 1, &data, ®s)) 
x86_pmu_stop(event); } -out: - intel_pmu_pebs_enable_all(); } /* -- cgit v1.1 From 4807e3d5dc7bb7057dd6ca3abb09f3da2eb8c323 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 Mar 2010 13:47:07 +0100 Subject: perf, x86: Fix PEBS enable/disable vs cpuc->enabled We should never call ->enable with the pmu enabled, and we _can_ have ->disable called with the pmu enabled. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 66c6962..9ad0e67 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -338,7 +338,7 @@ static void intel_pmu_pebs_enable(struct perf_event *event) hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; val |= 1ULL << hwc->idx; - wrmsrl(MSR_IA32_PEBS_ENABLE, val); + WARN_ON_ONCE(cpuc->enabled); if (x86_pmu.intel_cap.pebs_trap) intel_pmu_lbr_enable(event); @@ -351,7 +351,8 @@ static void intel_pmu_pebs_disable(struct perf_event *event) u64 val = cpuc->pebs_enabled; val &= ~(1ULL << hwc->idx); - wrmsrl(MSR_IA32_PEBS_ENABLE, val); + if (cpuc->enabled) + wrmsrl(MSR_IA32_PEBS_ENABLE, val); hwc->config |= ARCH_PERFMON_EVENTSEL_INT; -- cgit v1.1 From 2df202bf7520eaffcbfb07e45dfa3cfb0aeee2c0 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 Mar 2010 13:48:54 +0100 Subject: perf, x86: Fix LBR enable/disable vs cpuc->enabled We should never call ->enable with the pmu enabled, and we _can_ have ->disable called with the pmu enabled. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index dcec765..0145f99 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -69,7 +69,7 @@ static void intel_pmu_lbr_enable(struct perf_event *event) if (!x86_pmu.lbr_nr) return; - WARN_ON(cpuc->enabled); + WARN_ON_ONCE(cpuc->enabled); /* * Reset the LBR stack if this is the first LBR user or @@ -93,9 +93,10 @@ static void intel_pmu_lbr_disable(struct perf_event *event) return; cpuc->lbr_users--; - BUG_ON(cpuc->lbr_users < 0); - WARN_ON(cpuc->enabled); + + if (cpuc->enabled && !cpuc->lbr_users) + __intel_pmu_lbr_disable(); } static void intel_pmu_lbr_enable_all(void) -- cgit v1.1 From d329527e47851f84b1e7944ed9601205f35f1b93 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Mar 2010 13:57:14 +0100 Subject: perf, x86: Reorder intel_pmu_enable_all() The documentation says we have to enable PEBS before we enable the PMU proper. 
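One way to picture why the ordering could matter (editor's toy model with invented names; the commit itself only cites the documentation): if the global enable bit is flipped before the DS/PEBS assist is armed, there is a window in which an overflow can arrive with nowhere to put the record.

        #include <stdbool.h>
        #include <stdio.h>

        static bool ds_armed, global_enable;

        /* what, conceptually, happens when a counter overflows */
        static void overflow(void)
        {
                if (!global_enable)
                        return;
                if (!ds_armed)
                        printf("overflow before PEBS was armed: record lost\n");
                else
                        printf("overflow handled, PEBS record written\n");
        }

        int main(void)
        {
                /* wrong order: global enable first, assist second */
                global_enable = true;
                overflow();                     /* window where records are lost */
                ds_armed = true;

                /* right order: arm PEBS/LBR first, then enable globally */
                ds_armed = false;
                global_enable = false;
                ds_armed = true;
                global_enable = true;
                overflow();
                return 0;
        }
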
Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 5 ++--- 1 file changed, 2 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index c135ed7..d3e2424 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -487,6 +487,8 @@ static void intel_pmu_enable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + intel_pmu_pebs_enable_all(); + intel_pmu_lbr_enable_all(); wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, x86_pmu.intel_ctrl); if (test_bit(X86_PMC_IDX_FIXED_BTS, cpuc->active_mask)) { @@ -498,9 +500,6 @@ static void intel_pmu_enable_all(void) intel_pmu_enable_bts(event->hw.config); } - - intel_pmu_pebs_enable_all(); - intel_pmu_lbr_enable_all(); } static inline u64 intel_pmu_get_status(void) -- cgit v1.1 From 12ab854d744f04bfc5c6c4db723b7e31fc03eb29 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 Mar 2010 18:57:38 +0100 Subject: perf, x86: Deal with multiple state bits for pebs-fmt1 Its unclear if the PEBS state record will have only a single bit set, in case it does not and accumulates bits, deal with that by only processing each event once. Also, robustify some of the code. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 16 +++++++++++++--- 1 file changed, 13 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 9ad0e67..b4680da 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -538,6 +538,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) struct perf_event *event = NULL; struct perf_raw_record raw; struct pt_regs regs; + u64 status = 0; int bit, n; if (!ds || !x86_pmu.pebs) @@ -561,13 +562,22 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) for ( ; at < top; at++) { for_each_bit(bit, (unsigned long *)&at->status, MAX_PEBS_EVENTS) { - if (!cpuc->events[bit]->attr.precise) + event = cpuc->events[bit]; + if (!test_bit(bit, cpuc->active_mask)) continue; - event = cpuc->events[bit]; + WARN_ON_ONCE(!event); + + if (!event->attr.precise) + continue; + + if (__test_and_set_bit(bit, (unsigned long *)&status)) + continue; + + break; } - if (!event) + if (!event || bit >= MAX_PEBS_EVENTS) continue; if (!intel_pmu_save_and_restart(event)) -- cgit v1.1 From ad0e6cfe2a2a61d7b5530188e571d508146cb43b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Sat, 6 Mar 2010 19:49:06 +0100 Subject: perf, x86: Fix silly bug in intel_pmu_pebs_{enable,disable} We need to use the actual cpuc->pebs_enabled value, not a local copy for the changes to take effect. 
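The bug is the classic stale-local-copy pattern: the new bit was OR'ed into a local variable and never written back, so the per-cpu mask (which the MSR writes are built from) stayed unchanged. A minimal userspace illustration (editor's sketch, mock types, not the perf code):

        #include <stdint.h>
        #include <stdio.h>

        struct cpu_state {
                uint64_t pebs_enabled;
        };

        static void pebs_enable_buggy(struct cpu_state *s, int idx)
        {
                uint64_t val = s->pebs_enabled; /* local copy ...              */
                val |= 1ULL << idx;             /* ... gets the new bit ...    */
                (void)val;                      /* ... and is then thrown away */
        }

        static void pebs_enable_fixed(struct cpu_state *s, int idx)
        {
                s->pebs_enabled |= 1ULL << idx; /* update the real state */
        }

        int main(void)
        {
                struct cpu_state s = { 0 };

                pebs_enable_buggy(&s, 3);
                printf("buggy: %#llx\n", (unsigned long long)s.pebs_enabled); /* 0   */
                pebs_enable_fixed(&s, 3);
                printf("fixed: %#llx\n", (unsigned long long)s.pebs_enabled); /* 0x8 */
                return 0;
        }
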
Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 8 +++----- 1 file changed, 3 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index b4680da..2423694 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -333,11 +333,10 @@ static void intel_pmu_pebs_enable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - u64 val = cpuc->pebs_enabled; hwc->config &= ~ARCH_PERFMON_EVENTSEL_INT; - val |= 1ULL << hwc->idx; + cpuc->pebs_enabled |= 1ULL << hwc->idx; WARN_ON_ONCE(cpuc->enabled); if (x86_pmu.intel_cap.pebs_trap) @@ -348,11 +347,10 @@ static void intel_pmu_pebs_disable(struct perf_event *event) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct hw_perf_event *hwc = &event->hw; - u64 val = cpuc->pebs_enabled; - val &= ~(1ULL << hwc->idx); + cpuc->pebs_enabled &= ~(1ULL << hwc->idx); if (cpuc->enabled) - wrmsrl(MSR_IA32_PEBS_ENABLE, val); + wrmsrl(MSR_IA32_PEBS_ENABLE, cpuc->pebs_enabled); hwc->config |= ARCH_PERFMON_EVENTSEL_INT; -- cgit v1.1 From b83a46e7da4a948cc852ba7805dfb1a392dec861 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Mar 2010 13:51:12 +0100 Subject: perf, x86: Don't reset the LBR as frequently If we reset the LBR on each first counter, simple counter rotation which first deschedules all counters and then reschedules the new ones will lead to LBR reset, even though we're still in the same task context. Reduce this by not flushing on the first counter but only flushing on different task contexts. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 9 ++++----- 1 file changed, 4 insertions(+), 5 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index 0145f99..f278136 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -72,12 +72,11 @@ static void intel_pmu_lbr_enable(struct perf_event *event) WARN_ON_ONCE(cpuc->enabled); /* - * Reset the LBR stack if this is the first LBR user or - * we changed task context so as to avoid data leaks. + * Reset the LBR stack if we changed task context to + * avoid data leaks. */ - if (!cpuc->lbr_users || - (event->ctx->task && cpuc->lbr_context != event->ctx)) { + if (event->ctx->task && cpuc->lbr_context != event->ctx) { intel_pmu_lbr_reset(); cpuc->lbr_context = event->ctx; } @@ -93,7 +92,7 @@ static void intel_pmu_lbr_disable(struct perf_event *event) return; cpuc->lbr_users--; - BUG_ON(cpuc->lbr_users < 0); + WARN_ON_ONCE(cpuc->lbr_users < 0); if (cpuc->enabled && !cpuc->lbr_users) __intel_pmu_lbr_disable(); -- cgit v1.1 From 7645a24cbd01cbf4865d1273d5ddaa8d8c2ccb3a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 8 Mar 2010 13:51:31 +0100 Subject: perf, x86: Remove checking_{wr,rd}msr() usage We don't need checking_{wr,rd}msr() calls, since we should know what cpu we're running on and not use blindly poke at msrs. 
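The underlying argument: these paths only program MSRs whose presence was established at init time, and the error code of the checking variant was being thrown away anyway (the diff below drops several unused return values), so a fault-swallowing write only hides real bugs. A deliberately contrived userspace sketch of that trade-off (editor's illustration, fake MSR array, nothing here touches real MSRs):

        #include <assert.h>
        #include <stdint.h>
        #include <stdio.h>

        static uint64_t fake_msr[16];           /* toy stand-in for MSRs */

        /* old style: swallow the fault, hand back an error nobody checks */
        static int checking_write(unsigned int msr, uint64_t val)
        {
                if (msr >= 16)
                        return -1;
                fake_msr[msr] = val;
                return 0;
        }

        /* new style: the caller is expected to know the MSR is valid */
        static void plain_write(unsigned int msr, uint64_t val)
        {
                assert(msr < 16);       /* a bogus access is a bug, make it loud */
                fake_msr[msr] = val;
        }

        int main(void)
        {
                (void)checking_write(42, 1);    /* error silently dropped   */
                plain_write(3, 1);              /* direct, trusted write    */
                printf("msr 3 = %llu\n", (unsigned long long)fake_msr[3]);
                return 0;
        }
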
Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 24 ++++++++++++++++++------ arch/x86/kernel/cpu/perf_event_intel.c | 5 ++--- 2 files changed, 20 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 335ee1d..e24f637 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -29,6 +29,17 @@ #include #include +#if 0 +#undef wrmsrl +#define wrmsrl(msr, val) \ +do { \ + trace_printk("wrmsrl(%lx, %lx)\n", (unsigned long)(msr),\ + (unsigned long)(val)); \ + native_write_msr((msr), (u32)((u64)(val)), \ + (u32)((u64)(val) >> 32)); \ +} while (0) +#endif + /* * best effort, GUP based copy_from_user() that assumes IRQ or NMI context */ @@ -821,14 +832,15 @@ void hw_perf_enable(void) static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) { - (void)checking_wrmsrl(hwc->config_base + hwc->idx, + wrmsrl(hwc->config_base + hwc->idx, hwc->config | ARCH_PERFMON_EVENTSEL_ENABLE); } static inline void x86_pmu_disable_event(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - (void)checking_wrmsrl(hwc->config_base + hwc->idx, hwc->config); + + wrmsrl(hwc->config_base + hwc->idx, hwc->config); } static DEFINE_PER_CPU(u64 [X86_PMC_IDX_MAX], pmc_prev_left); @@ -843,7 +855,7 @@ x86_perf_event_set_period(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; s64 left = atomic64_read(&hwc->period_left); s64 period = hwc->sample_period; - int err, ret = 0, idx = hwc->idx; + int ret = 0, idx = hwc->idx; if (idx == X86_PMC_IDX_FIXED_BTS) return 0; @@ -881,8 +893,8 @@ x86_perf_event_set_period(struct perf_event *event) */ atomic64_set(&hwc->prev_count, (u64)-left); - err = checking_wrmsrl(hwc->event_base + idx, - (u64)(-left) & x86_pmu.event_mask); + wrmsrl(hwc->event_base + idx, + (u64)(-left) & x86_pmu.event_mask); perf_event_update_userpage(event); @@ -987,7 +999,7 @@ void perf_event_print_debug(void) pr_info("CPU#%d: fixed: %016llx\n", cpu, fixed); pr_info("CPU#%d: pebs: %016llx\n", cpu, pebs); } - pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); + pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); for (idx = 0; idx < x86_pmu.num_events; idx++) { rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index d3e2424..971dc6e 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -525,7 +525,7 @@ static void intel_pmu_disable_fixed(struct hw_perf_event *hwc) rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; - (void)checking_wrmsrl(hwc->config_base, ctrl_val); + wrmsrl(hwc->config_base, ctrl_val); } static void intel_pmu_disable_event(struct perf_event *event) @@ -553,7 +553,6 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) { int idx = hwc->idx - X86_PMC_IDX_FIXED; u64 ctrl_val, bits, mask; - int err; /* * Enable IRQ generation (0x8), @@ -578,7 +577,7 @@ static void intel_pmu_enable_fixed(struct hw_perf_event *hwc) rdmsrl(hwc->config_base, ctrl_val); ctrl_val &= ~mask; ctrl_val |= bits; - err = checking_wrmsrl(hwc->config_base, ctrl_val); + wrmsrl(hwc->config_base, ctrl_val); } static void intel_pmu_enable_event(struct perf_event *event) -- cgit v1.1 From d80c7502ff63aa0d99d8c0c5803d28bbef67a74e 
Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Mar 2010 11:41:02 +0100 Subject: perf, x86: Fixup the PEBS handler for Core2 cpus Pull the core handler in line with the nhm one, also make sure we always drain the buffer. Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 38 +++++++++++++++++++------------ 1 file changed, 24 insertions(+), 14 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 2423694..1bfd59b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -472,20 +472,39 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) struct pt_regs regs; int n; - if (!event || !ds || !x86_pmu.pebs) + if (!ds || !x86_pmu.pebs) return; at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base; top = (struct pebs_record_core *)(unsigned long)ds->pebs_index; - if (top <= at) + /* + * Whatever else happens, drain the thing + */ + ds->pebs_index = ds->pebs_buffer_base; + + if (!test_bit(0, cpuc->active_mask)) return; - ds->pebs_index = ds->pebs_buffer_base; + WARN_ON_ONCE(!event); + + if (!event->attr.precise) + return; + + n = top - at; + if (n <= 0) + return; if (!intel_pmu_save_and_restart(event)) return; + /* + * Should not happen, we program the threshold at 1 and do not + * set a reset value. + */ + WARN_ON_ONCE(n > 1); + at += n - 1; + perf_sample_data_init(&data, 0); data.period = event->hw.last_period; @@ -495,14 +514,6 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs) data.raw = &raw; } - n = top - at; - - /* - * Should not happen, we program the threshold at 1 and do not - * set a reset value. - */ - WARN_ON_ONCE(n > 1); - /* * We use the interrupt regs as a base because the PEBS record * does not contain a full regs set, specifically it seems to @@ -545,12 +556,11 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs) at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base; top = (struct pebs_record_nhm *)(unsigned long)ds->pebs_index; - if (top <= at) - return; - ds->pebs_index = ds->pebs_buffer_base; n = top - at; + if (n <= 0) + return; /* * Should not happen, we program the threshold at 1 and do not -- cgit v1.1 From 63fb3f9b2312e131be5a0a2dddb63f2fb123db9b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 9 Mar 2010 11:51:02 +0100 Subject: perf, x86: Fix LBR read-out Don't decrement the TOS twice... 
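The per-entry index is already computed as (tos - i), so also decrementing the TOS each iteration steps through the stack twice as fast, skipping half the entries and re-reading others. A small standalone demonstration (editor's sketch, constants chosen arbitrarily):

        #include <stdio.h>

        #define LBR_NR          4
        #define LBR_MASK        (LBR_NR - 1)

        int main(void)
        {
                unsigned int tos = 5;   /* arbitrary top-of-stack value */
                unsigned int t;
                int i;

                printf("buggy: ");
                for (i = 0, t = tos; i < LBR_NR; i++, t--)      /* steps twice */
                        printf("%u ", (t - i) & LBR_MASK);

                printf("\nfixed: ");
                for (i = 0; i < LBR_NR; i++)                    /* steps once */
                        printf("%u ", (tos - i) & LBR_MASK);
                printf("\n");
                return 0;
        }

With 4 entries and a TOS of 5 the buggy loop reads slots 1 3 1 3, while the fixed one reads 1 0 3 2, visiting each slot exactly once.
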
Signed-off-by: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: paulus@samba.org Cc: eranian@google.com Cc: robert.richter@amd.com Cc: fweisbec@gmail.com LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index f278136..df4c98e2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -129,7 +129,7 @@ static void intel_pmu_lbr_read_32(struct cpu_hw_events *cpuc) u64 tos = intel_pmu_lbr_tos(); int i; - for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) { + for (i = 0; i < x86_pmu.lbr_nr; i++) { unsigned long lbr_idx = (tos - i) & mask; union { struct { @@ -162,7 +162,7 @@ static void intel_pmu_lbr_read_64(struct cpu_hw_events *cpuc) u64 tos = intel_pmu_lbr_tos(); int i; - for (i = 0; i < x86_pmu.lbr_nr; i++, tos--) { + for (i = 0; i < x86_pmu.lbr_nr; i++) { unsigned long lbr_idx = (tos - i) & mask; u64 from, to, flags = 0; -- cgit v1.1 From ba7e4d13fc7e25af1d167d40e6f028298dfc55ad Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Jun 2009 13:58:12 +0200 Subject: perf, x86: Add INSTRUCTION_DECODER config flag The PEBS+LBR decoding magic needs the insn_get_length() infrastructure to be able to decode x86 instruction length. So split it out of KPROBES dependency and make it enabled when either KPROBES or PERF_EVENTS is enabled. Cc: Peter Zijlstra Cc: Masami Hiramatsu Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/Kconfig | 3 +++ arch/x86/lib/Makefile | 2 +- 2 files changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/Kconfig b/arch/x86/Kconfig index e984403..e1240f6 100644 --- a/arch/x86/Kconfig +++ b/arch/x86/Kconfig @@ -58,6 +58,9 @@ config X86 select HAVE_ARCH_KMEMCHECK select HAVE_USER_RETURN_NOTIFIER +config INSTRUCTION_DECODER + def_bool (KPROBES || PERF_EVENTS) + config OUTPUT_FORMAT string default "elf32-i386" if X86_32 diff --git a/arch/x86/lib/Makefile b/arch/x86/lib/Makefile index 419386c..cbaf8f2 100644 --- a/arch/x86/lib/Makefile +++ b/arch/x86/lib/Makefile @@ -20,7 +20,7 @@ lib-y := delay.o lib-y += thunk_$(BITS).o lib-y += usercopy_$(BITS).o getuser.o putuser.o lib-y += memcpy_$(BITS).o -lib-$(CONFIG_KPROBES) += insn.o inat.o +lib-$(CONFIG_INSTRUCTION_DECODER) += insn.o inat.o obj-y += msr.o msr-reg.o msr-reg-export.o -- cgit v1.1 From caa0142d84ceb0fc83e28f0475d0a7316cb6df77 Mon Sep 17 00:00:00 2001 From: Ingo Molnar Date: Sat, 6 Jun 2009 13:58:12 +0200 Subject: perf, x86: Fix the !CONFIG_CPU_SUP_INTEL build Fix typo. But the modularization here is ugly and should be improved. 
Cc: Peter Zijlstra Cc: Frederic Weisbecker Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel_ds.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index 1bfd59b..c59678a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -661,7 +661,7 @@ static void intel_ds_init(void) #else /* CONFIG_CPU_SUP_INTEL */ -static int reseve_ds_buffers(void) +static int reserve_ds_buffers(void) { return 0; } -- cgit v1.1 From a072738e04f0eb26370e39ec679e9a0d65e49aea Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Thu, 11 Mar 2010 19:54:39 +0300 Subject: perf, x86: Implement initial P4 PMU driver The netburst PMU is way different from the "architectural perfomance monitoring" specification that current CPUs use. P4 uses a tuple of ESCR+CCCR+COUNTER MSR registers to handle perfomance monitoring events. A few implementational details: 1) We need a separate x86_pmu::hw_config helper in struct x86_pmu since register bit-fields are quite different from P6, Core and later cpu series. 2) For the same reason is a x86_pmu::schedule_events helper introduced. 3) hw_perf_event::config consists of packed ESCR+CCCR values. It's allowed since in reality both registers only use a half of their size. Of course before making a real write into a particular MSR we need to unpack the value and extend it to a proper size. 4) The tuple of packed ESCR+CCCR in hw_perf_event::config doesn't describe the memory address of ESCR MSR register so that we need to keep a mapping between these tuples used and available ESCR (various P4 events may use same ESCRs but not simultaneously), for this sake every active event has a per-cpu map of hw_perf_event::idx <--> ESCR addresses. 5) Since hw_perf_event::idx is an offset to counter/control register we need to lift X86_PMC_MAX_GENERIC up, otherwise kernel strips it down to 8 registers and event armed may never be turned off (ie the bit in active_mask is set but the loop never reaches this index to check), thanks to Peter Zijlstra Restrictions: - No cascaded counters support (do we ever need them?) 
- No dependent events support (so PERF_COUNT_HW_INSTRUCTIONS doesn't work for now) - There are events with same counters which can't work simultaneously (need to use intersected ones due to broken counter 1) - No PERF_COUNT_HW_CACHE_ events yet Todo: - Implement dependent events - Need proper hashing for event opcodes (no linear search, good for debugging stage but not in real loads) - Some events counted during a clock cycle -- need to set threshold for them and count every clock cycle just to get summary statistics (ie to behave the same way as other PMUs do) - Need to swicth to use event_constraints - To support RAW events we need to encode a global list of P4 events into p4_templates - Cache events need to be added Event support status matrix: Event status ----------------------------- cycles works cache-references works cache-misses works branch-misses works bus-cycles partially (does not work on 64bit cpu with HT enabled) instruction doesnt work (needs dependent event [mop tagging]) branches doesnt work Signed-off-by: Cyrill Gorcunov Signed-off-by: Lin Ming Cc: Peter Zijlstra Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Robert Richter Cc: Frederic Weisbecker LKML-Reference: <20100311165439.GB5129@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 2 +- arch/x86/include/asm/perf_event_p4.h | 707 +++++++++++++++++++++++++++++++++ arch/x86/kernel/cpu/perf_event.c | 46 ++- arch/x86/kernel/cpu/perf_event_amd.c | 2 + arch/x86/kernel/cpu/perf_event_intel.c | 15 +- arch/x86/kernel/cpu/perf_event_p4.c | 607 ++++++++++++++++++++++++++++ arch/x86/kernel/cpu/perf_event_p6.c | 2 + 7 files changed, 1358 insertions(+), 23 deletions(-) create mode 100644 arch/x86/include/asm/perf_event_p4.h create mode 100644 arch/x86/kernel/cpu/perf_event_p4.c (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index a9038c9..124dddd 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -5,7 +5,7 @@ * Performance event hw details: */ -#define X86_PMC_MAX_GENERIC 8 +#define X86_PMC_MAX_GENERIC 32 #define X86_PMC_MAX_FIXED 3 #define X86_PMC_IDX_GENERIC 0 diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h new file mode 100644 index 0000000..829f471 --- /dev/null +++ b/arch/x86/include/asm/perf_event_p4.h @@ -0,0 +1,707 @@ +/* + * Netburst Perfomance Events (P4, old Xeon) + */ + +#ifndef PERF_EVENT_P4_H +#define PERF_EVENT_P4_H + +#include +#include + +/* + * NetBurst has perfomance MSRs shared between + * threads if HT is turned on, ie for both logical + * processors (mem: in turn in Atom with HT support + * perf-MSRs are not shared and every thread has its + * own perf-MSRs set) + */ +#define ARCH_P4_TOTAL_ESCR (46) +#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ +#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) +#define ARCH_P4_MAX_CCCR (18) +#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) + +#define P4_EVNTSEL_EVENT_MASK 0x7e000000U +#define P4_EVNTSEL_EVENT_SHIFT 25 +#define P4_EVNTSEL_EVENTMASK_MASK 0x01fffe00U +#define P4_EVNTSEL_EVENTMASK_SHIFT 9 +#define P4_EVNTSEL_TAG_MASK 0x000001e0U +#define P4_EVNTSEL_TAG_SHIFT 5 +#define P4_EVNTSEL_TAG_ENABLE 0x00000010U +#define P4_EVNTSEL_T0_OS 0x00000008U +#define P4_EVNTSEL_T0_USR 0x00000004U +#define P4_EVNTSEL_T1_OS 0x00000002U +#define P4_EVNTSEL_T1_USR 0x00000001U + +/* Non HT mask */ +#define P4_EVNTSEL_MASK \ + (P4_EVNTSEL_EVENT_MASK | \ + 
P4_EVNTSEL_EVENTMASK_MASK | \ + P4_EVNTSEL_TAG_MASK | \ + P4_EVNTSEL_TAG_ENABLE | \ + P4_EVNTSEL_T0_OS | \ + P4_EVNTSEL_T0_USR) + +/* HT mask */ +#define P4_EVNTSEL_MASK_HT \ + (P4_EVNTSEL_MASK | \ + P4_EVNTSEL_T1_OS | \ + P4_EVNTSEL_T1_USR) + +#define P4_CCCR_OVF 0x80000000U +#define P4_CCCR_CASCADE 0x40000000U +#define P4_CCCR_OVF_PMI_T0 0x04000000U +#define P4_CCCR_OVF_PMI_T1 0x08000000U +#define P4_CCCR_FORCE_OVF 0x02000000U +#define P4_CCCR_EDGE 0x01000000U +#define P4_CCCR_THRESHOLD_MASK 0x00f00000U +#define P4_CCCR_THRESHOLD_SHIFT 20 +#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) +#define P4_CCCR_COMPLEMENT 0x00080000U +#define P4_CCCR_COMPARE 0x00040000U +#define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U +#define P4_CCCR_ESCR_SELECT_SHIFT 13 +#define P4_CCCR_ENABLE 0x00001000U +#define P4_CCCR_THREAD_SINGLE 0x00010000U +#define P4_CCCR_THREAD_BOTH 0x00020000U +#define P4_CCCR_THREAD_ANY 0x00030000U + +/* Non HT mask */ +#define P4_CCCR_MASK \ + (P4_CCCR_OVF | \ + P4_CCCR_CASCADE | \ + P4_CCCR_OVF_PMI_T0 | \ + P4_CCCR_FORCE_OVF | \ + P4_CCCR_EDGE | \ + P4_CCCR_THRESHOLD_MASK | \ + P4_CCCR_COMPLEMENT | \ + P4_CCCR_COMPARE | \ + P4_CCCR_ESCR_SELECT_MASK | \ + P4_CCCR_ENABLE) + +/* HT mask */ +#define P4_CCCR_MASK_HT \ + (P4_CCCR_MASK | \ + P4_CCCR_THREAD_ANY) + +/* + * format is 32 bit: ee ss aa aa + * where + * ee - 8 bit event + * ss - 8 bit selector + * aa aa - 16 bits reserved for tags/attributes + */ +#define P4_EVENT_PACK(event, selector) (((event) << 24) | ((selector) << 16)) +#define P4_EVENT_UNPACK_EVENT(packed) (((packed) >> 24) & 0xff) +#define P4_EVENT_UNPACK_SELECTOR(packed) (((packed) >> 16) & 0xff) +#define P4_EVENT_PACK_ATTR(attr) ((attr)) +#define P4_EVENT_UNPACK_ATTR(packed) ((packed) & 0xffff) +#define P4_MAKE_EVENT_ATTR(class, name, bit) class##_##name = (1 << bit) +#define P4_EVENT_ATTR(class, name) class##_##name +#define P4_EVENT_ATTR_STR(class, name) __stringify(class##_##name) + +/* + * config field is 64bit width and consists of + * HT << 63 | ESCR << 32 | CCCR + * where HT is HyperThreading bit (since ESCR + * has it reserved we may use it for own purpose) + * + * note that this is NOT the addresses of respective + * ESCR and CCCR but rather an only packed value should + * be unpacked and written to a proper addresses + * + * the base idea is to pack as much info as + * possible + */ +#define p4_config_pack_escr(v) (((u64)(v)) << 32) +#define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) +#define p4_config_unpack_escr(v) (((u64)(v)) >> 32) +#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) + +#define p4_config_unpack_emask(v) \ + ({ \ + u32 t = p4_config_unpack_escr((v)); \ + t &= P4_EVNTSEL_EVENTMASK_MASK; \ + t >>= P4_EVNTSEL_EVENTMASK_SHIFT; \ + t; \ + }) + +#define P4_CONFIG_HT_SHIFT 63 +#define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) + +static inline u32 p4_config_unpack_opcode(u64 config) +{ + u32 e, s; + + /* + * we don't care about HT presence here since + * event opcode doesn't depend on it + */ + e = (p4_config_unpack_escr(config) & P4_EVNTSEL_EVENT_MASK) >> P4_EVNTSEL_EVENT_SHIFT; + s = (p4_config_unpack_cccr(config) & P4_CCCR_ESCR_SELECT_MASK) >> P4_CCCR_ESCR_SELECT_SHIFT; + + return P4_EVENT_PACK(e, s); +} + +static inline bool p4_is_event_cascaded(u64 config) +{ + u32 cccr = p4_config_unpack_cccr(config); + return !!(cccr & P4_CCCR_CASCADE); +} + +static inline int p4_ht_config_thread(u64 config) +{ + return !!(config & P4_CONFIG_HT); +} + +static inline u64 p4_set_ht_bit(u64 config) +{ + return config | 
P4_CONFIG_HT; +} + +static inline u64 p4_clear_ht_bit(u64 config) +{ + return config & ~P4_CONFIG_HT; +} + +static inline int p4_ht_active(void) +{ +#ifdef CONFIG_SMP + return smp_num_siblings > 1; +#endif + return 0; +} + +static inline int p4_ht_thread(int cpu) +{ +#ifdef CONFIG_SMP + if (smp_num_siblings == 2) + return cpu != cpumask_first(__get_cpu_var(cpu_sibling_map)); +#endif + return 0; +} + +static inline int p4_should_swap_ts(u64 config, int cpu) +{ + return p4_ht_config_thread(config) ^ p4_ht_thread(cpu); +} + +static inline u32 p4_default_cccr_conf(int cpu) +{ + /* + * Note that P4_CCCR_THREAD_ANY is "required" on + * non-HT machines (on HT machines we count TS events + * regardless the state of second logical processor + */ + u32 cccr = P4_CCCR_THREAD_ANY; + + if (!p4_ht_thread(cpu)) + cccr |= P4_CCCR_OVF_PMI_T0; + else + cccr |= P4_CCCR_OVF_PMI_T1; + + return cccr; +} + +static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) +{ + u32 escr = 0; + + if (!p4_ht_thread(cpu)) { + if (!exclude_os) + escr |= P4_EVNTSEL_T0_OS; + if (!exclude_usr) + escr |= P4_EVNTSEL_T0_USR; + } else { + if (!exclude_os) + escr |= P4_EVNTSEL_T1_OS; + if (!exclude_usr) + escr |= P4_EVNTSEL_T1_USR; + } + + return escr; +} + +/* + * Comments below the event represent ESCR restriction + * for this event and counter index per ESCR + * + * MSR_P4_IQ_ESCR0 and MSR_P4_IQ_ESCR1 are available only on early + * processor builds (family 0FH, models 01H-02H). These MSRs + * are not available on later versions, so that we don't use + * them completely + * + * Also note that CCCR1 do not have P4_CCCR_ENABLE bit properly + * working so that we should not use this CCCR and respective + * counter as result + */ +#define P4_TC_DELIVER_MODE P4_EVENT_PACK(0x01, 0x01) + /* + * MSR_P4_TC_ESCR0: 4, 5 + * MSR_P4_TC_ESCR1: 6, 7 + */ + +#define P4_BPU_FETCH_REQUEST P4_EVENT_PACK(0x03, 0x00) + /* + * MSR_P4_BPU_ESCR0: 0, 1 + * MSR_P4_BPU_ESCR1: 2, 3 + */ + +#define P4_ITLB_REFERENCE P4_EVENT_PACK(0x18, 0x03) + /* + * MSR_P4_ITLB_ESCR0: 0, 1 + * MSR_P4_ITLB_ESCR1: 2, 3 + */ + +#define P4_MEMORY_CANCEL P4_EVENT_PACK(0x02, 0x05) + /* + * MSR_P4_DAC_ESCR0: 8, 9 + * MSR_P4_DAC_ESCR1: 10, 11 + */ + +#define P4_MEMORY_COMPLETE P4_EVENT_PACK(0x08, 0x02) + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + +#define P4_LOAD_PORT_REPLAY P4_EVENT_PACK(0x04, 0x02) + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + +#define P4_STORE_PORT_REPLAY P4_EVENT_PACK(0x05, 0x02) + /* + * MSR_P4_SAAT_ESCR0: 8, 9 + * MSR_P4_SAAT_ESCR1: 10, 11 + */ + +#define P4_MOB_LOAD_REPLAY P4_EVENT_PACK(0x03, 0x02) + /* + * MSR_P4_MOB_ESCR0: 0, 1 + * MSR_P4_MOB_ESCR1: 2, 3 + */ + +#define P4_PAGE_WALK_TYPE P4_EVENT_PACK(0x01, 0x04) + /* + * MSR_P4_PMH_ESCR0: 0, 1 + * MSR_P4_PMH_ESCR1: 2, 3 + */ + +#define P4_BSQ_CACHE_REFERENCE P4_EVENT_PACK(0x0c, 0x07) + /* + * MSR_P4_BSU_ESCR0: 0, 1 + * MSR_P4_BSU_ESCR1: 2, 3 + */ + +#define P4_IOQ_ALLOCATION P4_EVENT_PACK(0x03, 0x06) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_IOQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x1a, 0x06) + /* + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_FSB_DATA_ACTIVITY P4_EVENT_PACK(0x17, 0x06) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_BSQ_ALLOCATION P4_EVENT_PACK(0x05, 0x07) + /* + * MSR_P4_BSU_ESCR0: 0, 1 + */ + +#define P4_BSQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x06, 0x07) + /* + * MSR_P4_BSU_ESCR1: 2, 3 + */ + +#define P4_SSE_INPUT_ASSIST P4_EVENT_PACK(0x34, 0x01) + /* + * 
MSR_P4_FIRM_ESCR: 8, 9 + * MSR_P4_FIRM_ESCR: 10, 11 + */ + +#define P4_PACKED_SP_UOP P4_EVENT_PACK(0x08, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_PACKED_DP_UOP P4_EVENT_PACK(0x0c, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_SCALAR_SP_UOP P4_EVENT_PACK(0x0a, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_SCALAR_DP_UOP P4_EVENT_PACK(0x0e, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_64BIT_MMX_UOP P4_EVENT_PACK(0x02, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_128BIT_MMX_UOP P4_EVENT_PACK(0x1a, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_X87_FP_UOP P4_EVENT_PACK(0x04, 0x01) + /* + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 + */ + +#define P4_TC_MISC P4_EVENT_PACK(0x06, 0x01) + /* + * MSR_P4_TC_ESCR0: 4, 5 + * MSR_P4_TC_ESCR1: 6, 7 + */ + +#define P4_GLOBAL_POWER_EVENTS P4_EVENT_PACK(0x13, 0x06) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_TC_MS_XFER P4_EVENT_PACK(0x05, 0x00) + /* + * MSR_P4_MS_ESCR0: 4, 5 + * MSR_P4_MS_ESCR1: 6, 7 + */ + +#define P4_UOP_QUEUE_WRITES P4_EVENT_PACK(0x09, 0x00) + /* + * MSR_P4_MS_ESCR0: 4, 5 + * MSR_P4_MS_ESCR1: 6, 7 + */ + +#define P4_RETIRED_MISPRED_BRANCH_TYPE P4_EVENT_PACK(0x05, 0x02) + /* + * MSR_P4_TBPU_ESCR0: 4, 5 + * MSR_P4_TBPU_ESCR0: 6, 7 + */ + +#define P4_RETIRED_BRANCH_TYPE P4_EVENT_PACK(0x04, 0x02) + /* + * MSR_P4_TBPU_ESCR0: 4, 5 + * MSR_P4_TBPU_ESCR0: 6, 7 + */ + +#define P4_RESOURCE_STALL P4_EVENT_PACK(0x01, 0x01) + /* + * MSR_P4_ALF_ESCR0: 12, 13, 16 + * MSR_P4_ALF_ESCR1: 14, 15, 17 + */ + +#define P4_WC_BUFFER P4_EVENT_PACK(0x05, 0x05) + /* + * MSR_P4_DAC_ESCR0: 8, 9 + * MSR_P4_DAC_ESCR1: 10, 11 + */ + +#define P4_B2B_CYCLES P4_EVENT_PACK(0x16, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_BNR P4_EVENT_PACK(0x08, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_SNOOP P4_EVENT_PACK(0x06, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_RESPONSE P4_EVENT_PACK(0x04, 0x03) + /* + * MSR_P4_FSB_ESCR0: 0, 1 + * MSR_P4_FSB_ESCR1: 2, 3 + */ + +#define P4_FRONT_END_EVENT P4_EVENT_PACK(0x08, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_EXECUTION_EVENT P4_EVENT_PACK(0x0c, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_REPLAY_EVENT P4_EVENT_PACK(0x09, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_INSTR_RETIRED P4_EVENT_PACK(0x02, 0x04) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_UOPS_RETIRED P4_EVENT_PACK(0x01, 0x04) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_UOP_TYPE P4_EVENT_PACK(0x02, 0x02) + /* + * MSR_P4_RAT_ESCR0: 12, 13, 16 + * MSR_P4_RAT_ESCR1: 14, 15, 17 + */ + +#define P4_BRANCH_RETIRED P4_EVENT_PACK(0x06, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_MISPRED_BRANCH_RETIRED P4_EVENT_PACK(0x03, 0x04) + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ + +#define P4_X87_ASSIST P4_EVENT_PACK(0x03, 0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_MACHINE_CLEAR P4_EVENT_PACK(0x02, 
0x05) + /* + * MSR_P4_CRU_ESCR2: 12, 13, 16 + * MSR_P4_CRU_ESCR3: 14, 15, 17 + */ + +#define P4_INSTR_COMPLETED P4_EVENT_PACK(0x07, 0x04) + /* + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 + */ + +/* + * a caller should use P4_EVENT_ATTR helper to + * pick the attribute needed, for example + * + * P4_EVENT_ATTR(P4_TC_DELIVER_MODE, DD) + */ +enum P4_EVENTS_ATTR { + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DD, 0), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DB, 1), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DI, 2), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BD, 3), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BB, 4), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BI, 5), + P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, ID, 6), + + P4_MAKE_EVENT_ATTR(P4_BPU_FETCH_REQUEST, TCMISS, 0), + + P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT, 0), + P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, MISS, 1), + P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT_UK, 2), + + P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, ST_RB_FULL, 2), + P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, 64K_CONF, 3), + + P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, LSC, 0), + P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, SSC, 1), + + P4_MAKE_EVENT_ATTR(P4_LOAD_PORT_REPLAY, SPLIT_LD, 1), + + P4_MAKE_EVENT_ATTR(P4_STORE_PORT_REPLAY, SPLIT_ST, 1), + + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STA, 1), + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STD, 3), + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), + P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), + + P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, DTMISS, 0), + P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, ITMISS, 1), + + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), + P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), + + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, DEFAULT, 0), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_READ, 5), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_WRITE, 6), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_UC, 7), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WC, 8), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WT, 9), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WP, 10), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WB, 11), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OWN, 13), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OTHER, 14), + P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, PREFETCH, 15), + + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OWN, 13), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OTHER, 14), + P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), + + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV, 0), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN, 1), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), + 
P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_DRV, 3), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OWN, 4), + P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), + + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE0, 0), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE1, 1), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN0, 2), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN1, 3), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_IO_TYPE, 5), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE0, 11), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE1, 12), + P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE2, 13), + + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), + P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), + + P4_MAKE_EVENT_ATTR(P4_SSE_INPUT_ASSIST, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_PACKED_SP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_PACKED_DP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_SCALAR_SP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_SCALAR_DP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_64BIT_MMX_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_128BIT_MMX_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_X87_FP_UOP, ALL, 15), + + P4_MAKE_EVENT_ATTR(P4_TC_MISC, FLUSH, 4), + + P4_MAKE_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING, 0), + + P4_MAKE_EVENT_ATTR(P4_TC_MS_XFER, CISC, 0), + + P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), + P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), + P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_ROM, 2), + + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), + P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), + + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL, 2), + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN, 3), + P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT, 4), + + P4_MAKE_EVENT_ATTR(P4_RESOURCE_STALL, SBFULL, 5), + + P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_EVICTS, 0), + P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_FULL_EVICTS, 1), + + P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, BOGUS, 1), + + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS0, 0), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS1, 1), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS2, 2), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS3, 3), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS0, 4), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS1, 5), + 
P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS2, 6), + P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS3, 7), + + P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, BOGUS, 1), + + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG, 0), + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSTAG, 1), + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG, 2), + P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSTAG, 3), + + P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, BOGUS, 1), + + P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS, 1), + P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES, 2), + + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNP, 0), + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNM, 1), + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTP, 2), + P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTM, 3), + + P4_MAKE_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS, 0), + + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSU, 0), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSO, 1), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAO, 2), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAU, 3), + P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, PREA, 4), + + P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, CLEAR, 0), + P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, MOCLEAR, 1), + P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, SMCLEAR, 2), + + P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, NBOGUS, 0), + P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1), +}; + +#endif /* PERF_EVENT_P4_H */ diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e24f637..e6a3f5f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -190,6 +190,8 @@ struct x86_pmu { void (*enable_all)(void); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); + int (*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc); + int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; unsigned perfctr; u64 (*event_map)(int); @@ -415,6 +417,25 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) return 0; } +static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) +{ + /* + * Generate PMC IRQs: + * (keep 'enabled' bit clear for now) + */ + hwc->config = ARCH_PERFMON_EVENTSEL_INT; + + /* + * Count user and OS events unless requested not to + */ + if (!attr->exclude_user) + hwc->config |= ARCH_PERFMON_EVENTSEL_USR; + if (!attr->exclude_kernel) + hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + + return 0; +} + /* * Setup the hardware configuration for a given attr_type */ @@ -446,23 +467,13 @@ static int __hw_perf_event_init(struct perf_event *event) event->destroy = hw_perf_event_destroy; - /* - * Generate PMC IRQs: - * (keep 'enabled' bit clear for now) - */ - hwc->config = ARCH_PERFMON_EVENTSEL_INT; - hwc->idx = -1; hwc->last_cpu = -1; hwc->last_tag = ~0ULL; - /* - * Count user and OS events unless requested not to. - */ - if (!attr->exclude_user) - hwc->config |= ARCH_PERFMON_EVENTSEL_USR; - if (!attr->exclude_kernel) - hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + /* Processor specifics */ + if (x86_pmu.hw_config(attr, hwc)) + return -EOPNOTSUPP; if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; @@ -517,7 +528,7 @@ static int __hw_perf_event_init(struct perf_event *event) return -EOPNOTSUPP; /* BTS is currently only allowed for user-mode. 
*/ - if (hwc->config & ARCH_PERFMON_EVENTSEL_OS) + if (!attr->exclude_kernel) return -EOPNOTSUPP; } @@ -931,7 +942,7 @@ static int x86_pmu_enable(struct perf_event *event) if (n < 0) return n; - ret = x86_schedule_events(cpuc, n, assign); + ret = x86_pmu.schedule_events(cpuc, n, assign); if (ret) return ret; /* @@ -1263,7 +1274,7 @@ int hw_perf_group_sched_in(struct perf_event *leader, if (n0 < 0) return n0; - ret = x86_schedule_events(cpuc, n0, assign); + ret = x86_pmu.schedule_events(cpuc, n0, assign); if (ret) return ret; @@ -1313,6 +1324,7 @@ undo: #include "perf_event_amd.c" #include "perf_event_p6.c" +#include "perf_event_p4.c" #include "perf_event_intel_lbr.c" #include "perf_event_intel_ds.c" #include "perf_event_intel.c" @@ -1515,7 +1527,7 @@ static int validate_group(struct perf_event *event) fake_cpuc->n_events = n; - ret = x86_schedule_events(fake_cpuc, n, NULL); + ret = x86_pmu.schedule_events(fake_cpuc, n, NULL); out_free: kfree(fake_cpuc); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 573458f..358a8e3 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -363,6 +363,8 @@ static __initconst struct x86_pmu amd_pmu = { .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, .disable = x86_pmu_disable_event, + .hw_config = x86_hw_config, + .schedule_events = x86_schedule_events, .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, .event_map = amd_pmu_event_map, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 971dc6e..044b843 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -749,6 +749,8 @@ static __initconst struct x86_pmu core_pmu = { .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, .disable = x86_pmu_disable_event, + .hw_config = x86_hw_config, + .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, .event_map = intel_pmu_event_map, @@ -786,6 +788,8 @@ static __initconst struct x86_pmu intel_pmu = { .enable_all = intel_pmu_enable_all, .enable = intel_pmu_enable_event, .disable = intel_pmu_disable_event, + .hw_config = x86_hw_config, + .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, .event_map = intel_pmu_event_map, @@ -839,12 +843,13 @@ static __init int intel_pmu_init(void) int version; if (!cpu_has(&boot_cpu_data, X86_FEATURE_ARCH_PERFMON)) { - /* check for P6 processor family */ - if (boot_cpu_data.x86 == 6) { - return p6_pmu_init(); - } else { + switch (boot_cpu_data.x86) { + case 0x6: + return p6_pmu_init(); + case 0xf: + return p4_pmu_init(); + } return -ENODEV; - } } /* diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c new file mode 100644 index 0000000..381f593 --- /dev/null +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -0,0 +1,607 @@ +/* + * Netburst Perfomance Events (P4, old Xeon) + * + * Copyright (C) 2010 Parallels, Inc., Cyrill Gorcunov + * Copyright (C) 2010 Intel Corporation, Lin Ming + * + * For licencing details see kernel-base/COPYING + */ + +#ifdef CONFIG_CPU_SUP_INTEL + +#include + +/* + * array indices: 0,1 - HT threads, used with HT enabled cpu + */ +struct p4_event_template { + u32 opcode; /* ESCR event + CCCR selector */ + u64 config; /* packed predefined bits */ + int dep; /* upstream dependency event index */ + unsigned int emask; /* ESCR EventMask */ + unsigned 
int escr_msr[2]; /* ESCR MSR for this event */ + unsigned int cntr[2]; /* counter index (offset) */ +}; + +struct p4_pmu_res { + /* maps hw_conf::idx into template for ESCR sake */ + struct p4_event_template *tpl[ARCH_P4_MAX_CCCR]; +}; + +static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config); + +/* + * WARN: CCCR1 doesn't have a working enable bit so try to not + * use it if possible + * + * Also as only we start to support raw events we will need to + * append _all_ P4_EVENT_PACK'ed events here + */ +struct p4_event_template p4_templates[] = { + [0] = { + .opcode = P4_UOP_TYPE, + .config = 0, + .dep = -1, + .emask = + P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | + P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), + .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, + .cntr = { 16, 17 }, + }, + [1] = { + .opcode = P4_GLOBAL_POWER_EVENTS, + .config = 0, + .dep = -1, + .emask = + P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .cntr = { 0, 2 }, + }, + [2] = { + .opcode = P4_INSTR_RETIRED, + .config = 0, + .dep = 0, /* needs front-end tagging */ + .emask = + P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) | + P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSTAG) | + P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG) | + P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSTAG), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .cntr = { 12, 14 }, + }, + [3] = { + .opcode = P4_BSQ_CACHE_REFERENCE, + .config = 0, + .dep = -1, + .emask = + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM), + .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, + .cntr = { 0, 2 }, + }, + [4] = { + .opcode = P4_BSQ_CACHE_REFERENCE, + .config = 0, + .dep = -1, + .emask = + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | + P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), + .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, + .cntr = { 0, 3 }, + }, + [5] = { + .opcode = P4_RETIRED_BRANCH_TYPE, + .config = 0, + .dep = -1, + .emask = + P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) | + P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) | + P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN) | + P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT), + .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, + .cntr = { 4, 6 }, + }, + [6] = { + .opcode = P4_MISPRED_BRANCH_RETIRED, + .config = 0, + .dep = -1, + .emask = + P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .cntr = { 12, 14 }, + }, + [7] = { + .opcode = P4_FSB_DATA_ACTIVITY, + .config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), + .dep = -1, + .emask = + P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) | + P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .cntr = { 0, 2 }, + }, +}; + +static struct p4_event_template *p4_event_map[PERF_COUNT_HW_MAX] = { + /* non-halted CPU clocks */ + [PERF_COUNT_HW_CPU_CYCLES] = &p4_templates[1], + + /* retired instructions: dep on tagging the FSB */ + [PERF_COUNT_HW_INSTRUCTIONS] = &p4_templates[2], + + /* cache hits */ + [PERF_COUNT_HW_CACHE_REFERENCES] = &p4_templates[3], + + /* cache misses */ + [PERF_COUNT_HW_CACHE_MISSES] = &p4_templates[4], + + /* branch instructions 
retired */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = &p4_templates[5], + + /* mispredicted branches retired */ + [PERF_COUNT_HW_BRANCH_MISSES] = &p4_templates[6], + + /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ + [PERF_COUNT_HW_BUS_CYCLES] = &p4_templates[7], +}; + +static u64 p4_pmu_event_map(int hw_event) +{ + struct p4_event_template *tpl; + u64 config; + + if (hw_event > ARRAY_SIZE(p4_event_map)) { + printk_once(KERN_ERR "PMU: Incorrect event index\n"); + return 0; + } + tpl = p4_event_map[hw_event]; + + /* + * fill config up according to + * a predefined event template + */ + config = tpl->config; + config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT); + config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT); + config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); + + /* on HT machine we need a special bit */ + if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id())) + config = p4_set_ht_bit(config); + + return config; +} + +/* + * Note that we still have 5 events (from global events SDM list) + * intersected in opcode+emask bits so we will need another + * scheme there do distinguish templates. + */ +static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src) +{ + return dst & src; +} + +static struct p4_event_template *p4_pmu_template_lookup(u64 config) +{ + u32 opcode = p4_config_unpack_opcode(config); + unsigned int emask = p4_config_unpack_emask(config); + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(p4_templates); i++) { + if (opcode == p4_templates[i].opcode && + p4_pmu_emask_match(emask, p4_templates[i].emask)) + return &p4_templates[i]; + } + + return NULL; +} + +/* + * We don't control raw events so it's up to the caller + * to pass sane values (and we don't count the thread number + * on HT machine but allow HT-compatible specifics to be + * passed on) + */ +static u64 p4_pmu_raw_event(u64 hw_event) +{ + return hw_event & + (p4_config_pack_escr(P4_EVNTSEL_MASK_HT) | + p4_config_pack_cccr(P4_CCCR_MASK_HT)); +} + +static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) +{ + int cpu = raw_smp_processor_id(); + + /* + * the reason we use cpu that early is that: if we get scheduled + * first time on the same cpu -- we will not need swap thread + * specific flags in config (and will save some cpu cycles) + */ + + /* CCCR by default */ + hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu)); + + /* Count user and OS events unless not requested to */ + hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel, + attr->exclude_user)); + return 0; +} + +static inline void p4_pmu_clear_cccr_ovf(struct hw_perf_event *hwc) +{ + unsigned long dummy; + + rdmsrl(hwc->config_base + hwc->idx, dummy); + if (dummy & P4_CCCR_OVF) { + (void)checking_wrmsrl(hwc->config_base + hwc->idx, + ((u64)dummy) & ~P4_CCCR_OVF); + } +} + +static inline void p4_pmu_disable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + + /* + * If event gets disabled while counter is in overflowed + * state we need to clear P4_CCCR_OVF, otherwise interrupt get + * asserted again and again + */ + (void)checking_wrmsrl(hwc->config_base + hwc->idx, + (u64)(p4_config_unpack_cccr(hwc->config)) & + ~P4_CCCR_ENABLE & ~P4_CCCR_OVF); +} + +static void p4_pmu_disable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + struct 
perf_event *event = cpuc->events[idx]; + if (!test_bit(idx, cpuc->active_mask)) + continue; + p4_pmu_disable_event(event); + } +} + +static void p4_pmu_enable_event(struct perf_event *event) +{ + struct hw_perf_event *hwc = &event->hw; + int thread = p4_ht_config_thread(hwc->config); + u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); + u64 escr_base; + struct p4_event_template *tpl; + struct p4_pmu_res *c; + + /* + * some preparation work from per-cpu private fields + * since we need to find out which ESCR to use + */ + c = &__get_cpu_var(p4_pmu_config); + tpl = c->tpl[hwc->idx]; + if (!tpl) { + pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx); + return; + } + escr_base = (u64)tpl->escr_msr[thread]; + + /* + * - we dont support cascaded counters yet + * - and counter 1 is broken (erratum) + */ + WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); + WARN_ON_ONCE(hwc->idx == 1); + + (void)checking_wrmsrl(escr_base, escr_conf); + (void)checking_wrmsrl(hwc->config_base + hwc->idx, + (u64)(p4_config_unpack_cccr(hwc->config)) | P4_CCCR_ENABLE); +} + +static void p4_pmu_enable_all(void) +{ + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int idx; + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + struct perf_event *event = cpuc->events[idx]; + if (!test_bit(idx, cpuc->active_mask)) + continue; + p4_pmu_enable_event(event); + } +} + +static int p4_pmu_handle_irq(struct pt_regs *regs) +{ + struct perf_sample_data data; + struct cpu_hw_events *cpuc; + struct perf_event *event; + struct hw_perf_event *hwc; + int idx, handled = 0; + u64 val; + + data.addr = 0; + data.raw = NULL; + + cpuc = &__get_cpu_var(cpu_hw_events); + + for (idx = 0; idx < x86_pmu.num_events; idx++) { + + if (!test_bit(idx, cpuc->active_mask)) + continue; + + event = cpuc->events[idx]; + hwc = &event->hw; + + WARN_ON_ONCE(hwc->idx != idx); + + /* + * FIXME: Redundant call, actually not needed + * but just to check if we're screwed + */ + p4_pmu_clear_cccr_ovf(hwc); + + val = x86_perf_event_update(event); + if (val & (1ULL << (x86_pmu.event_bits - 1))) + continue; + + /* + * event overflow + */ + handled = 1; + data.period = event->hw.last_period; + + if (!x86_perf_event_set_period(event)) + continue; + if (perf_event_overflow(event, 1, &data, regs)) + p4_pmu_disable_event(event); + } + + if (handled) { + /* p4 quirk: unmask it again */ + apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); + inc_irq_stat(apic_perf_irqs); + } + + return handled; +} + +/* + * swap thread specific fields according to a thread + * we are going to run on + */ +static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) +{ + u32 escr, cccr; + + /* + * we either lucky and continue on same cpu or no HT support + */ + if (!p4_should_swap_ts(hwc->config, cpu)) + return; + + /* + * the event is migrated from an another logical + * cpu, so we need to swap thread specific flags + */ + + escr = p4_config_unpack_escr(hwc->config); + cccr = p4_config_unpack_cccr(hwc->config); + + if (p4_ht_thread(cpu)) { + cccr &= ~P4_CCCR_OVF_PMI_T0; + cccr |= P4_CCCR_OVF_PMI_T1; + if (escr & P4_EVNTSEL_T0_OS) { + escr &= ~P4_EVNTSEL_T0_OS; + escr |= P4_EVNTSEL_T1_OS; + } + if (escr & P4_EVNTSEL_T0_USR) { + escr &= ~P4_EVNTSEL_T0_USR; + escr |= P4_EVNTSEL_T1_USR; + } + hwc->config = p4_config_pack_escr(escr); + hwc->config |= p4_config_pack_cccr(cccr); + hwc->config |= P4_CONFIG_HT; + } else { + cccr &= ~P4_CCCR_OVF_PMI_T1; + cccr |= P4_CCCR_OVF_PMI_T0; + if (escr & P4_EVNTSEL_T1_OS) { + escr &= ~P4_EVNTSEL_T1_OS; + 
escr |= P4_EVNTSEL_T0_OS; + } + if (escr & P4_EVNTSEL_T1_USR) { + escr &= ~P4_EVNTSEL_T1_USR; + escr |= P4_EVNTSEL_T0_USR; + } + hwc->config = p4_config_pack_escr(escr); + hwc->config |= p4_config_pack_cccr(cccr); + hwc->config &= ~P4_CONFIG_HT; + } +} + +/* ESCRs are not sequential in memory so we need a map */ +static unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = { + MSR_P4_ALF_ESCR0, /* 0 */ + MSR_P4_ALF_ESCR1, /* 1 */ + MSR_P4_BPU_ESCR0, /* 2 */ + MSR_P4_BPU_ESCR1, /* 3 */ + MSR_P4_BSU_ESCR0, /* 4 */ + MSR_P4_BSU_ESCR1, /* 5 */ + MSR_P4_CRU_ESCR0, /* 6 */ + MSR_P4_CRU_ESCR1, /* 7 */ + MSR_P4_CRU_ESCR2, /* 8 */ + MSR_P4_CRU_ESCR3, /* 9 */ + MSR_P4_CRU_ESCR4, /* 10 */ + MSR_P4_CRU_ESCR5, /* 11 */ + MSR_P4_DAC_ESCR0, /* 12 */ + MSR_P4_DAC_ESCR1, /* 13 */ + MSR_P4_FIRM_ESCR0, /* 14 */ + MSR_P4_FIRM_ESCR1, /* 15 */ + MSR_P4_FLAME_ESCR0, /* 16 */ + MSR_P4_FLAME_ESCR1, /* 17 */ + MSR_P4_FSB_ESCR0, /* 18 */ + MSR_P4_FSB_ESCR1, /* 19 */ + MSR_P4_IQ_ESCR0, /* 20 */ + MSR_P4_IQ_ESCR1, /* 21 */ + MSR_P4_IS_ESCR0, /* 22 */ + MSR_P4_IS_ESCR1, /* 23 */ + MSR_P4_ITLB_ESCR0, /* 24 */ + MSR_P4_ITLB_ESCR1, /* 25 */ + MSR_P4_IX_ESCR0, /* 26 */ + MSR_P4_IX_ESCR1, /* 27 */ + MSR_P4_MOB_ESCR0, /* 28 */ + MSR_P4_MOB_ESCR1, /* 29 */ + MSR_P4_MS_ESCR0, /* 30 */ + MSR_P4_MS_ESCR1, /* 31 */ + MSR_P4_PMH_ESCR0, /* 32 */ + MSR_P4_PMH_ESCR1, /* 33 */ + MSR_P4_RAT_ESCR0, /* 34 */ + MSR_P4_RAT_ESCR1, /* 35 */ + MSR_P4_SAAT_ESCR0, /* 36 */ + MSR_P4_SAAT_ESCR1, /* 37 */ + MSR_P4_SSU_ESCR0, /* 38 */ + MSR_P4_SSU_ESCR1, /* 39 */ + MSR_P4_TBPU_ESCR0, /* 40 */ + MSR_P4_TBPU_ESCR1, /* 41 */ + MSR_P4_TC_ESCR0, /* 42 */ + MSR_P4_TC_ESCR1, /* 43 */ + MSR_P4_U2L_ESCR0, /* 44 */ + MSR_P4_U2L_ESCR1, /* 45 */ +}; + +static int p4_get_escr_idx(unsigned int addr) +{ + unsigned int i; + + for (i = 0; i < ARRAY_SIZE(p4_escr_map); i++) { + if (addr == p4_escr_map[i]) + return i; + } + + return -1; +} + +static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) +{ + unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; + unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)]; + + struct hw_perf_event *hwc; + struct p4_event_template *tpl; + struct p4_pmu_res *c; + int cpu = raw_smp_processor_id(); + int escr_idx, thread, i, num; + + bitmap_zero(used_mask, X86_PMC_IDX_MAX); + bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR); + + c = &__get_cpu_var(p4_pmu_config); + /* + * Firstly find out which resource events are going + * to use, if ESCR+CCCR tuple is already borrowed + * then get out of here + */ + for (i = 0, num = n; i < n; i++, num--) { + hwc = &cpuc->event_list[i]->hw; + tpl = p4_pmu_template_lookup(hwc->config); + if (!tpl) + goto done; + thread = p4_ht_thread(cpu); + escr_idx = p4_get_escr_idx(tpl->escr_msr[thread]); + if (escr_idx == -1) + goto done; + + /* already allocated and remains on the same cpu */ + if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { + if (assign) + assign[i] = hwc->idx; + /* upstream dependent event */ + if (unlikely(tpl->dep != -1)) + printk_once(KERN_WARNING "PMU: Dep events are " + "not implemented yet\n"); + goto reserve; + } + + /* it may be already borrowed */ + if (test_bit(tpl->cntr[thread], used_mask) || + test_bit(escr_idx, escr_mask)) + goto done; + + /* + * ESCR+CCCR+COUNTERs are available to use lets swap + * thread specific bits, push assigned bits + * back and save template into per-cpu + * area (which will allow us to find out the ESCR + * to be used at moment of "enable event via real MSR") + */ + p4_pmu_swap_config_ts(hwc, cpu); + if (assign) { + 
assign[i] = tpl->cntr[thread]; + c->tpl[assign[i]] = tpl; + } +reserve: + set_bit(tpl->cntr[thread], used_mask); + set_bit(escr_idx, escr_mask); + } + +done: + return num ? -ENOSPC : 0; +} + +static __initconst struct x86_pmu p4_pmu = { + .name = "Netburst P4/Xeon", + .handle_irq = p4_pmu_handle_irq, + .disable_all = p4_pmu_disable_all, + .enable_all = p4_pmu_enable_all, + .enable = p4_pmu_enable_event, + .disable = p4_pmu_disable_event, + .eventsel = MSR_P4_BPU_CCCR0, + .perfctr = MSR_P4_BPU_PERFCTR0, + .event_map = p4_pmu_event_map, + .raw_event = p4_pmu_raw_event, + .max_events = ARRAY_SIZE(p4_event_map), + .get_event_constraints = x86_get_event_constraints, + /* + * IF HT disabled we may need to use all + * ARCH_P4_MAX_CCCR counters simulaneously + * though leave it restricted at moment assuming + * HT is on + */ + .num_events = ARCH_P4_MAX_CCCR, + .apic = 1, + .event_bits = 40, + .event_mask = (1ULL << 40) - 1, + .max_period = (1ULL << 39) - 1, + .hw_config = p4_hw_config, + .schedule_events = p4_pmu_schedule_events, +}; + +static __init int p4_pmu_init(void) +{ + unsigned int low, high; + + /* If we get stripped -- indexig fails */ + BUILD_BUG_ON(ARCH_P4_MAX_CCCR > X86_PMC_MAX_GENERIC); + + rdmsr(MSR_IA32_MISC_ENABLE, low, high); + if (!(low & (1 << 7))) { + pr_cont("unsupported Netburst CPU model %d ", + boot_cpu_data.x86_model); + return -ENODEV; + } + + pr_cont("Netburst events, "); + + x86_pmu = p4_pmu; + + return 0; +} + +#endif /* CONFIG_CPU_SUP_INTEL */ diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index a330485..6ff4d01d 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -109,6 +109,8 @@ static __initconst struct x86_pmu p6_pmu = { .enable_all = p6_pmu_enable_all, .enable = p6_pmu_enable_event, .disable = p6_pmu_disable_event, + .hw_config = x86_hw_config, + .schedule_events = x86_schedule_events, .eventsel = MSR_P6_EVNTSEL0, .perfctr = MSR_P6_PERFCTR0, .event_map = p6_pmu_event_map, -- cgit v1.1 From 0b861225a5890f22445f08ca9cc7a87cff276ff7 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 12 Mar 2010 00:50:16 +0300 Subject: x86, perf: Fix NULL deref on not assigned x86_pmu In case of not assigned x86_pmu and software events NULL dereference may being hit via x86_pmu::schedule_events method. Fix it by checking if x86_pmu is initialized at all. 
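For reference, the guard boils down to the pattern sketched below. The early-return check is the hunk that follows; the body of x86_pmu_initialized() is paraphrased here as an assumption about the helper in perf_event.c and is not part of this patch.

        /*
         * Sketch: the x86_pmu descriptor is only populated once a vendor
         * driver (Intel, AMD, P4, P6) has been selected at init time, so
         * probing one of its mandatory callbacks is a cheap way to tell
         * whether the hardware PMU side exists at all.
         */
        static bool x86_pmu_initialized(void)
        {
                return x86_pmu.handle_irq != NULL;      /* assumed detail */
        }

        /* Paths reachable with software-only events must bail out early: */
        if (!x86_pmu_initialized())
                return 0;
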
Signed-off-by: Cyrill Gorcunov Cc: Lin Ming Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Peter Zijlstra LKML-Reference: <20100311215016.GG25162@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +++ 1 file changed, 3 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index e6a3f5f..5586a02 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1269,6 +1269,9 @@ int hw_perf_group_sched_in(struct perf_event *leader, int assign[X86_PMC_IDX_MAX]; int n0, n1, ret; + if (!x86_pmu_initialized()) + return 0; + /* n0 = total number of events */ n0 = collect_events(cpuc, leader, true); if (n0 < 0) -- cgit v1.1 From 8576e1971663ffdb6139041de97cdd2e1d4791cc Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Sat, 13 Mar 2010 11:11:16 +0300 Subject: x86, perf: Unmask LVTPC only if we have APIC supported Ingo reported: | | There's a build failure on -tip with the P4 driver, on UP 32-bit, if | PERF_EVENTS is enabled but UP_APIC is disabled: | | arch/x86/built-in.o: In function `p4_pmu_handle_irq': | perf_event.c:(.text+0xa756): undefined reference to `apic' | perf_event.c:(.text+0xa76e): undefined reference to `apic' | So we have to unmask LVTPC only if we're configured to have one. Reported-by: Ingo Molnar Signed-off-by: Cyrill Gorcunov CC: Lin Ming CC: Peter Zijlstra LKML-Reference: <20100313081116.GA5179@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_p4.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 381f593..ef861da1 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -365,8 +365,10 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) } if (handled) { +#ifdef CONFIG_X86_LOCAL_APIC /* p4 quirk: unmask it again */ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); +#endif inc_irq_stat(apic_perf_irqs); } -- cgit v1.1 From e4495262826d1eabca3529fa6ac22394eb348132 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Mon, 15 Mar 2010 12:58:22 +0800 Subject: perf, x86: Enable not tagged retired instruction counting on P4s This should turn on instruction counting on P4s, which was missing in the first version of the new PMU driver. It's inaccurate for now, we still need dependant event to tag mops before we can count them precisely. The result is that the number of instruction may be lifted up. 
Signed-off-by: Cyrill Gorcunov Signed-off-by: Lin Ming Cc: Peter Zijlstra LKML-Reference: <1268629102.3355.11.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 8 ++++---- arch/x86/kernel/cpu/perf_event_p4.c | 8 +++----- 2 files changed, 7 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 829f471..b47b9e9 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -324,8 +324,8 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) #define P4_SSE_INPUT_ASSIST P4_EVENT_PACK(0x34, 0x01) /* - * MSR_P4_FIRM_ESCR: 8, 9 - * MSR_P4_FIRM_ESCR: 10, 11 + * MSR_P4_FIRM_ESCR0: 8, 9 + * MSR_P4_FIRM_ESCR1: 10, 11 */ #define P4_PACKED_SP_UOP P4_EVENT_PACK(0x08, 0x01) @@ -462,8 +462,8 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) #define P4_INSTR_RETIRED P4_EVENT_PACK(0x02, 0x04) /* - * MSR_P4_CRU_ESCR2: 12, 13, 16 - * MSR_P4_CRU_ESCR3: 14, 15, 17 + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 */ #define P4_UOPS_RETIRED P4_EVENT_PACK(0x01, 0x04) diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index ef861da1..a11ce73 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -60,13 +60,11 @@ struct p4_event_template p4_templates[] = { [2] = { .opcode = P4_INSTR_RETIRED, .config = 0, - .dep = 0, /* needs front-end tagging */ + .dep = -1, /* needs front-end tagging */ .emask = P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) | - P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSTAG) | - P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG) | - P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSTAG), - .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, .cntr = { 12, 14 }, }, [3] = { -- cgit v1.1 From 1d199b1ad606ae8b88acebd295b101c4e1cf2a57 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Tue, 16 Mar 2010 01:05:02 +0100 Subject: perf: Fix unexported generic perf_arch_fetch_caller_regs perf_arch_fetch_caller_regs() is exported for the overriden x86 version, but not for the generic weak version. As a general rule, weak functions should not have their symbol exported in the same file they are defined. So let's export it on trace_event_perf.c as it is used by trace events only. This fixes: ERROR: ".perf_arch_fetch_caller_regs" [fs/xfs/xfs.ko] undefined! ERROR: ".perf_arch_fetch_caller_regs" [arch/powerpc/platforms/cell/spufs/spufs.ko] undefined! -v2: And also only build it if trace events are enabled. 
-v3: Fix changelog mistake Reported-by: Stephen Rothwell Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Xiao Guangrong Cc: Paul Mackerras LKML-Reference: <1268697902-9518-1-git-send-regression-fweisbec@gmail.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 978d297..0d3466c 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1695,6 +1695,7 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) return entry; } +#ifdef CONFIG_EVENT_TRACING void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int skip) { regs->ip = ip; @@ -1706,4 +1707,4 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski regs->cs = __KERNEL_CS; local_save_flags(regs->flags); } -EXPORT_SYMBOL_GPL(perf_arch_fetch_caller_regs); +#endif -- cgit v1.1 From 8ea7f544100844307072cae2f5fc108afdef999a Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Tue, 16 Mar 2010 10:12:36 +0800 Subject: x86, perf: Fix comments in Pentium-4 PMU definitions Reported-by: Cyrill Gorcunov Signed-off-by: Lin Ming Cc: Peter Zijlstra Cc: Mike Galbraith Cc: Paul Mackerras Cc: Arnaldo Carvalho de Melo Cc: Frederic Weisbecker LKML-Reference: <1268705556.3379.8.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index b47b9e9..b842b32 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -319,6 +319,7 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) #define P4_BSQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x06, 0x07) /* + * NOTE: no ESCR name in docs, it's guessed * MSR_P4_BSU_ESCR1: 2, 3 */ @@ -468,8 +469,8 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) #define P4_UOPS_RETIRED P4_EVENT_PACK(0x01, 0x04) /* - * MSR_P4_CRU_ESCR2: 12, 13, 16 - * MSR_P4_CRU_ESCR3: 14, 15, 17 + * MSR_P4_CRU_ESCR0: 12, 13, 16 + * MSR_P4_CRU_ESCR1: 14, 15, 17 */ #define P4_UOP_TYPE P4_EVENT_PACK(0x02, 0x02) -- cgit v1.1 From 984763cb90d4b5444baa0c3e43feff7926bf1834 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 16 Mar 2010 17:07:33 +0100 Subject: perf, x86: Report error code that returned from x86_pmu.hw_config() If x86_pmu.hw_config() fails a fixed error code (-EOPNOTSUPP) is returned even if a different error was reported. This patch fixes this. 
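In short, the idiom changes as sketched below (names match the surrounding __hw_perf_event_init() code; the exact hunk follows):

        /* before: every failure was flattened to one fixed error code */
        if (x86_pmu.hw_config(attr, hwc))
                return -EOPNOTSUPP;

        /* after: the error reported by the vendor hw_config() is passed
         * through unchanged, so the caller sees the original reason */
        err = x86_pmu.hw_config(attr, hwc);
        if (err)
                return err;
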
Signed-off-by: Robert Richter Acked-by: Cyrill Gorcunov Acked-by: Lin Ming Cc: acme@redhat.com Cc: eranian@google.com Cc: gorcunov@openvz.org Cc: peterz@infradead.org Cc: fweisbec@gmail.com LKML-Reference: <20100316160733.GR1585@erda.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 5 +++-- 1 file changed, 3 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 0d3466c..5dacf63 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -472,8 +472,9 @@ static int __hw_perf_event_init(struct perf_event *event) hwc->last_tag = ~0ULL; /* Processor specifics */ - if (x86_pmu.hw_config(attr, hwc)) - return -EOPNOTSUPP; + err = x86_pmu.hw_config(attr, hwc); + if (err) + return err; if (!hwc->sample_period) { hwc->sample_period = x86_pmu.max_period; -- cgit v1.1 From b27ea29c6267889be255f2217fa7a6106e6a8b04 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 17 Mar 2010 12:49:10 +0100 Subject: perf/core, x86: Reduce number of CONFIG_X86_LOCAL_APIC macros The function reserve_pmc_hardware() and release_pmc_hardware() were hard to read. This patch improves readability of the code by removing most of the CONFIG_X86_LOCAL_APIC macros. Signed-off-by: Robert Richter Cc: Stephane Eranian Cc: Peter Zijlstra LKML-Reference: <1268826553-19518-2-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 15 +++++++++------ 1 file changed, 9 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 5dacf63..793e63f 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -313,9 +313,10 @@ again: static atomic_t active_events; static DEFINE_MUTEX(pmc_reserve_mutex); +#ifdef CONFIG_X86_LOCAL_APIC + static bool reserve_pmc_hardware(void) { -#ifdef CONFIG_X86_LOCAL_APIC int i; if (nmi_watchdog == NMI_LOCAL_APIC) @@ -330,11 +331,9 @@ static bool reserve_pmc_hardware(void) if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } -#endif return true; -#ifdef CONFIG_X86_LOCAL_APIC eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu.eventsel + i); @@ -349,12 +348,10 @@ perfctr_fail: enable_lapic_nmi_watchdog(); return false; -#endif } static void release_pmc_hardware(void) { -#ifdef CONFIG_X86_LOCAL_APIC int i; for (i = 0; i < x86_pmu.num_events; i++) { @@ -364,9 +361,15 @@ static void release_pmc_hardware(void) if (nmi_watchdog == NMI_LOCAL_APIC) enable_lapic_nmi_watchdog(); -#endif } +#else + +static bool reserve_pmc_hardware(void) { return true; } +static void release_pmc_hardware(void) {} + +#endif + static int reserve_ds_buffers(void); static void release_ds_buffers(void); -- cgit v1.1 From 10f1014d86fd4fe5087080d609b51183396c5e4c Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 17 Mar 2010 12:49:12 +0100 Subject: perf/core, x86: Remove cpu_hw_events.interrupts This member in the struct is not used anymore and can be removed. 
Signed-off-by: Robert Richter Cc: Stephane Eranian Cc: Peter Zijlstra LKML-Reference: <1268826553-19518-4-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 1 - 1 file changed, 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 793e63f..104292a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -102,7 +102,6 @@ struct cpu_hw_events { */ struct perf_event *events[X86_PMC_IDX_MAX]; /* in counter order */ unsigned long active_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; - unsigned long interrupts; int enabled; int n_events; -- cgit v1.1 From d6dc0b4ead6e8720096ecfa3d9e899b47ddbc8ed Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Wed, 17 Mar 2010 12:49:13 +0100 Subject: perf/core, x86: Remove duplicate perf_event_mask variable The same information is stored also in x86_pmu.intel_ctrl. This patch removes perf_event_mask and instead uses x86_pmu.intel_ctrl directly. Signed-off-by: Robert Richter Cc: Stephane Eranian Cc: Peter Zijlstra LKML-Reference: <1268826553-19518-5-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 9 +++------ 1 file changed, 3 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 104292a..c97d5b5 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -75,8 +75,6 @@ copy_from_user_nmi(void *to, const void __user *from, unsigned long n) return len; } -static u64 perf_event_mask __read_mostly; - struct event_constraint { union { unsigned long idxmsk[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; @@ -1406,7 +1404,7 @@ void __init init_hw_perf_events(void) x86_pmu.num_events, X86_PMC_MAX_GENERIC); x86_pmu.num_events = X86_PMC_MAX_GENERIC; } - perf_event_mask = (1 << x86_pmu.num_events) - 1; + x86_pmu.intel_ctrl = (1 << x86_pmu.num_events) - 1; perf_max_events = x86_pmu.num_events; if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { @@ -1415,9 +1413,8 @@ void __init init_hw_perf_events(void) x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; } - perf_event_mask |= + x86_pmu.intel_ctrl |= ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; - x86_pmu.intel_ctrl = perf_event_mask; perf_events_lapic_init(); register_die_notifier(&perf_event_nmi_notifier); @@ -1442,7 +1439,7 @@ void __init init_hw_perf_events(void) pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); pr_info("... max period: %016Lx\n", x86_pmu.max_period); pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); - pr_info("... event mask: %016Lx\n", perf_event_mask); + pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); perf_cpu_notifier(x86_pmu_notifier); } -- cgit v1.1 From d674cd1963129b70bc5f631c51fb30fb73213fb2 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 17 Mar 2010 13:37:00 +0300 Subject: x86, apic: Allow to use certain functions without APIC built-in support In case even if the kernel is configured so that no APIC support is built-in we still may allow to use certain apic functions as dummy calls. In particular we start using it in perf-events code. Note that this is not that same as NOOP apic driver (which is used if APIC support is present but no physical APIC is available), this is for the case when we don't have apic code compiled in at all. Signed-off-by: Cyrill Gorcunov Cc: H. 
Peter Anvin Cc: Yinghai Lu Cc: Yinghai Lu LKML-Reference: <20100317104356.011052632@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/apic.h | 13 +++++++++++-- 1 file changed, 11 insertions(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/apic.h b/arch/x86/include/asm/apic.h index b4ac2cd..1fa03e0 100644 --- a/arch/x86/include/asm/apic.h +++ b/arch/x86/include/asm/apic.h @@ -373,6 +373,7 @@ extern atomic_t init_deasserted; extern int wakeup_secondary_cpu_via_nmi(int apicid, unsigned long start_eip); #endif +#ifdef CONFIG_X86_LOCAL_APIC static inline u32 apic_read(u32 reg) { return apic->read(reg); @@ -403,10 +404,19 @@ static inline u32 safe_apic_wait_icr_idle(void) return apic->safe_wait_icr_idle(); } +#else /* CONFIG_X86_LOCAL_APIC */ + +static inline u32 apic_read(u32 reg) { return 0; } +static inline void apic_write(u32 reg, u32 val) { } +static inline u64 apic_icr_read(void) { return 0; } +static inline void apic_icr_write(u32 low, u32 high) { } +static inline void apic_wait_icr_idle(void) { } +static inline u32 safe_apic_wait_icr_idle(void) { return 0; } + +#endif /* CONFIG_X86_LOCAL_APIC */ static inline void ack_APIC_irq(void) { -#ifdef CONFIG_X86_LOCAL_APIC /* * ack_APIC_irq() actually gets compiled as a single instruction * ... yummie. @@ -414,7 +424,6 @@ static inline void ack_APIC_irq(void) /* Docs say use 0 for future compatibility */ apic_write(APIC_EOI, 0); -#endif } static inline unsigned default_get_apic_id(unsigned long x) -- cgit v1.1 From 7335f75e9ca166044e38a96abad422d8e6e364b5 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 17 Mar 2010 13:37:01 +0300 Subject: x86, perf: Use apic_write unconditionally Since apic_write() maps to a plain noop in the !CONFIG_X86_LOCAL_APIC case we're safe to remove this conditional compilation and clean up the code a bit. Signed-off-by: Cyrill Gorcunov Cc: fweisbec@gmail.com Cc: acme@redhat.com Cc: eranian@google.com Cc: peterz@infradead.org LKML-Reference: <20100317104356.232371479@openvz.org> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 4 ---- arch/x86/kernel/cpu/perf_event_p4.c | 2 -- 2 files changed, 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index c97d5b5..14eca80 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1136,7 +1136,6 @@ void set_perf_event_pending(void) void perf_events_lapic_init(void) { -#ifdef CONFIG_X86_LOCAL_APIC if (!x86_pmu.apic || !x86_pmu_initialized()) return; @@ -1144,7 +1143,6 @@ void perf_events_lapic_init(void) * Always use NMI for PMU */ apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif } static int __kprobes @@ -1168,9 +1166,7 @@ perf_event_nmi_handler(struct notifier_block *self, regs = args->regs; -#ifdef CONFIG_X86_LOCAL_APIC apic_write(APIC_LVTPC, APIC_DM_NMI); -#endif /* * Can't rely on the handled return value to say it was our NMI, two * events could trigger 'simultaneously' raising two back-to-back NMIs. 
diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index a11ce73..0367889 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -363,10 +363,8 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) } if (handled) { -#ifdef CONFIG_X86_LOCAL_APIC /* p4 quirk: unmask it again */ apic_write(APIC_LVTPC, apic_read(APIC_LVTPC) & ~APIC_LVT_MASKED); -#endif inc_irq_stat(apic_perf_irqs); } -- cgit v1.1 From f34edbc1cdb0f8f83d94e1d668dd6e41abf0defb Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 18 Mar 2010 18:33:07 +0800 Subject: perf, x86: Add a key to simplify template lookup in Pentium-4 PMU Currently, we use opcode(Event and Event-Selector) + emask to look up template in p4_templates. But cache events (L1-dcache-load-misses, LLC-load-misses, etc) use the same event(P4_REPLAY_EVENT) to do the counting, ie, they have the same opcode and emask. So we can not use current lookup mechanism to find the template for cache events. This patch introduces a "key", which is the index into p4_templates. The low 12 bits of CCCR are reserved, so we can hide the "key" in the low 12 bits of hwc->config. We extract the key from hwc->config and then quickly find the template. Signed-off-by: Lin Ming Reviewed-by: Cyrill Gorcunov Cc: Peter Zijlstra LKML-Reference: <1268908387.13901.127.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 5 ++- arch/x86/kernel/cpu/perf_event_p4.c | 86 ++++++++++++++---------------------- 2 files changed, 38 insertions(+), 53 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index b842b32..7d3406a 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -65,6 +65,7 @@ #define P4_CCCR_THREAD_SINGLE 0x00010000U #define P4_CCCR_THREAD_BOTH 0x00020000U #define P4_CCCR_THREAD_ANY 0x00030000U +#define P4_CCCR_RESERVED 0x00000fffU /* Non HT mask */ #define P4_CCCR_MASK \ @@ -116,7 +117,7 @@ #define p4_config_pack_escr(v) (((u64)(v)) << 32) #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) #define p4_config_unpack_escr(v) (((u64)(v)) >> 32) -#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) +#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xfffff000ULL) #define p4_config_unpack_emask(v) \ ({ \ @@ -126,6 +127,8 @@ t; \ }) +#define p4_config_unpack_key(v) (((u64)(v)) & P4_CCCR_RESERVED) + #define P4_CONFIG_HT_SHIFT 63 #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 0367889..3e97ed3 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -18,6 +18,7 @@ struct p4_event_template { u32 opcode; /* ESCR event + CCCR selector */ u64 config; /* packed predefined bits */ int dep; /* upstream dependency event index */ + int key; /* index into p4_templates */ unsigned int emask; /* ESCR EventMask */ unsigned int escr_msr[2]; /* ESCR MSR for this event */ unsigned int cntr[2]; /* counter index (offset) */ @@ -39,38 +40,31 @@ static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config); */ struct p4_event_template p4_templates[] = { [0] = { - .opcode = P4_UOP_TYPE, - .config = 0, - .dep = -1, - .emask = - P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | - P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), - .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, - .cntr = { 16, 17 }, - }, - [1] = { .opcode = P4_GLOBAL_POWER_EVENTS, .config = 0, .dep = -1, 
+ .key = 0, .emask = P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, .cntr = { 0, 2 }, }, - [2] = { + [1] = { .opcode = P4_INSTR_RETIRED, .config = 0, .dep = -1, /* needs front-end tagging */ + .key = 1, .emask = P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) | P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, .cntr = { 12, 14 }, }, - [3] = { + [2] = { .opcode = P4_BSQ_CACHE_REFERENCE, .config = 0, .dep = -1, + .key = 2, .emask = P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | @@ -81,10 +75,11 @@ struct p4_event_template p4_templates[] = { .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, .cntr = { 0, 2 }, }, - [4] = { + [3] = { .opcode = P4_BSQ_CACHE_REFERENCE, .config = 0, .dep = -1, + .key = 3, .emask = P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | @@ -92,10 +87,11 @@ struct p4_event_template p4_templates[] = { .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, .cntr = { 0, 3 }, }, - [5] = { + [4] = { .opcode = P4_RETIRED_BRANCH_TYPE, .config = 0, .dep = -1, + .key = 4, .emask = P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) | P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) | @@ -104,48 +100,38 @@ struct p4_event_template p4_templates[] = { .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, .cntr = { 4, 6 }, }, - [6] = { + [5] = { .opcode = P4_MISPRED_BRANCH_RETIRED, .config = 0, .dep = -1, + .key = 5, .emask = P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS), .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, .cntr = { 12, 14 }, }, - [7] = { + [6] = { .opcode = P4_FSB_DATA_ACTIVITY, .config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), .dep = -1, + .key = 6, .emask = P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) | P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN), .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, .cntr = { 0, 2 }, }, -}; - -static struct p4_event_template *p4_event_map[PERF_COUNT_HW_MAX] = { - /* non-halted CPU clocks */ - [PERF_COUNT_HW_CPU_CYCLES] = &p4_templates[1], - - /* retired instructions: dep on tagging the FSB */ - [PERF_COUNT_HW_INSTRUCTIONS] = &p4_templates[2], - - /* cache hits */ - [PERF_COUNT_HW_CACHE_REFERENCES] = &p4_templates[3], - - /* cache misses */ - [PERF_COUNT_HW_CACHE_MISSES] = &p4_templates[4], - - /* branch instructions retired */ - [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = &p4_templates[5], - - /* mispredicted branches retired */ - [PERF_COUNT_HW_BRANCH_MISSES] = &p4_templates[6], - - /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ - [PERF_COUNT_HW_BUS_CYCLES] = &p4_templates[7], + [7] = { + .opcode = P4_UOP_TYPE, + .config = 0, + .dep = -1, + .key = 7, + .emask = + P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | + P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), + .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, + .cntr = { 16, 17 }, + }, }; static u64 p4_pmu_event_map(int hw_event) @@ -153,11 +139,11 @@ static u64 p4_pmu_event_map(int hw_event) struct p4_event_template *tpl; u64 config; - if (hw_event > ARRAY_SIZE(p4_event_map)) { + if (hw_event > ARRAY_SIZE(p4_templates)) { printk_once(KERN_ERR "PMU: Incorrect event index\n"); return 0; } - tpl = p4_event_map[hw_event]; + tpl = &p4_templates[hw_event]; /* * fill config up according to @@ -167,6 +153,7 @@ static u64 p4_pmu_event_map(int hw_event) config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT); config |= 
p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT); config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); + config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED); /* on HT machine we need a special bit */ if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id())) @@ -187,17 +174,12 @@ static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src) static struct p4_event_template *p4_pmu_template_lookup(u64 config) { - u32 opcode = p4_config_unpack_opcode(config); - unsigned int emask = p4_config_unpack_emask(config); - unsigned int i; - - for (i = 0; i < ARRAY_SIZE(p4_templates); i++) { - if (opcode == p4_templates[i].opcode && - p4_pmu_emask_match(emask, p4_templates[i].emask)) - return &p4_templates[i]; - } + int key = p4_config_unpack_key(config); - return NULL; + if (key < ARRAY_SIZE(p4_templates)) + return &p4_templates[key]; + else + return NULL; } /* @@ -564,7 +546,7 @@ static __initconst struct x86_pmu p4_pmu = { .perfctr = MSR_P4_BPU_PERFCTR0, .event_map = p4_pmu_event_map, .raw_event = p4_pmu_raw_event, - .max_events = ARRAY_SIZE(p4_event_map), + .max_events = ARRAY_SIZE(p4_templates), .get_event_constraints = x86_get_event_constraints, /* * IF HT disabled we may need to use all -- cgit v1.1 From cb7d6b5053e86598735d9af19930f5929f007b7f Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Thu, 18 Mar 2010 18:33:12 +0800 Subject: perf, x86: Add cache events for the Pentium-4 PMU Move the HT bit setting code from p4_pmu_event_map to p4_hw_config. So the cache events can get HT bit set correctly. Tested on my P4 desktop, below 6 cache events work: L1-dcache-load-misses LLC-load-misses dTLB-load-misses dTLB-store-misses iTLB-loads iTLB-load-misses Signed-off-by: Lin Ming Reviewed-by: Cyrill Gorcunov Cc: Peter Zijlstra LKML-Reference: <1268908392.13901.128.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 2 + arch/x86/include/asm/perf_event_p4.h | 10 +++ arch/x86/kernel/cpu/perf_event_p4.c | 153 +++++++++++++++++++++++++++++++++-- 3 files changed, 159 insertions(+), 6 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index 1cd58cd..aef562c 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -357,6 +357,8 @@ #define MSR_P4_U2L_ESCR0 0x000003b0 #define MSR_P4_U2L_ESCR1 0x000003b1 +#define MSR_P4_PEBS_MATRIX_VERT 0x000003f2 + /* Intel Core-based CPU performance counters */ #define MSR_CORE_PERF_FIXED_CTR0 0x00000309 #define MSR_CORE_PERF_FIXED_CTR1 0x0000030a diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 7d3406a..871249c 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -708,4 +708,14 @@ enum P4_EVENTS_ATTR { P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1), }; +enum { + KEY_P4_L1D_OP_READ_RESULT_MISS, + KEY_P4_LL_OP_READ_RESULT_MISS, + KEY_P4_DTLB_OP_READ_RESULT_MISS, + KEY_P4_DTLB_OP_WRITE_RESULT_MISS, + KEY_P4_ITLB_OP_READ_RESULT_ACCESS, + KEY_P4_ITLB_OP_READ_RESULT_MISS, + KEY_P4_UOP_TYPE, +}; + #endif /* PERF_EVENT_P4_H */ diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 3e97ed3..b7bf991 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -19,6 +19,11 @@ struct p4_event_template { u64 config; /* packed predefined bits */ int dep; /* upstream dependency event index */ int key; /* index into 
p4_templates */ + u64 msr; /* + * the high 32 bits set into MSR_IA32_PEBS_ENABLE and + * the low 32 bits set into MSR_P4_PEBS_MATRIX_VERT + * for cache events + */ unsigned int emask; /* ESCR EventMask */ unsigned int escr_msr[2]; /* ESCR MSR for this event */ unsigned int cntr[2]; /* counter index (offset) */ @@ -31,6 +36,67 @@ struct p4_pmu_res { static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config); +#define P4_CACHE_EVENT_CONFIG(event, bit) \ + p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(event) << P4_EVNTSEL_EVENT_SHIFT) | \ + p4_config_pack_escr((event##_##bit) << P4_EVNTSEL_EVENTMASK_SHIFT) | \ + p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(event) << P4_CCCR_ESCR_SELECT_SHIFT) + +static __initconst u64 p4_hw_cache_event_ids + [PERF_COUNT_HW_CACHE_MAX] + [PERF_COUNT_HW_CACHE_OP_MAX] + [PERF_COUNT_HW_CACHE_RESULT_MAX] = +{ + [ C(L1D ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + /* 1stL_cache_load_miss_retired */ + [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) + | KEY_P4_L1D_OP_READ_RESULT_MISS, + }, + }, + [ C(LL ) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + /* 2ndL_cache_load_miss_retired */ + [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) + | KEY_P4_LL_OP_READ_RESULT_MISS, + }, + }, + [ C(DTLB) ] = { + [ C(OP_READ) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + /* DTLB_load_miss_retired */ + [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) + | KEY_P4_DTLB_OP_READ_RESULT_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = 0x0, + /* DTLB_store_miss_retired */ + [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) + | KEY_P4_DTLB_OP_WRITE_RESULT_MISS, + }, + }, + [ C(ITLB) ] = { + [ C(OP_READ) ] = { + /* ITLB_reference.HIT */ + [ C(RESULT_ACCESS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, HIT) + | KEY_P4_ITLB_OP_READ_RESULT_ACCESS, + + /* ITLB_reference.MISS */ + [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, MISS) + | KEY_P4_ITLB_OP_READ_RESULT_MISS, + }, + [ C(OP_WRITE) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + [ C(OP_PREFETCH) ] = { + [ C(RESULT_ACCESS) ] = -1, + [ C(RESULT_MISS) ] = -1, + }, + }, +}; + /* * WARN: CCCR1 doesn't have a working enable bit so try to not * use it if possible @@ -121,11 +187,77 @@ struct p4_event_template p4_templates[] = { .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, .cntr = { 0, 2 }, }, - [7] = { + [KEY_P4_L1D_OP_READ_RESULT_MISS] = { + .opcode = P4_REPLAY_EVENT, + .config = 0, + .dep = -1, + .msr = (u64)(1 << 0 | 1 << 24) << 32 | (1 << 0), + .key = KEY_P4_L1D_OP_READ_RESULT_MISS, + .emask = + P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, + .cntr = { 16, 17 }, + }, + [KEY_P4_LL_OP_READ_RESULT_MISS] = { + .opcode = P4_REPLAY_EVENT, + .config = 0, + .dep = -1, + .msr = (u64)(1 << 1 | 1 << 24) << 32 | (1 << 0), + .key = KEY_P4_LL_OP_READ_RESULT_MISS, + .emask = + P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, + .cntr = { 16, 17 }, + }, + [KEY_P4_DTLB_OP_READ_RESULT_MISS] = { + .opcode = P4_REPLAY_EVENT, + .config = 0, + .dep = -1, + .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 0), + .key = KEY_P4_DTLB_OP_READ_RESULT_MISS, + .emask = + P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, + .cntr = { 16, 17 }, + }, + [KEY_P4_DTLB_OP_WRITE_RESULT_MISS] = { + .opcode = P4_REPLAY_EVENT, + .config = 0, + .dep = -1, + .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 1), + .key = 
KEY_P4_DTLB_OP_WRITE_RESULT_MISS, + .emask = + P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, + .cntr = { 16, 17 }, + }, + [KEY_P4_ITLB_OP_READ_RESULT_ACCESS] = { + .opcode = P4_ITLB_REFERENCE, + .config = 0, + .dep = -1, + .msr = 0, + .key = KEY_P4_ITLB_OP_READ_RESULT_ACCESS, + .emask = + P4_EVENT_ATTR(P4_ITLB_REFERENCE, HIT), + .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, + .cntr = { 0, 2 }, + }, + [KEY_P4_ITLB_OP_READ_RESULT_MISS] = { + .opcode = P4_ITLB_REFERENCE, + .config = 0, + .dep = -1, + .msr = 0, + .key = KEY_P4_ITLB_OP_READ_RESULT_MISS, + .emask = + P4_EVENT_ATTR(P4_ITLB_REFERENCE, MISS), + .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, + .cntr = { 0, 2 }, + }, + [KEY_P4_UOP_TYPE] = { .opcode = P4_UOP_TYPE, .config = 0, .dep = -1, - .key = 7, + .key = KEY_P4_UOP_TYPE, .emask = P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), @@ -155,10 +287,6 @@ static u64 p4_pmu_event_map(int hw_event) config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED); - /* on HT machine we need a special bit */ - if (p4_ht_active() && p4_ht_thread(raw_smp_processor_id())) - config = p4_set_ht_bit(config); - return config; } @@ -211,6 +339,10 @@ static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) /* Count user and OS events unless not requested to */ hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel, attr->exclude_user)); + /* on HT machine we need a special bit */ + if (p4_ht_active() && p4_ht_thread(cpu)) + hwc->config = p4_set_ht_bit(hwc->config); + return 0; } @@ -271,6 +403,12 @@ static void p4_pmu_enable_event(struct perf_event *event) pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx); return; } + + if (tpl->msr) { + (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, tpl->msr >> 32); + (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, tpl->msr & 0xffffffff); + } + escr_base = (u64)tpl->escr_msr[thread]; /* @@ -577,6 +715,9 @@ static __init int p4_pmu_init(void) return -ENODEV; } + memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, + sizeof(hw_cache_event_ids)); + pr_cont("Netburst events, "); x86_pmu = p4_pmu; -- cgit v1.1 From 4b24a88b35e15e04bd8f2c5dda65b5dc8ebca05f Mon Sep 17 00:00:00 2001 From: Stephane Eranian Date: Wed, 17 Mar 2010 23:21:01 +0200 Subject: perf_events: Fix resource leak in x86 __hw_perf_event_init() If reserve_pmc_hardware() succeeds but reserve_ds_buffers() fails, then we need to release_pmc_hardware. It won't be done by the destroy() callback because we return before setting it in case of error. 
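A minimal sketch of the corrected acquisition order, condensed from the __hw_perf_event_init() hunk below (the pmc_reserve_mutex locking around this block is elided):

        err = 0;
        if (atomic_read(&active_events) == 0) {
                if (!reserve_pmc_hardware())
                        err = -EBUSY;
                else {
                        err = reserve_ds_buffers();
                        if (err)
                                release_pmc_hardware(); /* roll back the PMC reservation */
                }
        }
        if (!err)
                atomic_inc(&active_events);

        /*
         * event->destroy (hw_perf_event_destroy) is only installed after
         * this block succeeds, so it cannot be relied on to undo a
         * half-done reservation on this error path.
         */
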
Signed-off-by: Stephane Eranian Cc: Cc: peterz@infradead.org Cc: paulus@samba.org Cc: davem@davemloft.net Cc: fweisbec@gmail.com Cc: robert.richter@amd.com Cc: perfmon2-devel@lists.sf.net LKML-Reference: <4ba1568b.15185e0a.182a.7802@mx.google.com> Signed-off-by: Ingo Molnar -- arch/x86/kernel/cpu/perf_event.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) --- arch/x86/kernel/cpu/perf_event.c | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 14eca80..f571f51 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -455,8 +455,11 @@ static int __hw_perf_event_init(struct perf_event *event) if (atomic_read(&active_events) == 0) { if (!reserve_pmc_hardware()) err = -EBUSY; - else + else { err = reserve_ds_buffers(); + if (err) + release_pmc_hardware(); + } } if (!err) atomic_inc(&active_events); -- cgit v1.1 From 9c8c6bad3137112d2c7bf3d215b736ee4215fa74 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Fri, 19 Mar 2010 00:12:56 +0300 Subject: x86, perf: Fix few cosmetic dabs for P4 pmu (comments and constantify) - A few ESCR have escaped fixing at previous attempt. - p4_escr_map is read only, make it const. Nothing serious. Signed-off-by: Cyrill Gorcunov Cc: Lin Ming LKML-Reference: <20100318211256.GH5062@lenovo> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 4 ++-- arch/x86/kernel/cpu/perf_event_p4.c | 2 +- 2 files changed, 3 insertions(+), 3 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 871249c..2a1a57f 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -401,13 +401,13 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) #define P4_RETIRED_MISPRED_BRANCH_TYPE P4_EVENT_PACK(0x05, 0x02) /* * MSR_P4_TBPU_ESCR0: 4, 5 - * MSR_P4_TBPU_ESCR0: 6, 7 + * MSR_P4_TBPU_ESCR1: 6, 7 */ #define P4_RETIRED_BRANCH_TYPE P4_EVENT_PACK(0x04, 0x02) /* * MSR_P4_TBPU_ESCR0: 4, 5 - * MSR_P4_TBPU_ESCR0: 6, 7 + * MSR_P4_TBPU_ESCR1: 6, 7 */ #define P4_RESOURCE_STALL P4_EVENT_PACK(0x01, 0x01) diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index b7bf991..b8a811a 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -545,7 +545,7 @@ static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) } /* ESCRs are not sequential in memory so we need a map */ -static unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = { +static const unsigned int p4_escr_map[ARCH_P4_TOTAL_ESCR] = { MSR_P4_ALF_ESCR0, /* 0 */ MSR_P4_ALF_ESCR1, /* 1 */ MSR_P4_BPU_ESCR0, /* 2 */ -- cgit v1.1 From 40b7e05e17eef31ff30fe08dfc2424ef653a792c Mon Sep 17 00:00:00 2001 From: Lin Ming Date: Fri, 19 Mar 2010 15:28:58 +0800 Subject: perf, x86: Fix key indexing in Pentium-4 PMU Index 0-6 in p4_templates are reserved for common hardware events. 
So p4_templates is arranged as below: 0 - 6: common hardware events 7 - N: cache events N+1 - ...: other raw events Reported-by: Cyrill Gorcunov Signed-off-by: Lin Ming Acked-by: Cyrill Gorcunov Cc: Peter Zijlstra LKML-Reference: <1268983738.13901.142.camel@minggr.sh.intel.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index 2a1a57f..facf9618 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -709,7 +709,7 @@ enum P4_EVENTS_ATTR { }; enum { - KEY_P4_L1D_OP_READ_RESULT_MISS, + KEY_P4_L1D_OP_READ_RESULT_MISS = PERF_COUNT_HW_MAX, KEY_P4_LL_OP_READ_RESULT_MISS, KEY_P4_DTLB_OP_READ_RESULT_MISS, KEY_P4_DTLB_OP_WRITE_RESULT_MISS, -- cgit v1.1 From d814f30105798b6677ecb73ed61d691ff96dada9 Mon Sep 17 00:00:00 2001 From: Cyrill Gorcunov Date: Wed, 24 Mar 2010 12:09:26 +0800 Subject: x86, perf: Add raw events support for the P4 PMU The adding of raw event support lead to complete code refactoring. I hope is became more readable then it was. The list of changes: 1) The 64bit config field is enough to hold all information we need to track event details. To achieve it we used *own* enum for events selection in ESCR register and map this key into proper value at moment of event enabling. For the same reason we use 12LSB bits in CCCR register -- to track which exactly cache trace event was requested. And we cear this bits at real 'write' moment. 2) There is no per-cpu area reserved for P4 PMU anymore. We don't need it. All is held by config. 3) Now we may use any available counter, ie we try to grab any possible counter. v2: - Lin Ming reported the lack of ESCR selector in CCCR for cache events v3: - Don't loose cache event codes at config unpacking procedure, we may need it one day so no obscure hack behind our back, better to clear reserved bits explicitly when needed (thanks Ming for pointing out) - Lin Ming fixed misplaced opcodes in cache events Signed-off-by: Cyrill Gorcunov Tested-by: Lin Ming Signed-off-by: Lin Ming Cc: Arnaldo Carvalho de Melo Cc: Stephane Eranian Cc: Robert Richter Cc: Frederic Weisbecker Cc: Cyrill Gorcunov Cc: Peter Zijlstra LKML-Reference: <1269403766.3409.6.camel@minggr.sh.intel.com> [ v4: did a few whitespace fixlets ] Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event_p4.h | 691 +++++++++++++++++--------------- arch/x86/kernel/cpu/perf_event_p4.c | 746 ++++++++++++++++++++--------------- 2 files changed, 806 insertions(+), 631 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event_p4.h b/arch/x86/include/asm/perf_event_p4.h index facf9618..b05400a 100644 --- a/arch/x86/include/asm/perf_event_p4.h +++ b/arch/x86/include/asm/perf_event_p4.h @@ -15,38 +15,40 @@ * perf-MSRs are not shared and every thread has its * own perf-MSRs set) */ -#define ARCH_P4_TOTAL_ESCR (46) -#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ -#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) -#define ARCH_P4_MAX_CCCR (18) -#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) - -#define P4_EVNTSEL_EVENT_MASK 0x7e000000U -#define P4_EVNTSEL_EVENT_SHIFT 25 -#define P4_EVNTSEL_EVENTMASK_MASK 0x01fffe00U -#define P4_EVNTSEL_EVENTMASK_SHIFT 9 -#define P4_EVNTSEL_TAG_MASK 0x000001e0U -#define P4_EVNTSEL_TAG_SHIFT 5 -#define P4_EVNTSEL_TAG_ENABLE 0x00000010U -#define P4_EVNTSEL_T0_OS 0x00000008U 
-#define P4_EVNTSEL_T0_USR 0x00000004U -#define P4_EVNTSEL_T1_OS 0x00000002U -#define P4_EVNTSEL_T1_USR 0x00000001U +#define ARCH_P4_TOTAL_ESCR (46) +#define ARCH_P4_RESERVED_ESCR (2) /* IQ_ESCR(0,1) not always present */ +#define ARCH_P4_MAX_ESCR (ARCH_P4_TOTAL_ESCR - ARCH_P4_RESERVED_ESCR) +#define ARCH_P4_MAX_CCCR (18) +#define ARCH_P4_MAX_COUNTER (ARCH_P4_MAX_CCCR / 2) + +#define P4_ESCR_EVENT_MASK 0x7e000000U +#define P4_ESCR_EVENT_SHIFT 25 +#define P4_ESCR_EVENTMASK_MASK 0x01fffe00U +#define P4_ESCR_EVENTMASK_SHIFT 9 +#define P4_ESCR_TAG_MASK 0x000001e0U +#define P4_ESCR_TAG_SHIFT 5 +#define P4_ESCR_TAG_ENABLE 0x00000010U +#define P4_ESCR_T0_OS 0x00000008U +#define P4_ESCR_T0_USR 0x00000004U +#define P4_ESCR_T1_OS 0x00000002U +#define P4_ESCR_T1_USR 0x00000001U + +#define P4_ESCR_EVENT(v) ((v) << P4_ESCR_EVENT_SHIFT) +#define P4_ESCR_EMASK(v) ((v) << P4_ESCR_EVENTMASK_SHIFT) +#define P4_ESCR_TAG(v) ((v) << P4_ESCR_TAG_SHIFT) /* Non HT mask */ -#define P4_EVNTSEL_MASK \ - (P4_EVNTSEL_EVENT_MASK | \ - P4_EVNTSEL_EVENTMASK_MASK | \ - P4_EVNTSEL_TAG_MASK | \ - P4_EVNTSEL_TAG_ENABLE | \ - P4_EVNTSEL_T0_OS | \ - P4_EVNTSEL_T0_USR) +#define P4_ESCR_MASK \ + (P4_ESCR_EVENT_MASK | \ + P4_ESCR_EVENTMASK_MASK | \ + P4_ESCR_TAG_MASK | \ + P4_ESCR_TAG_ENABLE | \ + P4_ESCR_T0_OS | \ + P4_ESCR_T0_USR) /* HT mask */ -#define P4_EVNTSEL_MASK_HT \ - (P4_EVNTSEL_MASK | \ - P4_EVNTSEL_T1_OS | \ - P4_EVNTSEL_T1_USR) +#define P4_ESCR_MASK_HT \ + (P4_ESCR_MASK | P4_ESCR_T1_OS | P4_ESCR_T1_USR) #define P4_CCCR_OVF 0x80000000U #define P4_CCCR_CASCADE 0x40000000U @@ -56,7 +58,6 @@ #define P4_CCCR_EDGE 0x01000000U #define P4_CCCR_THRESHOLD_MASK 0x00f00000U #define P4_CCCR_THRESHOLD_SHIFT 20 -#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) #define P4_CCCR_COMPLEMENT 0x00080000U #define P4_CCCR_COMPARE 0x00040000U #define P4_CCCR_ESCR_SELECT_MASK 0x0000e000U @@ -67,6 +68,13 @@ #define P4_CCCR_THREAD_ANY 0x00030000U #define P4_CCCR_RESERVED 0x00000fffU +#define P4_CCCR_THRESHOLD(v) ((v) << P4_CCCR_THRESHOLD_SHIFT) +#define P4_CCCR_ESEL(v) ((v) << P4_CCCR_ESCR_SELECT_SHIFT) + +/* Custom bits in reerved CCCR area */ +#define P4_CCCR_CACHE_OPS_MASK 0x0000003fU + + /* Non HT mask */ #define P4_CCCR_MASK \ (P4_CCCR_OVF | \ @@ -81,25 +89,11 @@ P4_CCCR_ENABLE) /* HT mask */ -#define P4_CCCR_MASK_HT \ - (P4_CCCR_MASK | \ - P4_CCCR_THREAD_ANY) +#define P4_CCCR_MASK_HT (P4_CCCR_MASK | P4_CCCR_THREAD_ANY) -/* - * format is 32 bit: ee ss aa aa - * where - * ee - 8 bit event - * ss - 8 bit selector - * aa aa - 16 bits reserved for tags/attributes - */ -#define P4_EVENT_PACK(event, selector) (((event) << 24) | ((selector) << 16)) -#define P4_EVENT_UNPACK_EVENT(packed) (((packed) >> 24) & 0xff) -#define P4_EVENT_UNPACK_SELECTOR(packed) (((packed) >> 16) & 0xff) -#define P4_EVENT_PACK_ATTR(attr) ((attr)) -#define P4_EVENT_UNPACK_ATTR(packed) ((packed) & 0xffff) -#define P4_MAKE_EVENT_ATTR(class, name, bit) class##_##name = (1 << bit) -#define P4_EVENT_ATTR(class, name) class##_##name -#define P4_EVENT_ATTR_STR(class, name) __stringify(class##_##name) +#define P4_GEN_ESCR_EMASK(class, name, bit) \ + class##__##name = ((1 << bit) << P4_ESCR_EVENTMASK_SHIFT) +#define P4_ESCR_EMASK_BIT(class, name) class##__##name /* * config field is 64bit width and consists of @@ -117,35 +111,29 @@ #define p4_config_pack_escr(v) (((u64)(v)) << 32) #define p4_config_pack_cccr(v) (((u64)(v)) & 0xffffffffULL) #define p4_config_unpack_escr(v) (((u64)(v)) >> 32) -#define p4_config_unpack_cccr(v) (((u64)(v)) & 0xfffff000ULL) +#define 
p4_config_unpack_cccr(v) (((u64)(v)) & 0xffffffffULL) #define p4_config_unpack_emask(v) \ ({ \ u32 t = p4_config_unpack_escr((v)); \ - t &= P4_EVNTSEL_EVENTMASK_MASK; \ - t >>= P4_EVNTSEL_EVENTMASK_SHIFT; \ + t = t & P4_ESCR_EVENTMASK_MASK; \ + t = t >> P4_ESCR_EVENTMASK_SHIFT; \ + t; \ + }) + +#define p4_config_unpack_event(v) \ + ({ \ + u32 t = p4_config_unpack_escr((v)); \ + t = t & P4_ESCR_EVENT_MASK; \ + t = t >> P4_ESCR_EVENT_SHIFT; \ t; \ }) -#define p4_config_unpack_key(v) (((u64)(v)) & P4_CCCR_RESERVED) +#define p4_config_unpack_cache_event(v) (((u64)(v)) & P4_CCCR_CACHE_OPS_MASK) #define P4_CONFIG_HT_SHIFT 63 #define P4_CONFIG_HT (1ULL << P4_CONFIG_HT_SHIFT) -static inline u32 p4_config_unpack_opcode(u64 config) -{ - u32 e, s; - - /* - * we don't care about HT presence here since - * event opcode doesn't depend on it - */ - e = (p4_config_unpack_escr(config) & P4_EVNTSEL_EVENT_MASK) >> P4_EVNTSEL_EVENT_SHIFT; - s = (p4_config_unpack_cccr(config) & P4_CCCR_ESCR_SELECT_MASK) >> P4_CCCR_ESCR_SELECT_SHIFT; - - return P4_EVENT_PACK(e, s); -} - static inline bool p4_is_event_cascaded(u64 config) { u32 cccr = p4_config_unpack_cccr(config); @@ -212,19 +200,73 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) if (!p4_ht_thread(cpu)) { if (!exclude_os) - escr |= P4_EVNTSEL_T0_OS; + escr |= P4_ESCR_T0_OS; if (!exclude_usr) - escr |= P4_EVNTSEL_T0_USR; + escr |= P4_ESCR_T0_USR; } else { if (!exclude_os) - escr |= P4_EVNTSEL_T1_OS; + escr |= P4_ESCR_T1_OS; if (!exclude_usr) - escr |= P4_EVNTSEL_T1_USR; + escr |= P4_ESCR_T1_USR; } return escr; } +enum P4_EVENTS { + P4_EVENT_TC_DELIVER_MODE, + P4_EVENT_BPU_FETCH_REQUEST, + P4_EVENT_ITLB_REFERENCE, + P4_EVENT_MEMORY_CANCEL, + P4_EVENT_MEMORY_COMPLETE, + P4_EVENT_LOAD_PORT_REPLAY, + P4_EVENT_STORE_PORT_REPLAY, + P4_EVENT_MOB_LOAD_REPLAY, + P4_EVENT_PAGE_WALK_TYPE, + P4_EVENT_BSQ_CACHE_REFERENCE, + P4_EVENT_IOQ_ALLOCATION, + P4_EVENT_IOQ_ACTIVE_ENTRIES, + P4_EVENT_FSB_DATA_ACTIVITY, + P4_EVENT_BSQ_ALLOCATION, + P4_EVENT_BSQ_ACTIVE_ENTRIES, + P4_EVENT_SSE_INPUT_ASSIST, + P4_EVENT_PACKED_SP_UOP, + P4_EVENT_PACKED_DP_UOP, + P4_EVENT_SCALAR_SP_UOP, + P4_EVENT_SCALAR_DP_UOP, + P4_EVENT_64BIT_MMX_UOP, + P4_EVENT_128BIT_MMX_UOP, + P4_EVENT_X87_FP_UOP, + P4_EVENT_TC_MISC, + P4_EVENT_GLOBAL_POWER_EVENTS, + P4_EVENT_TC_MS_XFER, + P4_EVENT_UOP_QUEUE_WRITES, + P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, + P4_EVENT_RETIRED_BRANCH_TYPE, + P4_EVENT_RESOURCE_STALL, + P4_EVENT_WC_BUFFER, + P4_EVENT_B2B_CYCLES, + P4_EVENT_BNR, + P4_EVENT_SNOOP, + P4_EVENT_RESPONSE, + P4_EVENT_FRONT_END_EVENT, + P4_EVENT_EXECUTION_EVENT, + P4_EVENT_REPLAY_EVENT, + P4_EVENT_INSTR_RETIRED, + P4_EVENT_UOPS_RETIRED, + P4_EVENT_UOP_TYPE, + P4_EVENT_BRANCH_RETIRED, + P4_EVENT_MISPRED_BRANCH_RETIRED, + P4_EVENT_X87_ASSIST, + P4_EVENT_MACHINE_CLEAR, + P4_EVENT_INSTR_COMPLETED, +}; + +#define P4_OPCODE(event) event##_OPCODE +#define P4_OPCODE_ESEL(opcode) ((opcode & 0x00ff) >> 0) +#define P4_OPCODE_EVNT(opcode) ((opcode & 0xff00) >> 8) +#define P4_OPCODE_PACK(event, sel) (((event) << 8) | sel) + /* * Comments below the event represent ESCR restriction * for this event and counter index per ESCR @@ -238,484 +280,515 @@ static inline u32 p4_default_escr_conf(int cpu, int exclude_os, int exclude_usr) * working so that we should not use this CCCR and respective * counter as result */ -#define P4_TC_DELIVER_MODE P4_EVENT_PACK(0x01, 0x01) +enum P4_EVENT_OPCODES { + P4_OPCODE(P4_EVENT_TC_DELIVER_MODE) = P4_OPCODE_PACK(0x01, 0x01), /* * MSR_P4_TC_ESCR0: 4, 5 * 
MSR_P4_TC_ESCR1: 6, 7 */ -#define P4_BPU_FETCH_REQUEST P4_EVENT_PACK(0x03, 0x00) + P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST) = P4_OPCODE_PACK(0x03, 0x00), /* * MSR_P4_BPU_ESCR0: 0, 1 * MSR_P4_BPU_ESCR1: 2, 3 */ -#define P4_ITLB_REFERENCE P4_EVENT_PACK(0x18, 0x03) + P4_OPCODE(P4_EVENT_ITLB_REFERENCE) = P4_OPCODE_PACK(0x18, 0x03), /* * MSR_P4_ITLB_ESCR0: 0, 1 * MSR_P4_ITLB_ESCR1: 2, 3 */ -#define P4_MEMORY_CANCEL P4_EVENT_PACK(0x02, 0x05) + P4_OPCODE(P4_EVENT_MEMORY_CANCEL) = P4_OPCODE_PACK(0x02, 0x05), /* * MSR_P4_DAC_ESCR0: 8, 9 * MSR_P4_DAC_ESCR1: 10, 11 */ -#define P4_MEMORY_COMPLETE P4_EVENT_PACK(0x08, 0x02) + P4_OPCODE(P4_EVENT_MEMORY_COMPLETE) = P4_OPCODE_PACK(0x08, 0x02), /* * MSR_P4_SAAT_ESCR0: 8, 9 * MSR_P4_SAAT_ESCR1: 10, 11 */ -#define P4_LOAD_PORT_REPLAY P4_EVENT_PACK(0x04, 0x02) + P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY) = P4_OPCODE_PACK(0x04, 0x02), /* * MSR_P4_SAAT_ESCR0: 8, 9 * MSR_P4_SAAT_ESCR1: 10, 11 */ -#define P4_STORE_PORT_REPLAY P4_EVENT_PACK(0x05, 0x02) + P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY) = P4_OPCODE_PACK(0x05, 0x02), /* * MSR_P4_SAAT_ESCR0: 8, 9 * MSR_P4_SAAT_ESCR1: 10, 11 */ -#define P4_MOB_LOAD_REPLAY P4_EVENT_PACK(0x03, 0x02) + P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY) = P4_OPCODE_PACK(0x03, 0x02), /* * MSR_P4_MOB_ESCR0: 0, 1 * MSR_P4_MOB_ESCR1: 2, 3 */ -#define P4_PAGE_WALK_TYPE P4_EVENT_PACK(0x01, 0x04) + P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE) = P4_OPCODE_PACK(0x01, 0x04), /* * MSR_P4_PMH_ESCR0: 0, 1 * MSR_P4_PMH_ESCR1: 2, 3 */ -#define P4_BSQ_CACHE_REFERENCE P4_EVENT_PACK(0x0c, 0x07) + P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE) = P4_OPCODE_PACK(0x0c, 0x07), /* * MSR_P4_BSU_ESCR0: 0, 1 * MSR_P4_BSU_ESCR1: 2, 3 */ -#define P4_IOQ_ALLOCATION P4_EVENT_PACK(0x03, 0x06) + P4_OPCODE(P4_EVENT_IOQ_ALLOCATION) = P4_OPCODE_PACK(0x03, 0x06), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_IOQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x1a, 0x06) + P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x1a, 0x06), /* * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_FSB_DATA_ACTIVITY P4_EVENT_PACK(0x17, 0x06) + P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY) = P4_OPCODE_PACK(0x17, 0x06), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_BSQ_ALLOCATION P4_EVENT_PACK(0x05, 0x07) + P4_OPCODE(P4_EVENT_BSQ_ALLOCATION) = P4_OPCODE_PACK(0x05, 0x07), /* * MSR_P4_BSU_ESCR0: 0, 1 */ -#define P4_BSQ_ACTIVE_ENTRIES P4_EVENT_PACK(0x06, 0x07) + P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES) = P4_OPCODE_PACK(0x06, 0x07), /* * NOTE: no ESCR name in docs, it's guessed * MSR_P4_BSU_ESCR1: 2, 3 */ -#define P4_SSE_INPUT_ASSIST P4_EVENT_PACK(0x34, 0x01) + P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST) = P4_OPCODE_PACK(0x34, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_PACKED_SP_UOP P4_EVENT_PACK(0x08, 0x01) + P4_OPCODE(P4_EVENT_PACKED_SP_UOP) = P4_OPCODE_PACK(0x08, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_PACKED_DP_UOP P4_EVENT_PACK(0x0c, 0x01) + P4_OPCODE(P4_EVENT_PACKED_DP_UOP) = P4_OPCODE_PACK(0x0c, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_SCALAR_SP_UOP P4_EVENT_PACK(0x0a, 0x01) + P4_OPCODE(P4_EVENT_SCALAR_SP_UOP) = P4_OPCODE_PACK(0x0a, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_SCALAR_DP_UOP P4_EVENT_PACK(0x0e, 0x01) + P4_OPCODE(P4_EVENT_SCALAR_DP_UOP) = P4_OPCODE_PACK(0x0e, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_64BIT_MMX_UOP P4_EVENT_PACK(0x02, 0x01) + P4_OPCODE(P4_EVENT_64BIT_MMX_UOP) = P4_OPCODE_PACK(0x02, 0x01), /* * 
MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_128BIT_MMX_UOP P4_EVENT_PACK(0x1a, 0x01) + P4_OPCODE(P4_EVENT_128BIT_MMX_UOP) = P4_OPCODE_PACK(0x1a, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_X87_FP_UOP P4_EVENT_PACK(0x04, 0x01) + P4_OPCODE(P4_EVENT_X87_FP_UOP) = P4_OPCODE_PACK(0x04, 0x01), /* * MSR_P4_FIRM_ESCR0: 8, 9 * MSR_P4_FIRM_ESCR1: 10, 11 */ -#define P4_TC_MISC P4_EVENT_PACK(0x06, 0x01) + P4_OPCODE(P4_EVENT_TC_MISC) = P4_OPCODE_PACK(0x06, 0x01), /* * MSR_P4_TC_ESCR0: 4, 5 * MSR_P4_TC_ESCR1: 6, 7 */ -#define P4_GLOBAL_POWER_EVENTS P4_EVENT_PACK(0x13, 0x06) + P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS) = P4_OPCODE_PACK(0x13, 0x06), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_TC_MS_XFER P4_EVENT_PACK(0x05, 0x00) + P4_OPCODE(P4_EVENT_TC_MS_XFER) = P4_OPCODE_PACK(0x05, 0x00), /* * MSR_P4_MS_ESCR0: 4, 5 * MSR_P4_MS_ESCR1: 6, 7 */ -#define P4_UOP_QUEUE_WRITES P4_EVENT_PACK(0x09, 0x00) + P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES) = P4_OPCODE_PACK(0x09, 0x00), /* * MSR_P4_MS_ESCR0: 4, 5 * MSR_P4_MS_ESCR1: 6, 7 */ -#define P4_RETIRED_MISPRED_BRANCH_TYPE P4_EVENT_PACK(0x05, 0x02) + P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x05, 0x02), /* * MSR_P4_TBPU_ESCR0: 4, 5 * MSR_P4_TBPU_ESCR1: 6, 7 */ -#define P4_RETIRED_BRANCH_TYPE P4_EVENT_PACK(0x04, 0x02) + P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE) = P4_OPCODE_PACK(0x04, 0x02), /* * MSR_P4_TBPU_ESCR0: 4, 5 * MSR_P4_TBPU_ESCR1: 6, 7 */ -#define P4_RESOURCE_STALL P4_EVENT_PACK(0x01, 0x01) + P4_OPCODE(P4_EVENT_RESOURCE_STALL) = P4_OPCODE_PACK(0x01, 0x01), /* * MSR_P4_ALF_ESCR0: 12, 13, 16 * MSR_P4_ALF_ESCR1: 14, 15, 17 */ -#define P4_WC_BUFFER P4_EVENT_PACK(0x05, 0x05) + P4_OPCODE(P4_EVENT_WC_BUFFER) = P4_OPCODE_PACK(0x05, 0x05), /* * MSR_P4_DAC_ESCR0: 8, 9 * MSR_P4_DAC_ESCR1: 10, 11 */ -#define P4_B2B_CYCLES P4_EVENT_PACK(0x16, 0x03) + P4_OPCODE(P4_EVENT_B2B_CYCLES) = P4_OPCODE_PACK(0x16, 0x03), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_BNR P4_EVENT_PACK(0x08, 0x03) + P4_OPCODE(P4_EVENT_BNR) = P4_OPCODE_PACK(0x08, 0x03), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_SNOOP P4_EVENT_PACK(0x06, 0x03) + P4_OPCODE(P4_EVENT_SNOOP) = P4_OPCODE_PACK(0x06, 0x03), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_RESPONSE P4_EVENT_PACK(0x04, 0x03) + P4_OPCODE(P4_EVENT_RESPONSE) = P4_OPCODE_PACK(0x04, 0x03), /* * MSR_P4_FSB_ESCR0: 0, 1 * MSR_P4_FSB_ESCR1: 2, 3 */ -#define P4_FRONT_END_EVENT P4_EVENT_PACK(0x08, 0x05) + P4_OPCODE(P4_EVENT_FRONT_END_EVENT) = P4_OPCODE_PACK(0x08, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_EXECUTION_EVENT P4_EVENT_PACK(0x0c, 0x05) + P4_OPCODE(P4_EVENT_EXECUTION_EVENT) = P4_OPCODE_PACK(0x0c, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_REPLAY_EVENT P4_EVENT_PACK(0x09, 0x05) + P4_OPCODE(P4_EVENT_REPLAY_EVENT) = P4_OPCODE_PACK(0x09, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_INSTR_RETIRED P4_EVENT_PACK(0x02, 0x04) + P4_OPCODE(P4_EVENT_INSTR_RETIRED) = P4_OPCODE_PACK(0x02, 0x04), /* * MSR_P4_CRU_ESCR0: 12, 13, 16 * MSR_P4_CRU_ESCR1: 14, 15, 17 */ -#define P4_UOPS_RETIRED P4_EVENT_PACK(0x01, 0x04) + P4_OPCODE(P4_EVENT_UOPS_RETIRED) = P4_OPCODE_PACK(0x01, 0x04), /* * MSR_P4_CRU_ESCR0: 12, 13, 16 * MSR_P4_CRU_ESCR1: 14, 15, 17 */ -#define P4_UOP_TYPE P4_EVENT_PACK(0x02, 0x02) + P4_OPCODE(P4_EVENT_UOP_TYPE) = P4_OPCODE_PACK(0x02, 0x02), /* * MSR_P4_RAT_ESCR0: 
12, 13, 16 * MSR_P4_RAT_ESCR1: 14, 15, 17 */ -#define P4_BRANCH_RETIRED P4_EVENT_PACK(0x06, 0x05) + P4_OPCODE(P4_EVENT_BRANCH_RETIRED) = P4_OPCODE_PACK(0x06, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_MISPRED_BRANCH_RETIRED P4_EVENT_PACK(0x03, 0x04) + P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED) = P4_OPCODE_PACK(0x03, 0x04), /* * MSR_P4_CRU_ESCR0: 12, 13, 16 * MSR_P4_CRU_ESCR1: 14, 15, 17 */ -#define P4_X87_ASSIST P4_EVENT_PACK(0x03, 0x05) + P4_OPCODE(P4_EVENT_X87_ASSIST) = P4_OPCODE_PACK(0x03, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_MACHINE_CLEAR P4_EVENT_PACK(0x02, 0x05) + P4_OPCODE(P4_EVENT_MACHINE_CLEAR) = P4_OPCODE_PACK(0x02, 0x05), /* * MSR_P4_CRU_ESCR2: 12, 13, 16 * MSR_P4_CRU_ESCR3: 14, 15, 17 */ -#define P4_INSTR_COMPLETED P4_EVENT_PACK(0x07, 0x04) + P4_OPCODE(P4_EVENT_INSTR_COMPLETED) = P4_OPCODE_PACK(0x07, 0x04), /* * MSR_P4_CRU_ESCR0: 12, 13, 16 * MSR_P4_CRU_ESCR1: 14, 15, 17 */ +}; /* - * a caller should use P4_EVENT_ATTR helper to - * pick the attribute needed, for example + * a caller should use P4_ESCR_EMASK_NAME helper to + * pick the EventMask needed, for example * - * P4_EVENT_ATTR(P4_TC_DELIVER_MODE, DD) + * P4_ESCR_EMASK_NAME(P4_EVENT_TC_DELIVER_MODE, DD) */ -enum P4_EVENTS_ATTR { - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DD, 0), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DB, 1), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, DI, 2), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BD, 3), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BB, 4), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, BI, 5), - P4_MAKE_EVENT_ATTR(P4_TC_DELIVER_MODE, ID, 6), - - P4_MAKE_EVENT_ATTR(P4_BPU_FETCH_REQUEST, TCMISS, 0), - - P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT, 0), - P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, MISS, 1), - P4_MAKE_EVENT_ATTR(P4_ITLB_REFERENCE, HIT_UK, 2), - - P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, ST_RB_FULL, 2), - P4_MAKE_EVENT_ATTR(P4_MEMORY_CANCEL, 64K_CONF, 3), - - P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, LSC, 0), - P4_MAKE_EVENT_ATTR(P4_MEMORY_COMPLETE, SSC, 1), - - P4_MAKE_EVENT_ATTR(P4_LOAD_PORT_REPLAY, SPLIT_LD, 1), - - P4_MAKE_EVENT_ATTR(P4_STORE_PORT_REPLAY, SPLIT_ST, 1), - - P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STA, 1), - P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, NO_STD, 3), - P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), - P4_MAKE_EVENT_ATTR(P4_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), - - P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, DTMISS, 0), - P4_MAKE_EVENT_ATTR(P4_PAGE_WALK_TYPE, ITMISS, 1), - - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), - P4_MAKE_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), - - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, DEFAULT, 0), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_READ, 5), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, ALL_WRITE, 6), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_UC, 7), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WC, 8), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WT, 9), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WP, 10), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, MEM_WB, 11), - 
P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OWN, 13), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, OTHER, 14), - P4_MAKE_EVENT_ATTR(P4_IOQ_ALLOCATION, PREFETCH, 15), - - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OWN, 13), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, OTHER, 14), - P4_MAKE_EVENT_ATTR(P4_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), - - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV, 0), - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN, 1), - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_DRV, 3), - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OWN, 4), - P4_MAKE_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), - - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE0, 0), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_TYPE1, 1), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN0, 2), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LEN1, 3), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_IO_TYPE, 5), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE0, 11), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE1, 12), - P4_MAKE_EVENT_ATTR(P4_BSQ_ALLOCATION, MEM_TYPE2, 13), - - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), - P4_MAKE_EVENT_ATTR(P4_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), - - P4_MAKE_EVENT_ATTR(P4_SSE_INPUT_ASSIST, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_PACKED_SP_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_PACKED_DP_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_SCALAR_SP_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_SCALAR_DP_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_64BIT_MMX_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_128BIT_MMX_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_X87_FP_UOP, ALL, 15), - - P4_MAKE_EVENT_ATTR(P4_TC_MISC, FLUSH, 4), - - P4_MAKE_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING, 0), - - P4_MAKE_EVENT_ATTR(P4_TC_MS_XFER, CISC, 0), - - P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), - P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), - P4_MAKE_EVENT_ATTR(P4_UOP_QUEUE_WRITES, FROM_ROM, 2), - - P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), - P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), - 
P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), - P4_MAKE_EVENT_ATTR(P4_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), - - P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), - P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL, 2), - P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN, 3), - P4_MAKE_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT, 4), - - P4_MAKE_EVENT_ATTR(P4_RESOURCE_STALL, SBFULL, 5), - - P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_EVICTS, 0), - P4_MAKE_EVENT_ATTR(P4_WC_BUFFER, WCB_FULL_EVICTS, 1), - - P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, NBOGUS, 0), - P4_MAKE_EVENT_ATTR(P4_FRONT_END_EVENT, BOGUS, 1), - - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS0, 0), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS1, 1), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS2, 2), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, NBOGUS3, 3), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS0, 4), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS1, 5), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS2, 6), - P4_MAKE_EVENT_ATTR(P4_EXECUTION_EVENT, BOGUS3, 7), - - P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS, 0), - P4_MAKE_EVENT_ATTR(P4_REPLAY_EVENT, BOGUS, 1), - - P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG, 0), - P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSTAG, 1), - P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG, 2), - P4_MAKE_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSTAG, 3), - - P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, NBOGUS, 0), - P4_MAKE_EVENT_ATTR(P4_UOPS_RETIRED, BOGUS, 1), +enum P4_ESCR_EMASKS { + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DD, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DB, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, DI, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BD, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BB, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, BI, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_TC_DELIVER_MODE, ID, 6), + + P4_GEN_ESCR_EMASK(P4_EVENT_BPU_FETCH_REQUEST, TCMISS, 0), + + P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, MISS, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_ITLB_REFERENCE, HIT_UK, 2), + + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, ST_RB_FULL, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_CANCEL, 64K_CONF, 3), + + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, LSC, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_MEMORY_COMPLETE, SSC, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_LOAD_PORT_REPLAY, SPLIT_LD, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_STORE_PORT_REPLAY, SPLIT_ST, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STA, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, NO_STD, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, PARTIAL_DATA, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_MOB_LOAD_REPLAY, UNALGN_ADDR, 5), + + P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, DTMISS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_PAGE_WALK_TYPE, ITMISS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS, 10), + + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, DEFAULT, 0), + 
P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_READ, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, ALL_WRITE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_UC, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WC, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WT, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WP, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, MEM_WB, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OWN, 13), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, OTHER, 14), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ALLOCATION, PREFETCH, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, DEFAULT, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_READ, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, ALL_WRITE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_UC, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WC, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WT, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WP, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, MEM_WB, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OWN, 13), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, OTHER, 14), + P4_GEN_ESCR_EMASK(P4_EVENT_IOQ_ACTIVE_ENTRIES, PREFETCH, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OTHER, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_DRV, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OWN, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_FSB_DATA_ACTIVITY, DBSY_OTHER, 5), + + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE0, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_TYPE1, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN0, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LEN1, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_IO_TYPE, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_LOCK_TYPE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_CACHE_TYPE, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_SPLIT_TYPE, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_DEM_TYPE, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, REQ_ORD_TYPE, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE0, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE1, 12), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ALLOCATION, MEM_TYPE2, 13), + + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE0, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_TYPE1, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN0, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LEN1, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_IO_TYPE, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_LOCK_TYPE, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_CACHE_TYPE, 7), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_SPLIT_TYPE, 8), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_DEM_TYPE, 9), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, REQ_ORD_TYPE, 10), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE0, 11), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE1, 12), + P4_GEN_ESCR_EMASK(P4_EVENT_BSQ_ACTIVE_ENTRIES, MEM_TYPE2, 13), + + P4_GEN_ESCR_EMASK(P4_EVENT_SSE_INPUT_ASSIST, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_SP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_PACKED_DP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_SP_UOP, ALL, 15), + + 
P4_GEN_ESCR_EMASK(P4_EVENT_SCALAR_DP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_64BIT_MMX_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_128BIT_MMX_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_X87_FP_UOP, ALL, 15), + + P4_GEN_ESCR_EMASK(P4_EVENT_TC_MISC, FLUSH, 4), + + P4_GEN_ESCR_EMASK(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING, 0), + + P4_GEN_ESCR_EMASK(P4_EVENT_TC_MS_XFER, CISC, 0), + + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_BUILD, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_TC_DELIVER, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_QUEUE_WRITES, FROM_ROM, 2), + + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, CALL, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, RETURN, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE, INDIRECT, 4), + + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, CALL, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT, 4), + + P4_GEN_ESCR_EMASK(P4_EVENT_RESOURCE_STALL, SBFULL, 5), + + P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_EVICTS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_WC_BUFFER, WCB_FULL_EVICTS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_FRONT_END_EVENT, BOGUS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS0, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS1, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS2, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, NBOGUS3, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS0, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS1, 5), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS2, 6), + P4_GEN_ESCR_EMASK(P4_EVENT_EXECUTION_EVENT, BOGUS3, 7), + + P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_REPLAY_EVENT, BOGUS, 1), + + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, NBOGUSTAG, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSNTAG, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_RETIRED, BOGUSTAG, 3), + + P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_UOPS_RETIRED, BOGUS, 1), - P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS, 1), - P4_MAKE_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES, 2), - - P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNP, 0), - P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMNM, 1), - P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTP, 2), - P4_MAKE_EVENT_ATTR(P4_BRANCH_RETIRED, MMTM, 3), + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGLOADS, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_UOP_TYPE, TAGSTORES, 2), + + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNP, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMNM, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTP, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_BRANCH_RETIRED, MMTM, 3), - P4_MAKE_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS, 0), - P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSU, 0), - P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, FPSO, 1), - P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAO, 2), - P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, POAU, 3), - P4_MAKE_EVENT_ATTR(P4_X87_ASSIST, PREA, 4), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSU, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, FPSO, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAO, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, POAU, 3), + 
P4_GEN_ESCR_EMASK(P4_EVENT_X87_ASSIST, PREA, 4), - P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, CLEAR, 0), - P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, MOCLEAR, 1), - P4_MAKE_EVENT_ATTR(P4_MACHINE_CLEAR, SMCLEAR, 2), + P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, CLEAR, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, MOCLEAR, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_MACHINE_CLEAR, SMCLEAR, 2), - P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, NBOGUS, 0), - P4_MAKE_EVENT_ATTR(P4_INSTR_COMPLETED, BOGUS, 1), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, NBOGUS, 0), + P4_GEN_ESCR_EMASK(P4_EVENT_INSTR_COMPLETED, BOGUS, 1), }; -enum { - KEY_P4_L1D_OP_READ_RESULT_MISS = PERF_COUNT_HW_MAX, - KEY_P4_LL_OP_READ_RESULT_MISS, - KEY_P4_DTLB_OP_READ_RESULT_MISS, - KEY_P4_DTLB_OP_WRITE_RESULT_MISS, - KEY_P4_ITLB_OP_READ_RESULT_ACCESS, - KEY_P4_ITLB_OP_READ_RESULT_MISS, - KEY_P4_UOP_TYPE, +/* P4 PEBS: stale for a while */ +#define P4_PEBS_METRIC_MASK 0x00001fffU +#define P4_PEBS_UOB_TAG 0x01000000U +#define P4_PEBS_ENABLE 0x02000000U + +/* Replay metrics for MSR_IA32_PEBS_ENABLE and MSR_P4_PEBS_MATRIX_VERT */ +#define P4_PEBS__1stl_cache_load_miss_retired 0x3000001 +#define P4_PEBS__2ndl_cache_load_miss_retired 0x3000002 +#define P4_PEBS__dtlb_load_miss_retired 0x3000004 +#define P4_PEBS__dtlb_store_miss_retired 0x3000004 +#define P4_PEBS__dtlb_all_miss_retired 0x3000004 +#define P4_PEBS__tagged_mispred_branch 0x3018000 +#define P4_PEBS__mob_load_replay_retired 0x3000200 +#define P4_PEBS__split_load_retired 0x3000400 +#define P4_PEBS__split_store_retired 0x3000400 + +#define P4_VERT__1stl_cache_load_miss_retired 0x0000001 +#define P4_VERT__2ndl_cache_load_miss_retired 0x0000001 +#define P4_VERT__dtlb_load_miss_retired 0x0000001 +#define P4_VERT__dtlb_store_miss_retired 0x0000002 +#define P4_VERT__dtlb_all_miss_retired 0x0000003 +#define P4_VERT__tagged_mispred_branch 0x0000010 +#define P4_VERT__mob_load_replay_retired 0x0000001 +#define P4_VERT__split_load_retired 0x0000001 +#define P4_VERT__split_store_retired 0x0000002 + +enum P4_CACHE_EVENTS { + P4_CACHE__NONE, + + P4_CACHE__1stl_cache_load_miss_retired, + P4_CACHE__2ndl_cache_load_miss_retired, + P4_CACHE__dtlb_load_miss_retired, + P4_CACHE__dtlb_store_miss_retired, + P4_CACHE__itlb_reference_hit, + P4_CACHE__itlb_reference_miss, + + P4_CACHE__MAX }; #endif /* PERF_EVENT_P4_H */ diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index b8a811a..f8fe069 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -11,35 +11,281 @@ #include +#define P4_CNTR_LIMIT 3 /* * array indices: 0,1 - HT threads, used with HT enabled cpu */ -struct p4_event_template { - u32 opcode; /* ESCR event + CCCR selector */ - u64 config; /* packed predefined bits */ - int dep; /* upstream dependency event index */ - int key; /* index into p4_templates */ - u64 msr; /* - * the high 32 bits set into MSR_IA32_PEBS_ENABLE and - * the low 32 bits set into MSR_P4_PEBS_MATRIX_VERT - * for cache events - */ - unsigned int emask; /* ESCR EventMask */ - unsigned int escr_msr[2]; /* ESCR MSR for this event */ - unsigned int cntr[2]; /* counter index (offset) */ +struct p4_event_bind { + unsigned int opcode; /* Event code and ESCR selector */ + unsigned int escr_msr[2]; /* ESCR MSR for this event */ + unsigned char cntr[2][P4_CNTR_LIMIT]; /* counter index (offset), -1 on abscence */ }; -struct p4_pmu_res { - /* maps hw_conf::idx into template for ESCR sake */ - struct p4_event_template *tpl[ARCH_P4_MAX_CCCR]; +struct p4_cache_event_bind { + unsigned 
int metric_pebs; + unsigned int metric_vert; }; -static DEFINE_PER_CPU(struct p4_pmu_res, p4_pmu_config); +#define P4_GEN_CACHE_EVENT_BIND(name) \ + [P4_CACHE__##name] = { \ + .metric_pebs = P4_PEBS__##name, \ + .metric_vert = P4_VERT__##name, \ + } + +static struct p4_cache_event_bind p4_cache_event_bind_map[] = { + P4_GEN_CACHE_EVENT_BIND(1stl_cache_load_miss_retired), + P4_GEN_CACHE_EVENT_BIND(2ndl_cache_load_miss_retired), + P4_GEN_CACHE_EVENT_BIND(dtlb_load_miss_retired), + P4_GEN_CACHE_EVENT_BIND(dtlb_store_miss_retired), +}; + +/* + * Note that we don't use CCCR1 here, there is an + * exception for P4_BSQ_ALLOCATION but we just have + * no workaround + * + * consider this binding as resources which particular + * event may borrow, it doesn't contain EventMask, + * Tags and friends -- they are left to a caller + */ +static struct p4_event_bind p4_event_bind_map[] = { + [P4_EVENT_TC_DELIVER_MODE] = { + .opcode = P4_OPCODE(P4_EVENT_TC_DELIVER_MODE), + .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_BPU_FETCH_REQUEST] = { + .opcode = P4_OPCODE(P4_EVENT_BPU_FETCH_REQUEST), + .escr_msr = { MSR_P4_BPU_ESCR0, MSR_P4_BPU_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_ITLB_REFERENCE] = { + .opcode = P4_OPCODE(P4_EVENT_ITLB_REFERENCE), + .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_MEMORY_CANCEL] = { + .opcode = P4_OPCODE(P4_EVENT_MEMORY_CANCEL), + .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_MEMORY_COMPLETE] = { + .opcode = P4_OPCODE(P4_EVENT_MEMORY_COMPLETE), + .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_LOAD_PORT_REPLAY] = { + .opcode = P4_OPCODE(P4_EVENT_LOAD_PORT_REPLAY), + .escr_msr = { MSR_P4_SAAT_ESCR0, MSR_P4_SAAT_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_STORE_PORT_REPLAY] = { + .opcode = P4_OPCODE(P4_EVENT_STORE_PORT_REPLAY), + .escr_msr = { MSR_P4_SAAT_ESCR0 , MSR_P4_SAAT_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_MOB_LOAD_REPLAY] = { + .opcode = P4_OPCODE(P4_EVENT_MOB_LOAD_REPLAY), + .escr_msr = { MSR_P4_MOB_ESCR0, MSR_P4_MOB_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_PAGE_WALK_TYPE] = { + .opcode = P4_OPCODE(P4_EVENT_PAGE_WALK_TYPE), + .escr_msr = { MSR_P4_PMH_ESCR0, MSR_P4_PMH_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_BSQ_CACHE_REFERENCE] = { + .opcode = P4_OPCODE(P4_EVENT_BSQ_CACHE_REFERENCE), + .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_IOQ_ALLOCATION] = { + .opcode = P4_OPCODE(P4_EVENT_IOQ_ALLOCATION), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_IOQ_ACTIVE_ENTRIES] = { /* shared ESCR */ + .opcode = P4_OPCODE(P4_EVENT_IOQ_ACTIVE_ENTRIES), + .escr_msr = { MSR_P4_FSB_ESCR1, MSR_P4_FSB_ESCR1 }, + .cntr = { {2, -1, -1}, {3, -1, -1} }, + }, + [P4_EVENT_FSB_DATA_ACTIVITY] = { + .opcode = P4_OPCODE(P4_EVENT_FSB_DATA_ACTIVITY), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_BSQ_ALLOCATION] = { /* shared ESCR, broken CCCR1 */ + .opcode = P4_OPCODE(P4_EVENT_BSQ_ALLOCATION), + .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR0 }, + .cntr = { {0, -1, -1}, {1, -1, -1} }, + }, + [P4_EVENT_BSQ_ACTIVE_ENTRIES] = { /* shared ESCR */ + 
.opcode = P4_OPCODE(P4_EVENT_BSQ_ACTIVE_ENTRIES), + .escr_msr = { MSR_P4_BSU_ESCR1 , MSR_P4_BSU_ESCR1 }, + .cntr = { {2, -1, -1}, {3, -1, -1} }, + }, + [P4_EVENT_SSE_INPUT_ASSIST] = { + .opcode = P4_OPCODE(P4_EVENT_SSE_INPUT_ASSIST), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_PACKED_SP_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_PACKED_SP_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_PACKED_DP_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_PACKED_DP_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_SCALAR_SP_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_SCALAR_SP_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_SCALAR_DP_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_SCALAR_DP_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_64BIT_MMX_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_64BIT_MMX_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_128BIT_MMX_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_128BIT_MMX_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_X87_FP_UOP] = { + .opcode = P4_OPCODE(P4_EVENT_X87_FP_UOP), + .escr_msr = { MSR_P4_FIRM_ESCR0, MSR_P4_FIRM_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_TC_MISC] = { + .opcode = P4_OPCODE(P4_EVENT_TC_MISC), + .escr_msr = { MSR_P4_TC_ESCR0, MSR_P4_TC_ESCR1 }, + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_GLOBAL_POWER_EVENTS] = { + .opcode = P4_OPCODE(P4_EVENT_GLOBAL_POWER_EVENTS), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_TC_MS_XFER] = { + .opcode = P4_OPCODE(P4_EVENT_TC_MS_XFER), + .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_UOP_QUEUE_WRITES] = { + .opcode = P4_OPCODE(P4_EVENT_UOP_QUEUE_WRITES), + .escr_msr = { MSR_P4_MS_ESCR0, MSR_P4_MS_ESCR1 }, + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE] = { + .opcode = P4_OPCODE(P4_EVENT_RETIRED_MISPRED_BRANCH_TYPE), + .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR0 }, + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_RETIRED_BRANCH_TYPE] = { + .opcode = P4_OPCODE(P4_EVENT_RETIRED_BRANCH_TYPE), + .escr_msr = { MSR_P4_TBPU_ESCR0 , MSR_P4_TBPU_ESCR1 }, + .cntr = { {4, 5, -1}, {6, 7, -1} }, + }, + [P4_EVENT_RESOURCE_STALL] = { + .opcode = P4_OPCODE(P4_EVENT_RESOURCE_STALL), + .escr_msr = { MSR_P4_ALF_ESCR0, MSR_P4_ALF_ESCR1 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_WC_BUFFER] = { + .opcode = P4_OPCODE(P4_EVENT_WC_BUFFER), + .escr_msr = { MSR_P4_DAC_ESCR0, MSR_P4_DAC_ESCR1 }, + .cntr = { {8, 9, -1}, {10, 11, -1} }, + }, + [P4_EVENT_B2B_CYCLES] = { + .opcode = P4_OPCODE(P4_EVENT_B2B_CYCLES), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_BNR] = { + .opcode = P4_OPCODE(P4_EVENT_BNR), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_SNOOP] = { + .opcode = P4_OPCODE(P4_EVENT_SNOOP), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_RESPONSE] = { + .opcode = 
P4_OPCODE(P4_EVENT_RESPONSE), + .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, + .cntr = { {0, -1, -1}, {2, -1, -1} }, + }, + [P4_EVENT_FRONT_END_EVENT] = { + .opcode = P4_OPCODE(P4_EVENT_FRONT_END_EVENT), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_EXECUTION_EVENT] = { + .opcode = P4_OPCODE(P4_EVENT_EXECUTION_EVENT), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_REPLAY_EVENT] = { + .opcode = P4_OPCODE(P4_EVENT_REPLAY_EVENT), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_INSTR_RETIRED] = { + .opcode = P4_OPCODE(P4_EVENT_INSTR_RETIRED), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_UOPS_RETIRED] = { + .opcode = P4_OPCODE(P4_EVENT_UOPS_RETIRED), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_UOP_TYPE] = { + .opcode = P4_OPCODE(P4_EVENT_UOP_TYPE), + .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_BRANCH_RETIRED] = { + .opcode = P4_OPCODE(P4_EVENT_BRANCH_RETIRED), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_MISPRED_BRANCH_RETIRED] = { + .opcode = P4_OPCODE(P4_EVENT_MISPRED_BRANCH_RETIRED), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_X87_ASSIST] = { + .opcode = P4_OPCODE(P4_EVENT_X87_ASSIST), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_MACHINE_CLEAR] = { + .opcode = P4_OPCODE(P4_EVENT_MACHINE_CLEAR), + .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR3 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, + [P4_EVENT_INSTR_COMPLETED] = { + .opcode = P4_OPCODE(P4_EVENT_INSTR_COMPLETED), + .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, + .cntr = { {12, 13, 16}, {14, 15, 17} }, + }, +}; -#define P4_CACHE_EVENT_CONFIG(event, bit) \ - p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(event) << P4_EVNTSEL_EVENT_SHIFT) | \ - p4_config_pack_escr((event##_##bit) << P4_EVNTSEL_EVENTMASK_SHIFT) | \ - p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(event) << P4_CCCR_ESCR_SELECT_SHIFT) +#define P4_GEN_CACHE_EVENT(event, bit, cache_event) \ + p4_config_pack_escr(P4_ESCR_EVENT(event) | \ + P4_ESCR_EMASK_BIT(event, bit)) | \ + p4_config_pack_cccr(cache_event | \ + P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) static __initconst u64 p4_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] @@ -49,42 +295,35 @@ static __initconst u64 p4_hw_cache_event_ids [ C(L1D ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0, - /* 1stL_cache_load_miss_retired */ - [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) - | KEY_P4_L1D_OP_READ_RESULT_MISS, + [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, + P4_CACHE__1stl_cache_load_miss_retired), }, }, [ C(LL ) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0, - /* 2ndL_cache_load_miss_retired */ - [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) - | KEY_P4_LL_OP_READ_RESULT_MISS, + [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, + P4_CACHE__2ndl_cache_load_miss_retired), }, - }, +}, [ C(DTLB) ] = { [ C(OP_READ) ] = { [ C(RESULT_ACCESS) ] = 0x0, - /* DTLB_load_miss_retired */ - [ C(RESULT_MISS) ] = 
P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) - | KEY_P4_DTLB_OP_READ_RESULT_MISS, + [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, + P4_CACHE__dtlb_load_miss_retired), }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = 0x0, - /* DTLB_store_miss_retired */ - [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_REPLAY_EVENT, NBOGUS) - | KEY_P4_DTLB_OP_WRITE_RESULT_MISS, + [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_REPLAY_EVENT, NBOGUS, + P4_CACHE__dtlb_store_miss_retired), }, }, [ C(ITLB) ] = { [ C(OP_READ) ] = { - /* ITLB_reference.HIT */ - [ C(RESULT_ACCESS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, HIT) - | KEY_P4_ITLB_OP_READ_RESULT_ACCESS, - - /* ITLB_reference.MISS */ - [ C(RESULT_MISS) ] = P4_CACHE_EVENT_CONFIG(P4_ITLB_REFERENCE, MISS) - | KEY_P4_ITLB_OP_READ_RESULT_MISS, + [ C(RESULT_ACCESS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, HIT, + P4_CACHE__itlb_reference_hit), + [ C(RESULT_MISS) ] = P4_GEN_CACHE_EVENT(P4_EVENT_ITLB_REFERENCE, MISS, + P4_CACHE__itlb_reference_miss), }, [ C(OP_WRITE) ] = { [ C(RESULT_ACCESS) ] = -1, @@ -97,220 +336,90 @@ static __initconst u64 p4_hw_cache_event_ids }, }; -/* - * WARN: CCCR1 doesn't have a working enable bit so try to not - * use it if possible - * - * Also as only we start to support raw events we will need to - * append _all_ P4_EVENT_PACK'ed events here - */ -struct p4_event_template p4_templates[] = { - [0] = { - .opcode = P4_GLOBAL_POWER_EVENTS, - .config = 0, - .dep = -1, - .key = 0, - .emask = - P4_EVENT_ATTR(P4_GLOBAL_POWER_EVENTS, RUNNING), - .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, - .cntr = { 0, 2 }, - }, - [1] = { - .opcode = P4_INSTR_RETIRED, - .config = 0, - .dep = -1, /* needs front-end tagging */ - .key = 1, - .emask = - P4_EVENT_ATTR(P4_INSTR_RETIRED, NBOGUSNTAG) | - P4_EVENT_ATTR(P4_INSTR_RETIRED, BOGUSNTAG), - .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, - .cntr = { 12, 14 }, - }, - [2] = { - .opcode = P4_BSQ_CACHE_REFERENCE, - .config = 0, - .dep = -1, - .key = 2, - .emask = - P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | - P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | - P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | - P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | - P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | - P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_HITM), - .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, - .cntr = { 0, 2 }, - }, - [3] = { - .opcode = P4_BSQ_CACHE_REFERENCE, - .config = 0, - .dep = -1, - .key = 3, - .emask = - P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | - P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | - P4_EVENT_ATTR(P4_BSQ_CACHE_REFERENCE, WR_2ndL_MISS), - .escr_msr = { MSR_P4_BSU_ESCR0, MSR_P4_BSU_ESCR1 }, - .cntr = { 0, 3 }, - }, - [4] = { - .opcode = P4_RETIRED_BRANCH_TYPE, - .config = 0, - .dep = -1, - .key = 4, - .emask = - P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CONDITIONAL) | - P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, CALL) | - P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, RETURN) | - P4_EVENT_ATTR(P4_RETIRED_BRANCH_TYPE, INDIRECT), - .escr_msr = { MSR_P4_TBPU_ESCR0, MSR_P4_TBPU_ESCR1 }, - .cntr = { 4, 6 }, - }, - [5] = { - .opcode = P4_MISPRED_BRANCH_RETIRED, - .config = 0, - .dep = -1, - .key = 5, - .emask = - P4_EVENT_ATTR(P4_MISPRED_BRANCH_RETIRED, NBOGUS), - .escr_msr = { MSR_P4_CRU_ESCR0, MSR_P4_CRU_ESCR1 }, - .cntr = { 12, 14 }, - }, - [6] = { - .opcode = P4_FSB_DATA_ACTIVITY, - .config = p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), - .dep = -1, - .key = 6, - .emask = - 
P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_DRV) | - P4_EVENT_ATTR(P4_FSB_DATA_ACTIVITY, DRDY_OWN), - .escr_msr = { MSR_P4_FSB_ESCR0, MSR_P4_FSB_ESCR1 }, - .cntr = { 0, 2 }, - }, - [KEY_P4_L1D_OP_READ_RESULT_MISS] = { - .opcode = P4_REPLAY_EVENT, - .config = 0, - .dep = -1, - .msr = (u64)(1 << 0 | 1 << 24) << 32 | (1 << 0), - .key = KEY_P4_L1D_OP_READ_RESULT_MISS, - .emask = - P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), - .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, - .cntr = { 16, 17 }, - }, - [KEY_P4_LL_OP_READ_RESULT_MISS] = { - .opcode = P4_REPLAY_EVENT, - .config = 0, - .dep = -1, - .msr = (u64)(1 << 1 | 1 << 24) << 32 | (1 << 0), - .key = KEY_P4_LL_OP_READ_RESULT_MISS, - .emask = - P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), - .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, - .cntr = { 16, 17 }, - }, - [KEY_P4_DTLB_OP_READ_RESULT_MISS] = { - .opcode = P4_REPLAY_EVENT, - .config = 0, - .dep = -1, - .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 0), - .key = KEY_P4_DTLB_OP_READ_RESULT_MISS, - .emask = - P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), - .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, - .cntr = { 16, 17 }, - }, - [KEY_P4_DTLB_OP_WRITE_RESULT_MISS] = { - .opcode = P4_REPLAY_EVENT, - .config = 0, - .dep = -1, - .msr = (u64)(1 << 2 | 1 << 24) << 32 | (1 << 1), - .key = KEY_P4_DTLB_OP_WRITE_RESULT_MISS, - .emask = - P4_EVENT_ATTR(P4_REPLAY_EVENT, NBOGUS), - .escr_msr = { MSR_P4_CRU_ESCR2, MSR_P4_CRU_ESCR2 }, - .cntr = { 16, 17 }, - }, - [KEY_P4_ITLB_OP_READ_RESULT_ACCESS] = { - .opcode = P4_ITLB_REFERENCE, - .config = 0, - .dep = -1, - .msr = 0, - .key = KEY_P4_ITLB_OP_READ_RESULT_ACCESS, - .emask = - P4_EVENT_ATTR(P4_ITLB_REFERENCE, HIT), - .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, - .cntr = { 0, 2 }, - }, - [KEY_P4_ITLB_OP_READ_RESULT_MISS] = { - .opcode = P4_ITLB_REFERENCE, - .config = 0, - .dep = -1, - .msr = 0, - .key = KEY_P4_ITLB_OP_READ_RESULT_MISS, - .emask = - P4_EVENT_ATTR(P4_ITLB_REFERENCE, MISS), - .escr_msr = { MSR_P4_ITLB_ESCR0, MSR_P4_ITLB_ESCR1 }, - .cntr = { 0, 2 }, - }, - [KEY_P4_UOP_TYPE] = { - .opcode = P4_UOP_TYPE, - .config = 0, - .dep = -1, - .key = KEY_P4_UOP_TYPE, - .emask = - P4_EVENT_ATTR(P4_UOP_TYPE, TAGLOADS) | - P4_EVENT_ATTR(P4_UOP_TYPE, TAGSTORES), - .escr_msr = { MSR_P4_RAT_ESCR0, MSR_P4_RAT_ESCR1 }, - .cntr = { 16, 17 }, - }, +static u64 p4_general_events[PERF_COUNT_HW_MAX] = { + /* non-halted CPU clocks */ + [PERF_COUNT_HW_CPU_CYCLES] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_GLOBAL_POWER_EVENTS) | + P4_ESCR_EMASK_BIT(P4_EVENT_GLOBAL_POWER_EVENTS, RUNNING)), + + /* + * retired instructions + * in a sake of simplicity we don't use the FSB tagging + */ + [PERF_COUNT_HW_INSTRUCTIONS] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_INSTR_RETIRED) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, NBOGUSNTAG) | + P4_ESCR_EMASK_BIT(P4_EVENT_INSTR_RETIRED, BOGUSNTAG)), + + /* cache hits */ + [PERF_COUNT_HW_CACHE_REFERENCES] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_HITM) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITE) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_HITM)), + + /* cache misses */ + [PERF_COUNT_HW_CACHE_MISSES] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_BSQ_CACHE_REFERENCE) | + 
P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_2ndL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, RD_3rdL_MISS) | + P4_ESCR_EMASK_BIT(P4_EVENT_BSQ_CACHE_REFERENCE, WR_2ndL_MISS)), + + /* branch instructions retired */ + [PERF_COUNT_HW_BRANCH_INSTRUCTIONS] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_RETIRED_BRANCH_TYPE) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CONDITIONAL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, CALL) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, RETURN) | + P4_ESCR_EMASK_BIT(P4_EVENT_RETIRED_BRANCH_TYPE, INDIRECT)), + + /* mispredicted branches retired */ + [PERF_COUNT_HW_BRANCH_MISSES] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_MISPRED_BRANCH_RETIRED) | + P4_ESCR_EMASK_BIT(P4_EVENT_MISPRED_BRANCH_RETIRED, NBOGUS)), + + /* bus ready clocks (cpu is driving #DRDY_DRV\#DRDY_OWN): */ + [PERF_COUNT_HW_BUS_CYCLES] = + p4_config_pack_escr(P4_ESCR_EVENT(P4_EVENT_FSB_DATA_ACTIVITY) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_DRV) | + P4_ESCR_EMASK_BIT(P4_EVENT_FSB_DATA_ACTIVITY, DRDY_OWN)) | + p4_config_pack_cccr(P4_CCCR_EDGE | P4_CCCR_COMPARE), }; +static struct p4_event_bind *p4_config_get_bind(u64 config) +{ + unsigned int evnt = p4_config_unpack_event(config); + struct p4_event_bind *bind = NULL; + + if (evnt < ARRAY_SIZE(p4_event_bind_map)) + bind = &p4_event_bind_map[evnt]; + + return bind; +} + static u64 p4_pmu_event_map(int hw_event) { - struct p4_event_template *tpl; + struct p4_event_bind *bind; + unsigned int esel; u64 config; - if (hw_event > ARRAY_SIZE(p4_templates)) { - printk_once(KERN_ERR "PMU: Incorrect event index\n"); + if (hw_event > ARRAY_SIZE(p4_general_events)) { + printk_once(KERN_ERR "P4 PMU: Bad index: %i\n", hw_event); return 0; } - tpl = &p4_templates[hw_event]; - /* - * fill config up according to - * a predefined event template - */ - config = tpl->config; - config |= p4_config_pack_escr(P4_EVENT_UNPACK_EVENT(tpl->opcode) << P4_EVNTSEL_EVENT_SHIFT); - config |= p4_config_pack_escr(tpl->emask << P4_EVNTSEL_EVENTMASK_SHIFT); - config |= p4_config_pack_cccr(P4_EVENT_UNPACK_SELECTOR(tpl->opcode) << P4_CCCR_ESCR_SELECT_SHIFT); - config |= p4_config_pack_cccr(hw_event & P4_CCCR_RESERVED); + config = p4_general_events[hw_event]; + bind = p4_config_get_bind(config); + esel = P4_OPCODE_ESEL(bind->opcode); + config |= p4_config_pack_cccr(P4_CCCR_ESEL(esel)); return config; } /* - * Note that we still have 5 events (from global events SDM list) - * intersected in opcode+emask bits so we will need another - * scheme there do distinguish templates. 
- */ -static inline int p4_pmu_emask_match(unsigned int dst, unsigned int src) -{ - return dst & src; -} - -static struct p4_event_template *p4_pmu_template_lookup(u64 config) -{ - int key = p4_config_unpack_key(config); - - if (key < ARRAY_SIZE(p4_templates)) - return &p4_templates[key]; - else - return NULL; -} - -/* * We don't control raw events so it's up to the caller * to pass sane values (and we don't count the thread number * on HT machine but allow HT-compatible specifics to be @@ -319,13 +428,14 @@ static struct p4_event_template *p4_pmu_template_lookup(u64 config) static u64 p4_pmu_raw_event(u64 hw_event) { return hw_event & - (p4_config_pack_escr(P4_EVNTSEL_MASK_HT) | + (p4_config_pack_escr(P4_ESCR_MASK_HT) | p4_config_pack_cccr(P4_CCCR_MASK_HT)); } static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) { int cpu = raw_smp_processor_id(); + u32 escr, cccr; /* * the reason we use cpu that early is that: if we get scheduled @@ -333,13 +443,10 @@ static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) * specific flags in config (and will save some cpu cycles) */ - /* CCCR by default */ - hwc->config = p4_config_pack_cccr(p4_default_cccr_conf(cpu)); + cccr = p4_default_cccr_conf(cpu); + escr = p4_default_escr_conf(cpu, attr->exclude_kernel, attr->exclude_user); + hwc->config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr); - /* Count user and OS events unless not requested to */ - hwc->config |= p4_config_pack_escr(p4_default_escr_conf(cpu, attr->exclude_kernel, - attr->exclude_user)); - /* on HT machine we need a special bit */ if (p4_ht_active() && p4_ht_thread(cpu)) hwc->config = p4_set_ht_bit(hwc->config); @@ -368,7 +475,7 @@ static inline void p4_pmu_disable_event(struct perf_event *event) */ (void)checking_wrmsrl(hwc->config_base + hwc->idx, (u64)(p4_config_unpack_cccr(hwc->config)) & - ~P4_CCCR_ENABLE & ~P4_CCCR_OVF); + ~P4_CCCR_ENABLE & ~P4_CCCR_OVF & ~P4_CCCR_RESERVED); } static void p4_pmu_disable_all(void) @@ -389,27 +496,14 @@ static void p4_pmu_enable_event(struct perf_event *event) struct hw_perf_event *hwc = &event->hw; int thread = p4_ht_config_thread(hwc->config); u64 escr_conf = p4_config_unpack_escr(p4_clear_ht_bit(hwc->config)); - u64 escr_base; - struct p4_event_template *tpl; - struct p4_pmu_res *c; + unsigned int idx = p4_config_unpack_event(hwc->config); + unsigned int idx_cache = p4_config_unpack_cache_event(hwc->config); + struct p4_event_bind *bind; + struct p4_cache_event_bind *bind_cache; + u64 escr_addr, cccr; - /* - * some preparation work from per-cpu private fields - * since we need to find out which ESCR to use - */ - c = &__get_cpu_var(p4_pmu_config); - tpl = c->tpl[hwc->idx]; - if (!tpl) { - pr_crit("%s: Wrong index: %d\n", __func__, hwc->idx); - return; - } - - if (tpl->msr) { - (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, tpl->msr >> 32); - (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, tpl->msr & 0xffffffff); - } - - escr_base = (u64)tpl->escr_msr[thread]; + bind = &p4_event_bind_map[idx]; + escr_addr = (u64)bind->escr_msr[thread]; /* * - we dont support cascaded counters yet @@ -418,9 +512,27 @@ static void p4_pmu_enable_event(struct perf_event *event) WARN_ON_ONCE(p4_is_event_cascaded(hwc->config)); WARN_ON_ONCE(hwc->idx == 1); - (void)checking_wrmsrl(escr_base, escr_conf); + /* we need a real Event value */ + escr_conf &= ~P4_ESCR_EVENT_MASK; + escr_conf |= P4_ESCR_EVENT(P4_OPCODE_EVNT(bind->opcode)); + + cccr = p4_config_unpack_cccr(hwc->config); + + /* + * it could be Cache event 
so that we need to + * set metrics into additional MSRs + */ + BUILD_BUG_ON(P4_CACHE__MAX > P4_CCCR_CACHE_OPS_MASK); + if (idx_cache > P4_CACHE__NONE && + idx_cache < ARRAY_SIZE(p4_cache_event_bind_map)) { + bind_cache = &p4_cache_event_bind_map[idx_cache]; + (void)checking_wrmsrl(MSR_IA32_PEBS_ENABLE, (u64)bind_cache->metric_pebs); + (void)checking_wrmsrl(MSR_P4_PEBS_MATRIX_VERT, (u64)bind_cache->metric_vert); + } + + (void)checking_wrmsrl(escr_addr, escr_conf); (void)checking_wrmsrl(hwc->config_base + hwc->idx, - (u64)(p4_config_unpack_cccr(hwc->config)) | P4_CCCR_ENABLE); + (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); } static void p4_pmu_enable_all(void) @@ -516,13 +628,13 @@ static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) if (p4_ht_thread(cpu)) { cccr &= ~P4_CCCR_OVF_PMI_T0; cccr |= P4_CCCR_OVF_PMI_T1; - if (escr & P4_EVNTSEL_T0_OS) { - escr &= ~P4_EVNTSEL_T0_OS; - escr |= P4_EVNTSEL_T1_OS; + if (escr & P4_ESCR_T0_OS) { + escr &= ~P4_ESCR_T0_OS; + escr |= P4_ESCR_T1_OS; } - if (escr & P4_EVNTSEL_T0_USR) { - escr &= ~P4_EVNTSEL_T0_USR; - escr |= P4_EVNTSEL_T1_USR; + if (escr & P4_ESCR_T0_USR) { + escr &= ~P4_ESCR_T0_USR; + escr |= P4_ESCR_T1_USR; } hwc->config = p4_config_pack_escr(escr); hwc->config |= p4_config_pack_cccr(cccr); @@ -530,13 +642,13 @@ static void p4_pmu_swap_config_ts(struct hw_perf_event *hwc, int cpu) } else { cccr &= ~P4_CCCR_OVF_PMI_T1; cccr |= P4_CCCR_OVF_PMI_T0; - if (escr & P4_EVNTSEL_T1_OS) { - escr &= ~P4_EVNTSEL_T1_OS; - escr |= P4_EVNTSEL_T0_OS; + if (escr & P4_ESCR_T1_OS) { + escr &= ~P4_ESCR_T1_OS; + escr |= P4_ESCR_T0_OS; } - if (escr & P4_EVNTSEL_T1_USR) { - escr &= ~P4_EVNTSEL_T1_USR; - escr |= P4_EVNTSEL_T0_USR; + if (escr & P4_ESCR_T1_USR) { + escr &= ~P4_ESCR_T1_USR; + escr |= P4_ESCR_T0_USR; } hwc->config = p4_config_pack_escr(escr); hwc->config |= p4_config_pack_cccr(cccr); @@ -606,66 +718,56 @@ static int p4_get_escr_idx(unsigned int addr) return -1; } +static int p4_next_cntr(int thread, unsigned long *used_mask, + struct p4_event_bind *bind) +{ + int i = 0, j; + + for (i = 0; i < P4_CNTR_LIMIT; i++) { + j = bind->cntr[thread][i++]; + if (j == -1 || !test_bit(j, used_mask)) + return j; + } + + return -1; +} + static int p4_pmu_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) { unsigned long used_mask[BITS_TO_LONGS(X86_PMC_IDX_MAX)]; unsigned long escr_mask[BITS_TO_LONGS(ARCH_P4_TOTAL_ESCR)]; - - struct hw_perf_event *hwc; - struct p4_event_template *tpl; - struct p4_pmu_res *c; int cpu = raw_smp_processor_id(); - int escr_idx, thread, i, num; + struct hw_perf_event *hwc; + struct p4_event_bind *bind; + unsigned int i, thread, num; + int cntr_idx, escr_idx; bitmap_zero(used_mask, X86_PMC_IDX_MAX); bitmap_zero(escr_mask, ARCH_P4_TOTAL_ESCR); - c = &__get_cpu_var(p4_pmu_config); - /* - * Firstly find out which resource events are going - * to use, if ESCR+CCCR tuple is already borrowed - * then get out of here - */ for (i = 0, num = n; i < n; i++, num--) { + hwc = &cpuc->event_list[i]->hw; - tpl = p4_pmu_template_lookup(hwc->config); - if (!tpl) - goto done; thread = p4_ht_thread(cpu); - escr_idx = p4_get_escr_idx(tpl->escr_msr[thread]); - if (escr_idx == -1) - goto done; + bind = p4_config_get_bind(hwc->config); + escr_idx = p4_get_escr_idx(bind->escr_msr[thread]); - /* already allocated and remains on the same cpu */ if (hwc->idx != -1 && !p4_should_swap_ts(hwc->config, cpu)) { + cntr_idx = hwc->idx; if (assign) assign[i] = hwc->idx; - /* upstream dependent event */ - if (unlikely(tpl->dep != -1)) - 
printk_once(KERN_WARNING "PMU: Dep events are " - "not implemented yet\n"); goto reserve; } - /* it may be already borrowed */ - if (test_bit(tpl->cntr[thread], used_mask) || - test_bit(escr_idx, escr_mask)) + cntr_idx = p4_next_cntr(thread, used_mask, bind); + if (cntr_idx == -1 || test_bit(escr_idx, escr_mask)) goto done; - /* - * ESCR+CCCR+COUNTERs are available to use lets swap - * thread specific bits, push assigned bits - * back and save template into per-cpu - * area (which will allow us to find out the ESCR - * to be used at moment of "enable event via real MSR") - */ p4_pmu_swap_config_ts(hwc, cpu); - if (assign) { - assign[i] = tpl->cntr[thread]; - c->tpl[assign[i]] = tpl; - } + if (assign) + assign[i] = cntr_idx; reserve: - set_bit(tpl->cntr[thread], used_mask); + set_bit(cntr_idx, used_mask); set_bit(escr_idx, escr_mask); } @@ -684,7 +786,7 @@ static __initconst struct x86_pmu p4_pmu = { .perfctr = MSR_P4_BPU_PERFCTR0, .event_map = p4_pmu_event_map, .raw_event = p4_pmu_raw_event, - .max_events = ARRAY_SIZE(p4_templates), + .max_events = ARRAY_SIZE(p4_general_events), .get_event_constraints = x86_get_event_constraints, /* * IF HT disabled we may need to use all @@ -716,7 +818,7 @@ static __init int p4_pmu_init(void) } memcpy(hw_cache_event_ids, p4_hw_cache_event_ids, - sizeof(hw_cache_event_ids)); + sizeof(hw_cache_event_ids)); pr_cont("Netburst events, "); -- cgit v1.1 From 7c5ecaf7666617889f337296c610815b519abfa9 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Mar 2010 14:51:49 +0100 Subject: perf, x86: Clean up debugctlmsr bit definitions Move all debugctlmsr thingies into msr-index.h Signed-off-by: Peter Zijlstra LKML-Reference: <20100325135413.861425293@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/msr-index.h | 13 ++++++++----- arch/x86/kernel/cpu/perf_event_intel_ds.c | 23 +++++++---------------- arch/x86/kernel/cpu/perf_event_intel_lbr.c | 7 ++----- 3 files changed, 17 insertions(+), 26 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/msr-index.h b/arch/x86/include/asm/msr-index.h index aef562c..06e4cf0 100644 --- a/arch/x86/include/asm/msr-index.h +++ b/arch/x86/include/asm/msr-index.h @@ -71,11 +71,14 @@ #define MSR_IA32_LASTINTTOIP 0x000001de /* DEBUGCTLMSR bits (others vary by model): */ -#define _DEBUGCTLMSR_LBR 0 /* last branch recording */ -#define _DEBUGCTLMSR_BTF 1 /* single-step on branches */ - -#define DEBUGCTLMSR_LBR (1UL << _DEBUGCTLMSR_LBR) -#define DEBUGCTLMSR_BTF (1UL << _DEBUGCTLMSR_BTF) +#define DEBUGCTLMSR_LBR (1UL << 0) /* last branch recording */ +#define DEBUGCTLMSR_BTF (1UL << 1) /* single-step on branches */ +#define DEBUGCTLMSR_TR (1UL << 6) +#define DEBUGCTLMSR_BTS (1UL << 7) +#define DEBUGCTLMSR_BTINT (1UL << 8) +#define DEBUGCTLMSR_BTS_OFF_OS (1UL << 9) +#define DEBUGCTLMSR_BTS_OFF_USR (1UL << 10) +#define DEBUGCTLMSR_FREEZE_LBRS_ON_PMI (1UL << 11) #define MSR_IA32_MC0_CTL 0x00000400 #define MSR_IA32_MC0_STATUS 0x00000401 diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c index c59678a..2fea362 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_ds.c +++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c @@ -38,15 +38,6 @@ struct pebs_record_nhm { }; /* - * Bits in the debugctlmsr controlling branch tracing. - */ -#define X86_DEBUGCTL_TR (1 << 6) -#define X86_DEBUGCTL_BTS (1 << 7) -#define X86_DEBUGCTL_BTINT (1 << 8) -#define X86_DEBUGCTL_BTS_OFF_OS (1 << 9) -#define X86_DEBUGCTL_BTS_OFF_USR (1 << 10) - -/* * A debug store configuration. 
* * We only support architectures that use 64bit fields. @@ -193,15 +184,15 @@ static void intel_pmu_enable_bts(u64 config) debugctlmsr = get_debugctlmsr(); - debugctlmsr |= X86_DEBUGCTL_TR; - debugctlmsr |= X86_DEBUGCTL_BTS; - debugctlmsr |= X86_DEBUGCTL_BTINT; + debugctlmsr |= DEBUGCTLMSR_TR; + debugctlmsr |= DEBUGCTLMSR_BTS; + debugctlmsr |= DEBUGCTLMSR_BTINT; if (!(config & ARCH_PERFMON_EVENTSEL_OS)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_OS; + debugctlmsr |= DEBUGCTLMSR_BTS_OFF_OS; if (!(config & ARCH_PERFMON_EVENTSEL_USR)) - debugctlmsr |= X86_DEBUGCTL_BTS_OFF_USR; + debugctlmsr |= DEBUGCTLMSR_BTS_OFF_USR; update_debugctlmsr(debugctlmsr); } @@ -217,8 +208,8 @@ static void intel_pmu_disable_bts(void) debugctlmsr = get_debugctlmsr(); debugctlmsr &= - ~(X86_DEBUGCTL_TR | X86_DEBUGCTL_BTS | X86_DEBUGCTL_BTINT | - X86_DEBUGCTL_BTS_OFF_OS | X86_DEBUGCTL_BTS_OFF_USR); + ~(DEBUGCTLMSR_TR | DEBUGCTLMSR_BTS | DEBUGCTLMSR_BTINT | + DEBUGCTLMSR_BTS_OFF_OS | DEBUGCTLMSR_BTS_OFF_USR); update_debugctlmsr(debugctlmsr); } diff --git a/arch/x86/kernel/cpu/perf_event_intel_lbr.c b/arch/x86/kernel/cpu/perf_event_intel_lbr.c index df4c98e2..d202c1b 100644 --- a/arch/x86/kernel/cpu/perf_event_intel_lbr.c +++ b/arch/x86/kernel/cpu/perf_event_intel_lbr.c @@ -12,15 +12,12 @@ enum { * otherwise it becomes near impossible to get a reliable stack. */ -#define X86_DEBUGCTL_LBR (1 << 0) -#define X86_DEBUGCTL_FREEZE_LBRS_ON_PMI (1 << 11) - static void __intel_pmu_lbr_enable(void) { u64 debugctl; rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - debugctl |= (X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI); + debugctl |= (DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); } @@ -29,7 +26,7 @@ static void __intel_pmu_lbr_disable(void) u64 debugctl; rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); - debugctl &= ~(X86_DEBUGCTL_LBR | X86_DEBUGCTL_FREEZE_LBRS_ON_PMI); + debugctl &= ~(DEBUGCTLMSR_LBR | DEBUGCTLMSR_FREEZE_LBRS_ON_PMI); wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctl); } -- cgit v1.1 From faa4602e47690fb11221e00f9b9697c8dc0d4b19 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Mar 2010 14:51:50 +0100 Subject: x86, perf, bts, mm: Delete the never used BTS-ptrace code Support for the PMU's BTS features has been upstreamed in v2.6.32, but we still have the old and disabled ptrace-BTS, as Linus noticed it not so long ago. It's buggy: TIF_DEBUGCTLMSR is trampling all over that MSR without regard for other uses (perf) and doesn't provide the flexibility needed for perf either. Its users are ptrace-block-step and ptrace-bts, since ptrace-bts was never used and ptrace-block-step can be implemented using a much simpler approach. So axe all 3000 lines of it. That includes the *locked_memory*() APIs in mm/mlock.c as well. 
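For reference, a minimal sketch of that "much simpler approach" to block-step
(an illustration only, not code from this series): branch-granular stepping
needs no DS area at all; it is enough to set the BTF bit in
MSR_IA32_DEBUGCTLMSR alongside EFLAGS.TF, so the #DB trap fires on taken
branches rather than on every instruction. The helper name below is
hypothetical; DEBUGCTLMSR_BTF and the get_debugctlmsr()/update_debugctlmsr()
accessors are the definitions introduced earlier in this series
(asm/msr-index.h, asm/processor.h), and pt_regs comes from asm/ptrace.h.

	/* Hypothetical sketch: block-step without any DS/BTS state. */
	static void enable_block_step(struct pt_regs *regs)
	{
		unsigned long debugctl = get_debugctlmsr();

		/* BTF qualifies TF: trap on taken branches only. */
		debugctl |= DEBUGCTLMSR_BTF;
		update_debugctlmsr(debugctl);

		/* TF must still be set; BTF alone raises no traps. */
		regs->flags |= X86_EFLAGS_TF;
	}
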
Reported-by: Linus Torvalds Signed-off-by: Peter Zijlstra Cc: Roland McGrath Cc: Oleg Nesterov Cc: Markus Metzger Cc: Steven Rostedt Cc: Andrew Morton LKML-Reference: <20100325135413.938004390@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/Kconfig.cpu | 20 - arch/x86/Kconfig.debug | 9 - arch/x86/include/asm/ds.h | 302 -------- arch/x86/include/asm/processor.h | 33 +- arch/x86/include/asm/ptrace-abi.h | 57 +- arch/x86/include/asm/ptrace.h | 6 - arch/x86/include/asm/thread_info.h | 6 +- arch/x86/kernel/Makefile | 2 - arch/x86/kernel/cpu/intel.c | 2 - arch/x86/kernel/ds.c | 1437 ------------------------------------ arch/x86/kernel/ds_selftest.c | 408 ---------- arch/x86/kernel/ds_selftest.h | 15 - arch/x86/kernel/dumpstack.c | 5 - arch/x86/kernel/kprobes.c | 6 +- arch/x86/kernel/process.c | 9 - arch/x86/kernel/process_32.c | 8 - arch/x86/kernel/process_64.c | 8 - arch/x86/kernel/ptrace.c | 382 ---------- arch/x86/kernel/step.c | 36 +- arch/x86/kernel/traps.c | 5 - 20 files changed, 9 insertions(+), 2747 deletions(-) delete mode 100644 arch/x86/include/asm/ds.h delete mode 100644 arch/x86/kernel/ds.c delete mode 100644 arch/x86/kernel/ds_selftest.c delete mode 100644 arch/x86/kernel/ds_selftest.h (limited to 'arch/x86') diff --git a/arch/x86/Kconfig.cpu b/arch/x86/Kconfig.cpu index a198293..918fbb1 100644 --- a/arch/x86/Kconfig.cpu +++ b/arch/x86/Kconfig.cpu @@ -502,23 +502,3 @@ config CPU_SUP_UMC_32 CPU might render the kernel unbootable. If unsure, say N. - -config X86_DS - def_bool X86_PTRACE_BTS - depends on X86_DEBUGCTLMSR - select HAVE_HW_BRANCH_TRACER - -config X86_PTRACE_BTS - bool "Branch Trace Store" - default y - depends on X86_DEBUGCTLMSR - depends on BROKEN - ---help--- - This adds a ptrace interface to the hardware's branch trace store. - - Debuggers may use it to collect an execution trace of the debugged - application in order to answer the question 'how did I get here?'. - Debuggers may trace user mode as well as kernel mode. - - Say Y unless there is no application development on this machine - and you want to save a small amount of code size. diff --git a/arch/x86/Kconfig.debug b/arch/x86/Kconfig.debug index bc01e3e..bd58c8a 100644 --- a/arch/x86/Kconfig.debug +++ b/arch/x86/Kconfig.debug @@ -174,15 +174,6 @@ config IOMMU_LEAK Add a simple leak tracer to the IOMMU code. This is useful when you are debugging a buggy device driver that leaks IOMMU mappings. -config X86_DS_SELFTEST - bool "DS selftest" - default y - depends on DEBUG_KERNEL - depends on X86_DS - ---help--- - Perform Debug Store selftests at boot time. - If in doubt, say "N". - config HAVE_MMIOTRACE_SUPPORT def_bool y diff --git a/arch/x86/include/asm/ds.h b/arch/x86/include/asm/ds.h deleted file mode 100644 index 70dac19..0000000 --- a/arch/x86/include/asm/ds.h +++ /dev/null @@ -1,302 +0,0 @@ -/* - * Debug Store (DS) support - * - * This provides a low-level interface to the hardware's Debug Store - * feature that is used for branch trace store (BTS) and - * precise-event based sampling (PEBS). - * - * It manages: - * - DS and BTS hardware configuration - * - buffer overflow handling (to be done) - * - buffer access - * - * It does not do: - * - security checking (is the caller allowed to trace the task) - * - buffer allocation (memory accounting) - * - * - * Copyright (C) 2007-2009 Intel Corporation. 
- * Markus Metzger , 2007-2009 - */ - -#ifndef _ASM_X86_DS_H -#define _ASM_X86_DS_H - - -#include -#include -#include - - -#ifdef CONFIG_X86_DS - -struct task_struct; -struct ds_context; -struct ds_tracer; -struct bts_tracer; -struct pebs_tracer; - -typedef void (*bts_ovfl_callback_t)(struct bts_tracer *); -typedef void (*pebs_ovfl_callback_t)(struct pebs_tracer *); - - -/* - * A list of features plus corresponding macros to talk about them in - * the ds_request function's flags parameter. - * - * We use the enum to index an array of corresponding control bits; - * we use the macro to index a flags bit-vector. - */ -enum ds_feature { - dsf_bts = 0, - dsf_bts_kernel, -#define BTS_KERNEL (1 << dsf_bts_kernel) - /* trace kernel-mode branches */ - - dsf_bts_user, -#define BTS_USER (1 << dsf_bts_user) - /* trace user-mode branches */ - - dsf_bts_overflow, - dsf_bts_max, - dsf_pebs = dsf_bts_max, - - dsf_pebs_max, - dsf_ctl_max = dsf_pebs_max, - dsf_bts_timestamps = dsf_ctl_max, -#define BTS_TIMESTAMPS (1 << dsf_bts_timestamps) - /* add timestamps into BTS trace */ - -#define BTS_USER_FLAGS (BTS_KERNEL | BTS_USER | BTS_TIMESTAMPS) -}; - - -/* - * Request BTS or PEBS - * - * Due to alignement constraints, the actual buffer may be slightly - * smaller than the requested or provided buffer. - * - * Returns a pointer to a tracer structure on success, or - * ERR_PTR(errcode) on failure. - * - * The interrupt threshold is independent from the overflow callback - * to allow users to use their own overflow interrupt handling mechanism. - * - * The function might sleep. - * - * task: the task to request recording for - * cpu: the cpu to request recording for - * base: the base pointer for the (non-pageable) buffer; - * size: the size of the provided buffer in bytes - * ovfl: pointer to a function to be called on buffer overflow; - * NULL if cyclic buffer requested - * th: the interrupt threshold in records from the end of the buffer; - * -1 if no interrupt threshold is requested. - * flags: a bit-mask of the above flags - */ -extern struct bts_tracer *ds_request_bts_task(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, - size_t th, unsigned int flags); -extern struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, - bts_ovfl_callback_t ovfl, - size_t th, unsigned int flags); -extern struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, - size_t th, unsigned int flags); -extern struct pebs_tracer *ds_request_pebs_cpu(int cpu, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, - size_t th, unsigned int flags); - -/* - * Release BTS or PEBS resources - * Suspend and resume BTS or PEBS tracing - * - * Must be called with irq's enabled. - * - * tracer: the tracer handle returned from ds_request_~() - */ -extern void ds_release_bts(struct bts_tracer *tracer); -extern void ds_suspend_bts(struct bts_tracer *tracer); -extern void ds_resume_bts(struct bts_tracer *tracer); -extern void ds_release_pebs(struct pebs_tracer *tracer); -extern void ds_suspend_pebs(struct pebs_tracer *tracer); -extern void ds_resume_pebs(struct pebs_tracer *tracer); - -/* - * Release BTS or PEBS resources - * Suspend and resume BTS or PEBS tracing - * - * Cpu tracers must call this on the traced cpu. - * Task tracers must call ds_release_~_noirq() for themselves. - * - * May be called with irq's disabled. - * - * Returns 0 if successful; - * -EPERM if the cpu tracer does not trace the current cpu. 
- * -EPERM if the task tracer does not trace itself. - * - * tracer: the tracer handle returned from ds_request_~() - */ -extern int ds_release_bts_noirq(struct bts_tracer *tracer); -extern int ds_suspend_bts_noirq(struct bts_tracer *tracer); -extern int ds_resume_bts_noirq(struct bts_tracer *tracer); -extern int ds_release_pebs_noirq(struct pebs_tracer *tracer); -extern int ds_suspend_pebs_noirq(struct pebs_tracer *tracer); -extern int ds_resume_pebs_noirq(struct pebs_tracer *tracer); - - -/* - * The raw DS buffer state as it is used for BTS and PEBS recording. - * - * This is the low-level, arch-dependent interface for working - * directly on the raw trace data. - */ -struct ds_trace { - /* the number of bts/pebs records */ - size_t n; - /* the size of a bts/pebs record in bytes */ - size_t size; - /* pointers into the raw buffer: - - to the first entry */ - void *begin; - /* - one beyond the last entry */ - void *end; - /* - one beyond the newest entry */ - void *top; - /* - the interrupt threshold */ - void *ith; - /* flags given on ds_request() */ - unsigned int flags; -}; - -/* - * An arch-independent view on branch trace data. - */ -enum bts_qualifier { - bts_invalid, -#define BTS_INVALID bts_invalid - - bts_branch, -#define BTS_BRANCH bts_branch - - bts_task_arrives, -#define BTS_TASK_ARRIVES bts_task_arrives - - bts_task_departs, -#define BTS_TASK_DEPARTS bts_task_departs - - bts_qual_bit_size = 4, - bts_qual_max = (1 << bts_qual_bit_size), -}; - -struct bts_struct { - __u64 qualifier; - union { - /* BTS_BRANCH */ - struct { - __u64 from; - __u64 to; - } lbr; - /* BTS_TASK_ARRIVES or BTS_TASK_DEPARTS */ - struct { - __u64 clock; - pid_t pid; - } event; - } variant; -}; - - -/* - * The BTS state. - * - * This gives access to the raw DS state and adds functions to provide - * an arch-independent view of the BTS data. - */ -struct bts_trace { - struct ds_trace ds; - - int (*read)(struct bts_tracer *tracer, const void *at, - struct bts_struct *out); - int (*write)(struct bts_tracer *tracer, const struct bts_struct *in); -}; - - -/* - * The PEBS state. - * - * This gives access to the raw DS state and the PEBS-specific counter - * reset value. - */ -struct pebs_trace { - struct ds_trace ds; - - /* the number of valid counters in the below array */ - unsigned int counters; - -#define MAX_PEBS_COUNTERS 4 - /* the counter reset value */ - unsigned long long counter_reset[MAX_PEBS_COUNTERS]; -}; - - -/* - * Read the BTS or PEBS trace. - * - * Returns a view on the trace collected for the parameter tracer. - * - * The view remains valid as long as the traced task is not running or - * the tracer is suspended. - * Writes into the trace buffer are not reflected. - * - * tracer: the tracer handle returned from ds_request_~() - */ -extern const struct bts_trace *ds_read_bts(struct bts_tracer *tracer); -extern const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer); - - -/* - * Reset the write pointer of the BTS/PEBS buffer. - * - * Returns 0 on success; -Eerrno on error - * - * tracer: the tracer handle returned from ds_request_~() - */ -extern int ds_reset_bts(struct bts_tracer *tracer); -extern int ds_reset_pebs(struct pebs_tracer *tracer); - -/* - * Set the PEBS counter reset value. 
- * - * Returns 0 on success; -Eerrno on error - * - * tracer: the tracer handle returned from ds_request_pebs() - * counter: the index of the counter - * value: the new counter reset value - */ -extern int ds_set_pebs_reset(struct pebs_tracer *tracer, - unsigned int counter, u64 value); - -/* - * Initialization - */ -struct cpuinfo_x86; -extern void __cpuinit ds_init_intel(struct cpuinfo_x86 *); - -/* - * Context switch work - */ -extern void ds_switch_to(struct task_struct *prev, struct task_struct *next); - -#else /* CONFIG_X86_DS */ - -struct cpuinfo_x86; -static inline void __cpuinit ds_init_intel(struct cpuinfo_x86 *ignored) {} -static inline void ds_switch_to(struct task_struct *prev, - struct task_struct *next) {} - -#endif /* CONFIG_X86_DS */ -#endif /* _ASM_X86_DS_H */ diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index b753ea5..5bec21a 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -21,7 +21,6 @@ struct mm_struct; #include #include #include -#include #include #include @@ -29,6 +28,7 @@ struct mm_struct; #include #include #include +#include #define HBP_NUM 4 /* @@ -473,10 +473,6 @@ struct thread_struct { unsigned long iopl; /* Max allowed port in the bitmap, in bytes: */ unsigned io_bitmap_max; -/* MSR_IA32_DEBUGCTLMSR value to switch in if TIF_DEBUGCTLMSR is set. */ - unsigned long debugctlmsr; - /* Debug Store context; see asm/ds.h */ - struct ds_context *ds_ctx; }; static inline unsigned long native_get_debugreg(int regno) @@ -814,21 +810,6 @@ static inline unsigned long get_debugctlmsr(void) return debugctlmsr; } -static inline unsigned long get_debugctlmsr_on_cpu(int cpu) -{ - u64 debugctlmsr = 0; - u32 val1, val2; - -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return 0; -#endif - rdmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, &val1, &val2); - debugctlmsr = val1 | ((u64)val2 << 32); - - return debugctlmsr; -} - static inline void update_debugctlmsr(unsigned long debugctlmsr) { #ifndef CONFIG_X86_DEBUGCTLMSR @@ -838,18 +819,6 @@ static inline void update_debugctlmsr(unsigned long debugctlmsr) wrmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); } -static inline void update_debugctlmsr_on_cpu(int cpu, - unsigned long debugctlmsr) -{ -#ifndef CONFIG_X86_DEBUGCTLMSR - if (boot_cpu_data.x86 < 6) - return; -#endif - wrmsr_on_cpu(cpu, MSR_IA32_DEBUGCTLMSR, - (u32)((u64)debugctlmsr), - (u32)((u64)debugctlmsr >> 32)); -} - /* * from system description table in BIOS. Mostly for MCA use, but * others may find it useful: diff --git a/arch/x86/include/asm/ptrace-abi.h b/arch/x86/include/asm/ptrace-abi.h index 8672303..52b098a 100644 --- a/arch/x86/include/asm/ptrace-abi.h +++ b/arch/x86/include/asm/ptrace-abi.h @@ -82,61 +82,6 @@ #ifndef __ASSEMBLY__ #include - -/* configuration/status structure used in PTRACE_BTS_CONFIG and - PTRACE_BTS_STATUS commands. -*/ -struct ptrace_bts_config { - /* requested or actual size of BTS buffer in bytes */ - __u32 size; - /* bitmask of below flags */ - __u32 flags; - /* buffer overflow signal */ - __u32 signal; - /* actual size of bts_struct in bytes */ - __u32 bts_size; -}; -#endif /* __ASSEMBLY__ */ - -#define PTRACE_BTS_O_TRACE 0x1 /* branch trace */ -#define PTRACE_BTS_O_SCHED 0x2 /* scheduling events w/ jiffies */ -#define PTRACE_BTS_O_SIGNAL 0x4 /* send SIG on buffer overflow - instead of wrapping around */ -#define PTRACE_BTS_O_ALLOC 0x8 /* (re)allocate buffer */ - -#define PTRACE_BTS_CONFIG 40 -/* Configure branch trace recording. 
- ADDR points to a struct ptrace_bts_config. - DATA gives the size of that buffer. - A new buffer is allocated, if requested in the flags. - An overflow signal may only be requested for new buffers. - Returns the number of bytes read. -*/ -#define PTRACE_BTS_STATUS 41 -/* Return the current configuration in a struct ptrace_bts_config - pointed to by ADDR; DATA gives the size of that buffer. - Returns the number of bytes written. -*/ -#define PTRACE_BTS_SIZE 42 -/* Return the number of available BTS records for draining. - DATA and ADDR are ignored. -*/ -#define PTRACE_BTS_GET 43 -/* Get a single BTS record. - DATA defines the index into the BTS array, where 0 is the newest - entry, and higher indices refer to older entries. - ADDR is pointing to struct bts_struct (see asm/ds.h). -*/ -#define PTRACE_BTS_CLEAR 44 -/* Clear the BTS buffer. - DATA and ADDR are ignored. -*/ -#define PTRACE_BTS_DRAIN 45 -/* Read all available BTS records and clear the buffer. - ADDR points to an array of struct bts_struct. - DATA gives the size of that buffer. - BTS records are read from oldest to newest. - Returns number of BTS records drained. -*/ +#endif #endif /* _ASM_X86_PTRACE_ABI_H */ diff --git a/arch/x86/include/asm/ptrace.h b/arch/x86/include/asm/ptrace.h index 69a686a..78cd1ea 100644 --- a/arch/x86/include/asm/ptrace.h +++ b/arch/x86/include/asm/ptrace.h @@ -289,12 +289,6 @@ extern int do_get_thread_area(struct task_struct *p, int idx, extern int do_set_thread_area(struct task_struct *p, int idx, struct user_desc __user *info, int can_allocate); -#ifdef CONFIG_X86_PTRACE_BTS -extern void ptrace_bts_untrace(struct task_struct *tsk); - -#define arch_ptrace_untrace(tsk) ptrace_bts_untrace(tsk) -#endif /* CONFIG_X86_PTRACE_BTS */ - #endif /* __KERNEL__ */ #endif /* !__ASSEMBLY__ */ diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index e0d2890..dc85e12 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -92,8 +92,6 @@ struct thread_info { #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ #define TIF_FREEZE 23 /* is freezing for suspend */ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ -#define TIF_DEBUGCTLMSR 25 /* uses thread_struct.debugctlmsr */ -#define TIF_DS_AREA_MSR 26 /* uses thread_struct.ds_area_msr */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ @@ -115,8 +113,6 @@ struct thread_info { #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FREEZE (1 << TIF_FREEZE) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) -#define _TIF_DEBUGCTLMSR (1 << TIF_DEBUGCTLMSR) -#define _TIF_DS_AREA_MSR (1 << TIF_DS_AREA_MSR) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) @@ -147,7 +143,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_DEBUGCTLMSR|_TIF_DS_AREA_MSR|_TIF_NOTSC) + (_TIF_IO_BITMAP|_TIF_NOTSC) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) diff --git a/arch/x86/kernel/Makefile b/arch/x86/kernel/Makefile index 4c58352..e77b220 100644 --- a/arch/x86/kernel/Makefile +++ b/arch/x86/kernel/Makefile @@ -47,8 +47,6 @@ obj-$(CONFIG_X86_TRAMPOLINE) += trampoline.o obj-y += process.o obj-y += i387.o xsave.o obj-y += ptrace.o -obj-$(CONFIG_X86_DS) += ds.o -obj-$(CONFIG_X86_DS_SELFTEST) += ds_selftest.o 
obj-$(CONFIG_X86_32) += tls.o obj-$(CONFIG_IA32_EMULATION) += tls.o obj-y += step.o diff --git a/arch/x86/kernel/cpu/intel.c b/arch/x86/kernel/cpu/intel.c index 7e1cca1..d72377c 100644 --- a/arch/x86/kernel/cpu/intel.c +++ b/arch/x86/kernel/cpu/intel.c @@ -12,7 +12,6 @@ #include #include #include -#include #include #include @@ -367,7 +366,6 @@ static void __cpuinit init_intel(struct cpuinfo_x86 *c) set_cpu_cap(c, X86_FEATURE_BTS); if (!(l1 & (1<<12))) set_cpu_cap(c, X86_FEATURE_PEBS); - ds_init_intel(c); } if (c->x86 == 6 && c->x86_model == 29 && cpu_has_clflush) diff --git a/arch/x86/kernel/ds.c b/arch/x86/kernel/ds.c deleted file mode 100644 index 1c47390..0000000 --- a/arch/x86/kernel/ds.c +++ /dev/null @@ -1,1437 +0,0 @@ -/* - * Debug Store support - * - * This provides a low-level interface to the hardware's Debug Store - * feature that is used for branch trace store (BTS) and - * precise-event based sampling (PEBS). - * - * It manages: - * - DS and BTS hardware configuration - * - buffer overflow handling (to be done) - * - buffer access - * - * It does not do: - * - security checking (is the caller allowed to trace the task) - * - buffer allocation (memory accounting) - * - * - * Copyright (C) 2007-2009 Intel Corporation. - * Markus Metzger , 2007-2009 - */ - -#include -#include -#include -#include -#include -#include -#include - -#include - -#include "ds_selftest.h" - -/* - * The configuration for a particular DS hardware implementation: - */ -struct ds_configuration { - /* The name of the configuration: */ - const char *name; - - /* The size of pointer-typed fields in DS, BTS, and PEBS: */ - unsigned char sizeof_ptr_field; - - /* The size of a BTS/PEBS record in bytes: */ - unsigned char sizeof_rec[2]; - - /* The number of pebs counter reset values in the DS structure. */ - unsigned char nr_counter_reset; - - /* Control bit-masks indexed by enum ds_feature: */ - unsigned long ctl[dsf_ctl_max]; -}; -static struct ds_configuration ds_cfg __read_mostly; - - -/* Maximal size of a DS configuration: */ -#define MAX_SIZEOF_DS 0x80 - -/* Maximal size of a BTS record: */ -#define MAX_SIZEOF_BTS (3 * 8) - -/* BTS and PEBS buffer alignment: */ -#define DS_ALIGNMENT (1 << 3) - -/* Number of buffer pointers in DS: */ -#define NUM_DS_PTR_FIELDS 8 - -/* Size of a pebs reset value in DS: */ -#define PEBS_RESET_FIELD_SIZE 8 - -/* Mask of control bits in the DS MSR register: */ -#define BTS_CONTROL \ - ( ds_cfg.ctl[dsf_bts] | \ - ds_cfg.ctl[dsf_bts_kernel] | \ - ds_cfg.ctl[dsf_bts_user] | \ - ds_cfg.ctl[dsf_bts_overflow] ) - -/* - * A BTS or PEBS tracer. - * - * This holds the configuration of the tracer and serves as a handle - * to identify tracers. - */ -struct ds_tracer { - /* The DS context (partially) owned by this tracer. */ - struct ds_context *context; - /* The buffer provided on ds_request() and its size in bytes. */ - void *buffer; - size_t size; -}; - -struct bts_tracer { - /* The common DS part: */ - struct ds_tracer ds; - - /* The trace including the DS configuration: */ - struct bts_trace trace; - - /* Buffer overflow notification function: */ - bts_ovfl_callback_t ovfl; - - /* Active flags affecting trace collection. 
*/ - unsigned int flags; -}; - -struct pebs_tracer { - /* The common DS part: */ - struct ds_tracer ds; - - /* The trace including the DS configuration: */ - struct pebs_trace trace; - - /* Buffer overflow notification function: */ - pebs_ovfl_callback_t ovfl; -}; - -/* - * Debug Store (DS) save area configuration (see Intel64 and IA32 - * Architectures Software Developer's Manual, section 18.5) - * - * The DS configuration consists of the following fields; different - * architetures vary in the size of those fields. - * - * - double-word aligned base linear address of the BTS buffer - * - write pointer into the BTS buffer - * - end linear address of the BTS buffer (one byte beyond the end of - * the buffer) - * - interrupt pointer into BTS buffer - * (interrupt occurs when write pointer passes interrupt pointer) - * - double-word aligned base linear address of the PEBS buffer - * - write pointer into the PEBS buffer - * - end linear address of the PEBS buffer (one byte beyond the end of - * the buffer) - * - interrupt pointer into PEBS buffer - * (interrupt occurs when write pointer passes interrupt pointer) - * - value to which counter is reset following counter overflow - * - * Later architectures use 64bit pointers throughout, whereas earlier - * architectures use 32bit pointers in 32bit mode. - * - * - * We compute the base address for the first 8 fields based on: - * - the field size stored in the DS configuration - * - the relative field position - * - an offset giving the start of the respective region - * - * This offset is further used to index various arrays holding - * information for BTS and PEBS at the respective index. - * - * On later 32bit processors, we only access the lower 32bit of the - * 64bit pointer fields. The upper halves will be zeroed out. - */ - -enum ds_field { - ds_buffer_base = 0, - ds_index, - ds_absolute_maximum, - ds_interrupt_threshold, -}; - -enum ds_qualifier { - ds_bts = 0, - ds_pebs -}; - -static inline unsigned long -ds_get(const unsigned char *base, enum ds_qualifier qual, enum ds_field field) -{ - base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); - return *(unsigned long *)base; -} - -static inline void -ds_set(unsigned char *base, enum ds_qualifier qual, enum ds_field field, - unsigned long value) -{ - base += (ds_cfg.sizeof_ptr_field * (field + (4 * qual))); - (*(unsigned long *)base) = value; -} - - -/* - * Locking is done only for allocating BTS or PEBS resources. - */ -static DEFINE_SPINLOCK(ds_lock); - -/* - * We either support (system-wide) per-cpu or per-thread allocation. - * We distinguish the two based on the task_struct pointer, where a - * NULL pointer indicates per-cpu allocation for the current cpu. - * - * Allocations are use-counted. As soon as resources are allocated, - * further allocations must be of the same type (per-cpu or - * per-thread). We model this by counting allocations (i.e. the number - * of tracers of a certain type) for one type negatively: - * =0 no tracers - * >0 number of per-thread tracers - * <0 number of per-cpu tracers - * - * Tracers essentially gives the number of ds contexts for a certain - * type of allocation. 
- */ -static atomic_t tracers = ATOMIC_INIT(0); - -static inline int get_tracer(struct task_struct *task) -{ - int error; - - spin_lock_irq(&ds_lock); - - if (task) { - error = -EPERM; - if (atomic_read(&tracers) < 0) - goto out; - atomic_inc(&tracers); - } else { - error = -EPERM; - if (atomic_read(&tracers) > 0) - goto out; - atomic_dec(&tracers); - } - - error = 0; -out: - spin_unlock_irq(&ds_lock); - return error; -} - -static inline void put_tracer(struct task_struct *task) -{ - if (task) - atomic_dec(&tracers); - else - atomic_inc(&tracers); -} - -/* - * The DS context is either attached to a thread or to a cpu: - * - in the former case, the thread_struct contains a pointer to the - * attached context. - * - in the latter case, we use a static array of per-cpu context - * pointers. - * - * Contexts are use-counted. They are allocated on first access and - * deallocated when the last user puts the context. - */ -struct ds_context { - /* The DS configuration; goes into MSR_IA32_DS_AREA: */ - unsigned char ds[MAX_SIZEOF_DS]; - - /* The owner of the BTS and PEBS configuration, respectively: */ - struct bts_tracer *bts_master; - struct pebs_tracer *pebs_master; - - /* Use count: */ - unsigned long count; - - /* Pointer to the context pointer field: */ - struct ds_context **this; - - /* The traced task; NULL for cpu tracing: */ - struct task_struct *task; - - /* The traced cpu; only valid if task is NULL: */ - int cpu; -}; - -static DEFINE_PER_CPU(struct ds_context *, cpu_ds_context); - - -static struct ds_context *ds_get_context(struct task_struct *task, int cpu) -{ - struct ds_context **p_context = - (task ? &task->thread.ds_ctx : &per_cpu(cpu_ds_context, cpu)); - struct ds_context *context = NULL; - struct ds_context *new_context = NULL; - - /* Chances are small that we already have a context. */ - new_context = kzalloc(sizeof(*new_context), GFP_KERNEL); - if (!new_context) - return NULL; - - spin_lock_irq(&ds_lock); - - context = *p_context; - if (likely(!context)) { - context = new_context; - - context->this = p_context; - context->task = task; - context->cpu = cpu; - context->count = 0; - - *p_context = context; - } - - context->count++; - - spin_unlock_irq(&ds_lock); - - if (context != new_context) - kfree(new_context); - - return context; -} - -static void ds_put_context(struct ds_context *context) -{ - struct task_struct *task; - unsigned long irq; - - if (!context) - return; - - spin_lock_irqsave(&ds_lock, irq); - - if (--context->count) { - spin_unlock_irqrestore(&ds_lock, irq); - return; - } - - *(context->this) = NULL; - - task = context->task; - - if (task) - clear_tsk_thread_flag(task, TIF_DS_AREA_MSR); - - /* - * We leave the (now dangling) pointer to the DS configuration in - * the DS_AREA msr. This is as good or as bad as replacing it with - * NULL - the hardware would crash if we enabled tracing. - * - * This saves us some problems with having to write an msr on a - * different cpu while preventing others from doing the same for the - * next context for that same cpu. - */ - - spin_unlock_irqrestore(&ds_lock, irq); - - /* The context might still be in use for context switching. */ - if (task && (task != current)) - wait_task_context_switch(task); - - kfree(context); -} - -static void ds_install_ds_area(struct ds_context *context) -{ - unsigned long ds; - - ds = (unsigned long)context->ds; - - /* - * There is a race between the bts master and the pebs master. 
- * - * The thread/cpu access is synchronized via get/put_cpu() for - * task tracing and via wrmsr_on_cpu for cpu tracing. - * - * If bts and pebs are collected for the same task or same cpu, - * the same confiuration is written twice. - */ - if (context->task) { - get_cpu(); - if (context->task == current) - wrmsrl(MSR_IA32_DS_AREA, ds); - set_tsk_thread_flag(context->task, TIF_DS_AREA_MSR); - put_cpu(); - } else - wrmsr_on_cpu(context->cpu, MSR_IA32_DS_AREA, - (u32)((u64)ds), (u32)((u64)ds >> 32)); -} - -/* - * Call the tracer's callback on a buffer overflow. - * - * context: the ds context - * qual: the buffer type - */ -static void ds_overflow(struct ds_context *context, enum ds_qualifier qual) -{ - switch (qual) { - case ds_bts: - if (context->bts_master && - context->bts_master->ovfl) - context->bts_master->ovfl(context->bts_master); - break; - case ds_pebs: - if (context->pebs_master && - context->pebs_master->ovfl) - context->pebs_master->ovfl(context->pebs_master); - break; - } -} - - -/* - * Write raw data into the BTS or PEBS buffer. - * - * The remainder of any partially written record is zeroed out. - * - * context: the DS context - * qual: the buffer type - * record: the data to write - * size: the size of the data - */ -static int ds_write(struct ds_context *context, enum ds_qualifier qual, - const void *record, size_t size) -{ - int bytes_written = 0; - - if (!record) - return -EINVAL; - - while (size) { - unsigned long base, index, end, write_end, int_th; - unsigned long write_size, adj_write_size; - - /* - * Write as much as possible without producing an - * overflow interrupt. - * - * Interrupt_threshold must either be - * - bigger than absolute_maximum or - * - point to a record between buffer_base and absolute_maximum - * - * Index points to a valid record. - */ - base = ds_get(context->ds, qual, ds_buffer_base); - index = ds_get(context->ds, qual, ds_index); - end = ds_get(context->ds, qual, ds_absolute_maximum); - int_th = ds_get(context->ds, qual, ds_interrupt_threshold); - - write_end = min(end, int_th); - - /* - * If we are already beyond the interrupt threshold, - * we fill the entire buffer. - */ - if (write_end <= index) - write_end = end; - - if (write_end <= index) - break; - - write_size = min((unsigned long) size, write_end - index); - memcpy((void *)index, record, write_size); - - record = (const char *)record + write_size; - size -= write_size; - bytes_written += write_size; - - adj_write_size = write_size / ds_cfg.sizeof_rec[qual]; - adj_write_size *= ds_cfg.sizeof_rec[qual]; - - /* Zero out trailing bytes. */ - memset((char *)index + write_size, 0, - adj_write_size - write_size); - index += adj_write_size; - - if (index >= end) - index = base; - ds_set(context->ds, qual, ds_index, index); - - if (index >= int_th) - ds_overflow(context, qual); - } - - return bytes_written; -} - - -/* - * Branch Trace Store (BTS) uses the following format. Different - * architectures vary in the size of those fields. - * - source linear address - * - destination linear address - * - flags - * - * Later architectures use 64bit pointers throughout, whereas earlier - * architectures use 32bit pointers in 32bit mode. - * - * We compute the base address for the fields based on: - * - the field size stored in the DS configuration - * - the relative field position - * - * In order to store additional information in the BTS buffer, we use - * a special source address to indicate that the record requires - * special interpretation. 
- * - * Netburst indicated via a bit in the flags field whether the branch - * was predicted; this is ignored. - * - * We use two levels of abstraction: - * - the raw data level defined here - * - an arch-independent level defined in ds.h - */ - -enum bts_field { - bts_from, - bts_to, - bts_flags, - - bts_qual = bts_from, - bts_clock = bts_to, - bts_pid = bts_flags, - - bts_qual_mask = (bts_qual_max - 1), - bts_escape = ((unsigned long)-1 & ~bts_qual_mask) -}; - -static inline unsigned long bts_get(const char *base, unsigned long field) -{ - base += (ds_cfg.sizeof_ptr_field * field); - return *(unsigned long *)base; -} - -static inline void bts_set(char *base, unsigned long field, unsigned long val) -{ - base += (ds_cfg.sizeof_ptr_field * field); - (*(unsigned long *)base) = val; -} - - -/* - * The raw BTS data is architecture dependent. - * - * For higher-level users, we give an arch-independent view. - * - ds.h defines struct bts_struct - * - bts_read translates one raw bts record into a bts_struct - * - bts_write translates one bts_struct into the raw format and - * writes it into the top of the parameter tracer's buffer. - * - * return: bytes read/written on success; -Eerrno, otherwise - */ -static int -bts_read(struct bts_tracer *tracer, const void *at, struct bts_struct *out) -{ - if (!tracer) - return -EINVAL; - - if (at < tracer->trace.ds.begin) - return -EINVAL; - - if (tracer->trace.ds.end < (at + tracer->trace.ds.size)) - return -EINVAL; - - memset(out, 0, sizeof(*out)); - if ((bts_get(at, bts_qual) & ~bts_qual_mask) == bts_escape) { - out->qualifier = (bts_get(at, bts_qual) & bts_qual_mask); - out->variant.event.clock = bts_get(at, bts_clock); - out->variant.event.pid = bts_get(at, bts_pid); - } else { - out->qualifier = bts_branch; - out->variant.lbr.from = bts_get(at, bts_from); - out->variant.lbr.to = bts_get(at, bts_to); - - if (!out->variant.lbr.from && !out->variant.lbr.to) - out->qualifier = bts_invalid; - } - - return ds_cfg.sizeof_rec[ds_bts]; -} - -static int bts_write(struct bts_tracer *tracer, const struct bts_struct *in) -{ - unsigned char raw[MAX_SIZEOF_BTS]; - - if (!tracer) - return -EINVAL; - - if (MAX_SIZEOF_BTS < ds_cfg.sizeof_rec[ds_bts]) - return -EOVERFLOW; - - switch (in->qualifier) { - case bts_invalid: - bts_set(raw, bts_from, 0); - bts_set(raw, bts_to, 0); - bts_set(raw, bts_flags, 0); - break; - case bts_branch: - bts_set(raw, bts_from, in->variant.lbr.from); - bts_set(raw, bts_to, in->variant.lbr.to); - bts_set(raw, bts_flags, 0); - break; - case bts_task_arrives: - case bts_task_departs: - bts_set(raw, bts_qual, (bts_escape | in->qualifier)); - bts_set(raw, bts_clock, in->variant.event.clock); - bts_set(raw, bts_pid, in->variant.event.pid); - break; - default: - return -EINVAL; - } - - return ds_write(tracer->ds.context, ds_bts, raw, - ds_cfg.sizeof_rec[ds_bts]); -} - - -static void ds_write_config(struct ds_context *context, - struct ds_trace *cfg, enum ds_qualifier qual) -{ - unsigned char *ds = context->ds; - - ds_set(ds, qual, ds_buffer_base, (unsigned long)cfg->begin); - ds_set(ds, qual, ds_index, (unsigned long)cfg->top); - ds_set(ds, qual, ds_absolute_maximum, (unsigned long)cfg->end); - ds_set(ds, qual, ds_interrupt_threshold, (unsigned long)cfg->ith); -} - -static void ds_read_config(struct ds_context *context, - struct ds_trace *cfg, enum ds_qualifier qual) -{ - unsigned char *ds = context->ds; - - cfg->begin = (void *)ds_get(ds, qual, ds_buffer_base); - cfg->top = (void *)ds_get(ds, qual, ds_index); - cfg->end = (void *)ds_get(ds, qual, 
ds_absolute_maximum); - cfg->ith = (void *)ds_get(ds, qual, ds_interrupt_threshold); -} - -static void ds_init_ds_trace(struct ds_trace *trace, enum ds_qualifier qual, - void *base, size_t size, size_t ith, - unsigned int flags) { - unsigned long buffer, adj; - - /* - * Adjust the buffer address and size to meet alignment - * constraints: - * - buffer is double-word aligned - * - size is multiple of record size - * - * We checked the size at the very beginning; we have enough - * space to do the adjustment. - */ - buffer = (unsigned long)base; - - adj = ALIGN(buffer, DS_ALIGNMENT) - buffer; - buffer += adj; - size -= adj; - - trace->n = size / ds_cfg.sizeof_rec[qual]; - trace->size = ds_cfg.sizeof_rec[qual]; - - size = (trace->n * trace->size); - - trace->begin = (void *)buffer; - trace->top = trace->begin; - trace->end = (void *)(buffer + size); - /* - * The value for 'no threshold' is -1, which will set the - * threshold outside of the buffer, just like we want it. - */ - ith *= ds_cfg.sizeof_rec[qual]; - trace->ith = (void *)(buffer + size - ith); - - trace->flags = flags; -} - - -static int ds_request(struct ds_tracer *tracer, struct ds_trace *trace, - enum ds_qualifier qual, struct task_struct *task, - int cpu, void *base, size_t size, size_t th) -{ - struct ds_context *context; - int error; - size_t req_size; - - error = -EOPNOTSUPP; - if (!ds_cfg.sizeof_rec[qual]) - goto out; - - error = -EINVAL; - if (!base) - goto out; - - req_size = ds_cfg.sizeof_rec[qual]; - /* We might need space for alignment adjustments. */ - if (!IS_ALIGNED((unsigned long)base, DS_ALIGNMENT)) - req_size += DS_ALIGNMENT; - - error = -EINVAL; - if (size < req_size) - goto out; - - if (th != (size_t)-1) { - th *= ds_cfg.sizeof_rec[qual]; - - error = -EINVAL; - if (size <= th) - goto out; - } - - tracer->buffer = base; - tracer->size = size; - - error = -ENOMEM; - context = ds_get_context(task, cpu); - if (!context) - goto out; - tracer->context = context; - - /* - * Defer any tracer-specific initialization work for the context until - * context ownership has been clarified. - */ - - error = 0; - out: - return error; -} - -static struct bts_tracer *ds_request_bts(struct task_struct *task, int cpu, - void *base, size_t size, - bts_ovfl_callback_t ovfl, size_t th, - unsigned int flags) -{ - struct bts_tracer *tracer; - int error; - - /* Buffer overflow notification is not yet implemented. */ - error = -EOPNOTSUPP; - if (ovfl) - goto out; - - error = get_tracer(task); - if (error < 0) - goto out; - - error = -ENOMEM; - tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); - if (!tracer) - goto out_put_tracer; - tracer->ovfl = ovfl; - - /* Do some more error checking and acquire a tracing context. */ - error = ds_request(&tracer->ds, &tracer->trace.ds, - ds_bts, task, cpu, base, size, th); - if (error < 0) - goto out_tracer; - - /* Claim the bts part of the tracing context we acquired above. */ - spin_lock_irq(&ds_lock); - - error = -EPERM; - if (tracer->ds.context->bts_master) - goto out_unlock; - tracer->ds.context->bts_master = tracer; - - spin_unlock_irq(&ds_lock); - - /* - * Now that we own the bts part of the context, let's complete the - * initialization for that part. - */ - ds_init_ds_trace(&tracer->trace.ds, ds_bts, base, size, th, flags); - ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_bts); - ds_install_ds_area(tracer->ds.context); - - tracer->trace.read = bts_read; - tracer->trace.write = bts_write; - - /* Start tracing. 
*/ - ds_resume_bts(tracer); - - return tracer; - - out_unlock: - spin_unlock_irq(&ds_lock); - ds_put_context(tracer->ds.context); - out_tracer: - kfree(tracer); - out_put_tracer: - put_tracer(task); - out: - return ERR_PTR(error); -} - -struct bts_tracer *ds_request_bts_task(struct task_struct *task, - void *base, size_t size, - bts_ovfl_callback_t ovfl, - size_t th, unsigned int flags) -{ - return ds_request_bts(task, 0, base, size, ovfl, th, flags); -} - -struct bts_tracer *ds_request_bts_cpu(int cpu, void *base, size_t size, - bts_ovfl_callback_t ovfl, - size_t th, unsigned int flags) -{ - return ds_request_bts(NULL, cpu, base, size, ovfl, th, flags); -} - -static struct pebs_tracer *ds_request_pebs(struct task_struct *task, int cpu, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, size_t th, - unsigned int flags) -{ - struct pebs_tracer *tracer; - int error; - - /* Buffer overflow notification is not yet implemented. */ - error = -EOPNOTSUPP; - if (ovfl) - goto out; - - error = get_tracer(task); - if (error < 0) - goto out; - - error = -ENOMEM; - tracer = kzalloc(sizeof(*tracer), GFP_KERNEL); - if (!tracer) - goto out_put_tracer; - tracer->ovfl = ovfl; - - /* Do some more error checking and acquire a tracing context. */ - error = ds_request(&tracer->ds, &tracer->trace.ds, - ds_pebs, task, cpu, base, size, th); - if (error < 0) - goto out_tracer; - - /* Claim the pebs part of the tracing context we acquired above. */ - spin_lock_irq(&ds_lock); - - error = -EPERM; - if (tracer->ds.context->pebs_master) - goto out_unlock; - tracer->ds.context->pebs_master = tracer; - - spin_unlock_irq(&ds_lock); - - /* - * Now that we own the pebs part of the context, let's complete the - * initialization for that part. - */ - ds_init_ds_trace(&tracer->trace.ds, ds_pebs, base, size, th, flags); - ds_write_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); - ds_install_ds_area(tracer->ds.context); - - /* Start tracing. */ - ds_resume_pebs(tracer); - - return tracer; - - out_unlock: - spin_unlock_irq(&ds_lock); - ds_put_context(tracer->ds.context); - out_tracer: - kfree(tracer); - out_put_tracer: - put_tracer(task); - out: - return ERR_PTR(error); -} - -struct pebs_tracer *ds_request_pebs_task(struct task_struct *task, - void *base, size_t size, - pebs_ovfl_callback_t ovfl, - size_t th, unsigned int flags) -{ - return ds_request_pebs(task, 0, base, size, ovfl, th, flags); -} - -struct pebs_tracer *ds_request_pebs_cpu(int cpu, void *base, size_t size, - pebs_ovfl_callback_t ovfl, - size_t th, unsigned int flags) -{ - return ds_request_pebs(NULL, cpu, base, size, ovfl, th, flags); -} - -static void ds_free_bts(struct bts_tracer *tracer) -{ - struct task_struct *task; - - task = tracer->ds.context->task; - - WARN_ON_ONCE(tracer->ds.context->bts_master != tracer); - tracer->ds.context->bts_master = NULL; - - /* Make sure tracing stopped and the tracer is not in use. 
*/ - if (task && (task != current)) - wait_task_context_switch(task); - - ds_put_context(tracer->ds.context); - put_tracer(task); - - kfree(tracer); -} - -void ds_release_bts(struct bts_tracer *tracer) -{ - might_sleep(); - - if (!tracer) - return; - - ds_suspend_bts(tracer); - ds_free_bts(tracer); -} - -int ds_release_bts_noirq(struct bts_tracer *tracer) -{ - struct task_struct *task; - unsigned long irq; - int error; - - if (!tracer) - return 0; - - task = tracer->ds.context->task; - - local_irq_save(irq); - - error = -EPERM; - if (!task && - (tracer->ds.context->cpu != smp_processor_id())) - goto out; - - error = -EPERM; - if (task && (task != current)) - goto out; - - ds_suspend_bts_noirq(tracer); - ds_free_bts(tracer); - - error = 0; - out: - local_irq_restore(irq); - return error; -} - -static void update_task_debugctlmsr(struct task_struct *task, - unsigned long debugctlmsr) -{ - task->thread.debugctlmsr = debugctlmsr; - - get_cpu(); - if (task == current) - update_debugctlmsr(debugctlmsr); - put_cpu(); -} - -void ds_suspend_bts(struct bts_tracer *tracer) -{ - struct task_struct *task; - unsigned long debugctlmsr; - int cpu; - - if (!tracer) - return; - - tracer->flags = 0; - - task = tracer->ds.context->task; - cpu = tracer->ds.context->cpu; - - WARN_ON(!task && irqs_disabled()); - - debugctlmsr = (task ? - task->thread.debugctlmsr : - get_debugctlmsr_on_cpu(cpu)); - debugctlmsr &= ~BTS_CONTROL; - - if (task) - update_task_debugctlmsr(task, debugctlmsr); - else - update_debugctlmsr_on_cpu(cpu, debugctlmsr); -} - -int ds_suspend_bts_noirq(struct bts_tracer *tracer) -{ - struct task_struct *task; - unsigned long debugctlmsr, irq; - int cpu, error = 0; - - if (!tracer) - return 0; - - tracer->flags = 0; - - task = tracer->ds.context->task; - cpu = tracer->ds.context->cpu; - - local_irq_save(irq); - - error = -EPERM; - if (!task && (cpu != smp_processor_id())) - goto out; - - debugctlmsr = (task ? - task->thread.debugctlmsr : - get_debugctlmsr()); - debugctlmsr &= ~BTS_CONTROL; - - if (task) - update_task_debugctlmsr(task, debugctlmsr); - else - update_debugctlmsr(debugctlmsr); - - error = 0; - out: - local_irq_restore(irq); - return error; -} - -static unsigned long ds_bts_control(struct bts_tracer *tracer) -{ - unsigned long control; - - control = ds_cfg.ctl[dsf_bts]; - if (!(tracer->trace.ds.flags & BTS_KERNEL)) - control |= ds_cfg.ctl[dsf_bts_kernel]; - if (!(tracer->trace.ds.flags & BTS_USER)) - control |= ds_cfg.ctl[dsf_bts_user]; - - return control; -} - -void ds_resume_bts(struct bts_tracer *tracer) -{ - struct task_struct *task; - unsigned long debugctlmsr; - int cpu; - - if (!tracer) - return; - - tracer->flags = tracer->trace.ds.flags; - - task = tracer->ds.context->task; - cpu = tracer->ds.context->cpu; - - WARN_ON(!task && irqs_disabled()); - - debugctlmsr = (task ? - task->thread.debugctlmsr : - get_debugctlmsr_on_cpu(cpu)); - debugctlmsr |= ds_bts_control(tracer); - - if (task) - update_task_debugctlmsr(task, debugctlmsr); - else - update_debugctlmsr_on_cpu(cpu, debugctlmsr); -} - -int ds_resume_bts_noirq(struct bts_tracer *tracer) -{ - struct task_struct *task; - unsigned long debugctlmsr, irq; - int cpu, error = 0; - - if (!tracer) - return 0; - - tracer->flags = tracer->trace.ds.flags; - - task = tracer->ds.context->task; - cpu = tracer->ds.context->cpu; - - local_irq_save(irq); - - error = -EPERM; - if (!task && (cpu != smp_processor_id())) - goto out; - - debugctlmsr = (task ? 
- task->thread.debugctlmsr : - get_debugctlmsr()); - debugctlmsr |= ds_bts_control(tracer); - - if (task) - update_task_debugctlmsr(task, debugctlmsr); - else - update_debugctlmsr(debugctlmsr); - - error = 0; - out: - local_irq_restore(irq); - return error; -} - -static void ds_free_pebs(struct pebs_tracer *tracer) -{ - struct task_struct *task; - - task = tracer->ds.context->task; - - WARN_ON_ONCE(tracer->ds.context->pebs_master != tracer); - tracer->ds.context->pebs_master = NULL; - - ds_put_context(tracer->ds.context); - put_tracer(task); - - kfree(tracer); -} - -void ds_release_pebs(struct pebs_tracer *tracer) -{ - might_sleep(); - - if (!tracer) - return; - - ds_suspend_pebs(tracer); - ds_free_pebs(tracer); -} - -int ds_release_pebs_noirq(struct pebs_tracer *tracer) -{ - struct task_struct *task; - unsigned long irq; - int error; - - if (!tracer) - return 0; - - task = tracer->ds.context->task; - - local_irq_save(irq); - - error = -EPERM; - if (!task && - (tracer->ds.context->cpu != smp_processor_id())) - goto out; - - error = -EPERM; - if (task && (task != current)) - goto out; - - ds_suspend_pebs_noirq(tracer); - ds_free_pebs(tracer); - - error = 0; - out: - local_irq_restore(irq); - return error; -} - -void ds_suspend_pebs(struct pebs_tracer *tracer) -{ - -} - -int ds_suspend_pebs_noirq(struct pebs_tracer *tracer) -{ - return 0; -} - -void ds_resume_pebs(struct pebs_tracer *tracer) -{ - -} - -int ds_resume_pebs_noirq(struct pebs_tracer *tracer) -{ - return 0; -} - -const struct bts_trace *ds_read_bts(struct bts_tracer *tracer) -{ - if (!tracer) - return NULL; - - ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_bts); - return &tracer->trace; -} - -const struct pebs_trace *ds_read_pebs(struct pebs_tracer *tracer) -{ - if (!tracer) - return NULL; - - ds_read_config(tracer->ds.context, &tracer->trace.ds, ds_pebs); - - tracer->trace.counters = ds_cfg.nr_counter_reset; - memcpy(tracer->trace.counter_reset, - tracer->ds.context->ds + - (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field), - ds_cfg.nr_counter_reset * PEBS_RESET_FIELD_SIZE); - - return &tracer->trace; -} - -int ds_reset_bts(struct bts_tracer *tracer) -{ - if (!tracer) - return -EINVAL; - - tracer->trace.ds.top = tracer->trace.ds.begin; - - ds_set(tracer->ds.context->ds, ds_bts, ds_index, - (unsigned long)tracer->trace.ds.top); - - return 0; -} - -int ds_reset_pebs(struct pebs_tracer *tracer) -{ - if (!tracer) - return -EINVAL; - - tracer->trace.ds.top = tracer->trace.ds.begin; - - ds_set(tracer->ds.context->ds, ds_pebs, ds_index, - (unsigned long)tracer->trace.ds.top); - - return 0; -} - -int ds_set_pebs_reset(struct pebs_tracer *tracer, - unsigned int counter, u64 value) -{ - if (!tracer) - return -EINVAL; - - if (ds_cfg.nr_counter_reset < counter) - return -EINVAL; - - *(u64 *)(tracer->ds.context->ds + - (NUM_DS_PTR_FIELDS * ds_cfg.sizeof_ptr_field) + - (counter * PEBS_RESET_FIELD_SIZE)) = value; - - return 0; -} - -static const struct ds_configuration ds_cfg_netburst = { - .name = "Netburst", - .ctl[dsf_bts] = (1 << 2) | (1 << 3), - .ctl[dsf_bts_kernel] = (1 << 5), - .ctl[dsf_bts_user] = (1 << 6), - .nr_counter_reset = 1, -}; -static const struct ds_configuration ds_cfg_pentium_m = { - .name = "Pentium M", - .ctl[dsf_bts] = (1 << 6) | (1 << 7), - .nr_counter_reset = 1, -}; -static const struct ds_configuration ds_cfg_core2_atom = { - .name = "Core 2/Atom", - .ctl[dsf_bts] = (1 << 6) | (1 << 7), - .ctl[dsf_bts_kernel] = (1 << 9), - .ctl[dsf_bts_user] = (1 << 10), - .nr_counter_reset = 1, -}; -static const struct 
ds_configuration ds_cfg_core_i7 = { - .name = "Core i7", - .ctl[dsf_bts] = (1 << 6) | (1 << 7), - .ctl[dsf_bts_kernel] = (1 << 9), - .ctl[dsf_bts_user] = (1 << 10), - .nr_counter_reset = 4, -}; - -static void -ds_configure(const struct ds_configuration *cfg, - struct cpuinfo_x86 *cpu) -{ - unsigned long nr_pebs_fields = 0; - - printk(KERN_INFO "[ds] using %s configuration\n", cfg->name); - -#ifdef __i386__ - nr_pebs_fields = 10; -#else - nr_pebs_fields = 18; -#endif - - /* - * Starting with version 2, architectural performance - * monitoring supports a format specifier. - */ - if ((cpuid_eax(0xa) & 0xff) > 1) { - unsigned long perf_capabilities, format; - - rdmsrl(MSR_IA32_PERF_CAPABILITIES, perf_capabilities); - - format = (perf_capabilities >> 8) & 0xf; - - switch (format) { - case 0: - nr_pebs_fields = 18; - break; - case 1: - nr_pebs_fields = 22; - break; - default: - printk(KERN_INFO - "[ds] unknown PEBS format: %lu\n", format); - nr_pebs_fields = 0; - break; - } - } - - memset(&ds_cfg, 0, sizeof(ds_cfg)); - ds_cfg = *cfg; - - ds_cfg.sizeof_ptr_field = - (cpu_has(cpu, X86_FEATURE_DTES64) ? 8 : 4); - - ds_cfg.sizeof_rec[ds_bts] = ds_cfg.sizeof_ptr_field * 3; - ds_cfg.sizeof_rec[ds_pebs] = ds_cfg.sizeof_ptr_field * nr_pebs_fields; - - if (!cpu_has(cpu, X86_FEATURE_BTS)) { - ds_cfg.sizeof_rec[ds_bts] = 0; - printk(KERN_INFO "[ds] bts not available\n"); - } - if (!cpu_has(cpu, X86_FEATURE_PEBS)) { - ds_cfg.sizeof_rec[ds_pebs] = 0; - printk(KERN_INFO "[ds] pebs not available\n"); - } - - printk(KERN_INFO "[ds] sizes: address: %u bit, ", - 8 * ds_cfg.sizeof_ptr_field); - printk("bts/pebs record: %u/%u bytes\n", - ds_cfg.sizeof_rec[ds_bts], ds_cfg.sizeof_rec[ds_pebs]); - - WARN_ON_ONCE(MAX_PEBS_COUNTERS < ds_cfg.nr_counter_reset); -} - -void __cpuinit ds_init_intel(struct cpuinfo_x86 *c) -{ - /* Only configure the first cpu. Others are identical. */ - if (ds_cfg.name) - return; - - switch (c->x86) { - case 0x6: - switch (c->x86_model) { - case 0x9: - case 0xd: /* Pentium M */ - ds_configure(&ds_cfg_pentium_m, c); - break; - case 0xf: - case 0x17: /* Core2 */ - case 0x1c: /* Atom */ - ds_configure(&ds_cfg_core2_atom, c); - break; - case 0x1a: /* Core i7 */ - ds_configure(&ds_cfg_core_i7, c); - break; - default: - /* Sorry, don't know about them. */ - break; - } - break; - case 0xf: - switch (c->x86_model) { - case 0x0: - case 0x1: - case 0x2: /* Netburst */ - ds_configure(&ds_cfg_netburst, c); - break; - default: - /* Sorry, don't know about them. */ - break; - } - break; - default: - /* Sorry, don't know about them. */ - break; - } -} - -static inline void ds_take_timestamp(struct ds_context *context, - enum bts_qualifier qualifier, - struct task_struct *task) -{ - struct bts_tracer *tracer = context->bts_master; - struct bts_struct ts; - - /* Prevent compilers from reading the tracer pointer twice. */ - barrier(); - - if (!tracer || !(tracer->flags & BTS_TIMESTAMPS)) - return; - - memset(&ts, 0, sizeof(ts)); - ts.qualifier = qualifier; - ts.variant.event.clock = trace_clock_global(); - ts.variant.event.pid = task->pid; - - bts_write(tracer, &ts); -} - -/* - * Change the DS configuration from tracing prev to tracing next. - */ -void ds_switch_to(struct task_struct *prev, struct task_struct *next) -{ - struct ds_context *prev_ctx = prev->thread.ds_ctx; - struct ds_context *next_ctx = next->thread.ds_ctx; - unsigned long debugctlmsr = next->thread.debugctlmsr; - - /* Make sure all data is read before we start. 
*/ - barrier(); - - if (prev_ctx) { - update_debugctlmsr(0); - - ds_take_timestamp(prev_ctx, bts_task_departs, prev); - } - - if (next_ctx) { - ds_take_timestamp(next_ctx, bts_task_arrives, next); - - wrmsrl(MSR_IA32_DS_AREA, (unsigned long)next_ctx->ds); - } - - update_debugctlmsr(debugctlmsr); -} - -static __init int ds_selftest(void) -{ - if (ds_cfg.sizeof_rec[ds_bts]) { - int error; - - error = ds_selftest_bts(); - if (error) { - WARN(1, "[ds] selftest failed. disabling bts.\n"); - ds_cfg.sizeof_rec[ds_bts] = 0; - } - } - - if (ds_cfg.sizeof_rec[ds_pebs]) { - int error; - - error = ds_selftest_pebs(); - if (error) { - WARN(1, "[ds] selftest failed. disabling pebs.\n"); - ds_cfg.sizeof_rec[ds_pebs] = 0; - } - } - - return 0; -} -device_initcall(ds_selftest); diff --git a/arch/x86/kernel/ds_selftest.c b/arch/x86/kernel/ds_selftest.c deleted file mode 100644 index 6bc7c19..0000000 --- a/arch/x86/kernel/ds_selftest.c +++ /dev/null @@ -1,408 +0,0 @@ -/* - * Debug Store support - selftest - * - * - * Copyright (C) 2009 Intel Corporation. - * Markus Metzger , 2009 - */ - -#include "ds_selftest.h" - -#include -#include -#include -#include - -#include - - -#define BUFFER_SIZE 521 /* Intentionally chose an odd size. */ -#define SMALL_BUFFER_SIZE 24 /* A single bts entry. */ - -struct ds_selftest_bts_conf { - struct bts_tracer *tracer; - int error; - int (*suspend)(struct bts_tracer *); - int (*resume)(struct bts_tracer *); -}; - -static int ds_selftest_bts_consistency(const struct bts_trace *trace) -{ - int error = 0; - - if (!trace) { - printk(KERN_CONT "failed to access trace..."); - /* Bail out. Other tests are pointless. */ - return -1; - } - - if (!trace->read) { - printk(KERN_CONT "bts read not available..."); - error = -1; - } - - /* Do some sanity checks on the trace configuration. */ - if (!trace->ds.n) { - printk(KERN_CONT "empty bts buffer..."); - error = -1; - } - if (!trace->ds.size) { - printk(KERN_CONT "bad bts trace setup..."); - error = -1; - } - if (trace->ds.end != - (char *)trace->ds.begin + (trace->ds.n * trace->ds.size)) { - printk(KERN_CONT "bad bts buffer setup..."); - error = -1; - } - /* - * We allow top in [begin; end], since its not clear when the - * overflow adjustment happens: after the increment or before the - * write. - */ - if ((trace->ds.top < trace->ds.begin) || - (trace->ds.end < trace->ds.top)) { - printk(KERN_CONT "bts top out of bounds..."); - error = -1; - } - - return error; -} - -static int ds_selftest_bts_read(struct bts_tracer *tracer, - const struct bts_trace *trace, - const void *from, const void *to) -{ - const unsigned char *at; - - /* - * Check a few things which do not belong to this test. - * They should be covered by other tests. - */ - if (!trace) - return -1; - - if (!trace->read) - return -1; - - if (to < from) - return -1; - - if (from < trace->ds.begin) - return -1; - - if (trace->ds.end < to) - return -1; - - if (!trace->ds.size) - return -1; - - /* Now to the test itself. 
*/ - for (at = from; (void *)at < to; at += trace->ds.size) { - struct bts_struct bts; - unsigned long index; - int error; - - if (((void *)at - trace->ds.begin) % trace->ds.size) { - printk(KERN_CONT - "read from non-integer index..."); - return -1; - } - index = ((void *)at - trace->ds.begin) / trace->ds.size; - - memset(&bts, 0, sizeof(bts)); - error = trace->read(tracer, at, &bts); - if (error < 0) { - printk(KERN_CONT - "error reading bts trace at [%lu] (0x%p)...", - index, at); - return error; - } - - switch (bts.qualifier) { - case BTS_BRANCH: - break; - default: - printk(KERN_CONT - "unexpected bts entry %llu at [%lu] (0x%p)...", - bts.qualifier, index, at); - return -1; - } - } - - return 0; -} - -static void ds_selftest_bts_cpu(void *arg) -{ - struct ds_selftest_bts_conf *conf = arg; - const struct bts_trace *trace; - void *top; - - if (IS_ERR(conf->tracer)) { - conf->error = PTR_ERR(conf->tracer); - conf->tracer = NULL; - - printk(KERN_CONT - "initialization failed (err: %d)...", conf->error); - return; - } - - /* We should meanwhile have enough trace. */ - conf->error = conf->suspend(conf->tracer); - if (conf->error < 0) - return; - - /* Let's see if we can access the trace. */ - trace = ds_read_bts(conf->tracer); - - conf->error = ds_selftest_bts_consistency(trace); - if (conf->error < 0) - return; - - /* If everything went well, we should have a few trace entries. */ - if (trace->ds.top == trace->ds.begin) { - /* - * It is possible but highly unlikely that we got a - * buffer overflow and end up at exactly the same - * position we started from. - * Let's issue a warning, but continue. - */ - printk(KERN_CONT "no trace/overflow..."); - } - - /* Let's try to read the trace we collected. */ - conf->error = - ds_selftest_bts_read(conf->tracer, trace, - trace->ds.begin, trace->ds.top); - if (conf->error < 0) - return; - - /* - * Let's read the trace again. - * Since we suspended tracing, we should get the same result. - */ - top = trace->ds.top; - - trace = ds_read_bts(conf->tracer); - conf->error = ds_selftest_bts_consistency(trace); - if (conf->error < 0) - return; - - if (top != trace->ds.top) { - printk(KERN_CONT "suspend not working..."); - conf->error = -1; - return; - } - - /* Let's collect some more trace - see if resume is working. */ - conf->error = conf->resume(conf->tracer); - if (conf->error < 0) - return; - - conf->error = conf->suspend(conf->tracer); - if (conf->error < 0) - return; - - trace = ds_read_bts(conf->tracer); - - conf->error = ds_selftest_bts_consistency(trace); - if (conf->error < 0) - return; - - if (trace->ds.top == top) { - /* - * It is possible but highly unlikely that we got a - * buffer overflow and end up at exactly the same - * position we started from. - * Let's issue a warning and check the full trace. - */ - printk(KERN_CONT - "no resume progress/overflow..."); - - conf->error = - ds_selftest_bts_read(conf->tracer, trace, - trace->ds.begin, trace->ds.end); - } else if (trace->ds.top < top) { - /* - * We had a buffer overflow - the entire buffer should - * contain trace records. - */ - conf->error = - ds_selftest_bts_read(conf->tracer, trace, - trace->ds.begin, trace->ds.end); - } else { - /* - * It is quite likely that the buffer did not overflow. - * Let's just check the delta trace. 
- */ - conf->error = - ds_selftest_bts_read(conf->tracer, trace, top, - trace->ds.top); - } - if (conf->error < 0) - return; - - conf->error = 0; -} - -static int ds_suspend_bts_wrap(struct bts_tracer *tracer) -{ - ds_suspend_bts(tracer); - return 0; -} - -static int ds_resume_bts_wrap(struct bts_tracer *tracer) -{ - ds_resume_bts(tracer); - return 0; -} - -static void ds_release_bts_noirq_wrap(void *tracer) -{ - (void)ds_release_bts_noirq(tracer); -} - -static int ds_selftest_bts_bad_release_noirq(int cpu, - struct bts_tracer *tracer) -{ - int error = -EPERM; - - /* Try to release the tracer on the wrong cpu. */ - get_cpu(); - if (cpu != smp_processor_id()) { - error = ds_release_bts_noirq(tracer); - if (error != -EPERM) - printk(KERN_CONT "release on wrong cpu..."); - } - put_cpu(); - - return error ? 0 : -1; -} - -static int ds_selftest_bts_bad_request_cpu(int cpu, void *buffer) -{ - struct bts_tracer *tracer; - int error; - - /* Try to request cpu tracing while task tracing is active. */ - tracer = ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, NULL, - (size_t)-1, BTS_KERNEL); - error = PTR_ERR(tracer); - if (!IS_ERR(tracer)) { - ds_release_bts(tracer); - error = 0; - } - - if (error != -EPERM) - printk(KERN_CONT "cpu/task tracing overlap..."); - - return error ? 0 : -1; -} - -static int ds_selftest_bts_bad_request_task(void *buffer) -{ - struct bts_tracer *tracer; - int error; - - /* Try to request cpu tracing while task tracing is active. */ - tracer = ds_request_bts_task(current, buffer, BUFFER_SIZE, NULL, - (size_t)-1, BTS_KERNEL); - error = PTR_ERR(tracer); - if (!IS_ERR(tracer)) { - error = 0; - ds_release_bts(tracer); - } - - if (error != -EPERM) - printk(KERN_CONT "task/cpu tracing overlap..."); - - return error ? 0 : -1; -} - -int ds_selftest_bts(void) -{ - struct ds_selftest_bts_conf conf; - unsigned char buffer[BUFFER_SIZE], *small_buffer; - unsigned long irq; - int cpu; - - printk(KERN_INFO "[ds] bts selftest..."); - conf.error = 0; - - small_buffer = (unsigned char *)ALIGN((unsigned long)buffer, 8) + 8; - - get_online_cpus(); - for_each_online_cpu(cpu) { - conf.suspend = ds_suspend_bts_wrap; - conf.resume = ds_resume_bts_wrap; - conf.tracer = - ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, - NULL, (size_t)-1, BTS_KERNEL); - ds_selftest_bts_cpu(&conf); - if (conf.error >= 0) - conf.error = ds_selftest_bts_bad_request_task(buffer); - ds_release_bts(conf.tracer); - if (conf.error < 0) - goto out; - - conf.suspend = ds_suspend_bts_noirq; - conf.resume = ds_resume_bts_noirq; - conf.tracer = - ds_request_bts_cpu(cpu, buffer, BUFFER_SIZE, - NULL, (size_t)-1, BTS_KERNEL); - smp_call_function_single(cpu, ds_selftest_bts_cpu, &conf, 1); - if (conf.error >= 0) { - conf.error = - ds_selftest_bts_bad_release_noirq(cpu, - conf.tracer); - /* We must not release the tracer twice. 
*/ - if (conf.error < 0) - conf.tracer = NULL; - } - if (conf.error >= 0) - conf.error = ds_selftest_bts_bad_request_task(buffer); - smp_call_function_single(cpu, ds_release_bts_noirq_wrap, - conf.tracer, 1); - if (conf.error < 0) - goto out; - } - - conf.suspend = ds_suspend_bts_wrap; - conf.resume = ds_resume_bts_wrap; - conf.tracer = - ds_request_bts_task(current, buffer, BUFFER_SIZE, - NULL, (size_t)-1, BTS_KERNEL); - ds_selftest_bts_cpu(&conf); - if (conf.error >= 0) - conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); - ds_release_bts(conf.tracer); - if (conf.error < 0) - goto out; - - conf.suspend = ds_suspend_bts_noirq; - conf.resume = ds_resume_bts_noirq; - conf.tracer = - ds_request_bts_task(current, small_buffer, SMALL_BUFFER_SIZE, - NULL, (size_t)-1, BTS_KERNEL); - local_irq_save(irq); - ds_selftest_bts_cpu(&conf); - if (conf.error >= 0) - conf.error = ds_selftest_bts_bad_request_cpu(0, buffer); - ds_release_bts_noirq(conf.tracer); - local_irq_restore(irq); - if (conf.error < 0) - goto out; - - conf.error = 0; - out: - put_online_cpus(); - printk(KERN_CONT "%s.\n", (conf.error ? "failed" : "passed")); - - return conf.error; -} - -int ds_selftest_pebs(void) -{ - return 0; -} diff --git a/arch/x86/kernel/ds_selftest.h b/arch/x86/kernel/ds_selftest.h deleted file mode 100644 index 2ba8745..0000000 --- a/arch/x86/kernel/ds_selftest.h +++ /dev/null @@ -1,15 +0,0 @@ -/* - * Debug Store support - selftest - * - * - * Copyright (C) 2009 Intel Corporation. - * Markus Metzger , 2009 - */ - -#ifdef CONFIG_X86_DS_SELFTEST -extern int ds_selftest_bts(void); -extern int ds_selftest_pebs(void); -#else -static inline int ds_selftest_bts(void) { return 0; } -static inline int ds_selftest_pebs(void) { return 0; } -#endif diff --git a/arch/x86/kernel/dumpstack.c b/arch/x86/kernel/dumpstack.c index 6d81755..c89a386 100644 --- a/arch/x86/kernel/dumpstack.c +++ b/arch/x86/kernel/dumpstack.c @@ -224,11 +224,6 @@ unsigned __kprobes long oops_begin(void) int cpu; unsigned long flags; - /* notify the hw-branch tracer so it may disable tracing and - add the last trace to the trace buffer - - the earlier this happens, the more useful the trace. */ - trace_hw_branch_oops(); - oops_enter(); /* racy, but better than risking deadlock. 
*/ diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index b43bbae..7a880ad 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -422,14 +422,12 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, static void __kprobes clear_btf(void) { - if (test_thread_flag(TIF_DEBUGCTLMSR)) - update_debugctlmsr(0); + /* XXX */ } static void __kprobes restore_btf(void) { - if (test_thread_flag(TIF_DEBUGCTLMSR)) - update_debugctlmsr(current->thread.debugctlmsr); + /* XXX */ } void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index ad95406..1a60beb 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -20,7 +20,6 @@ #include #include #include -#include #include unsigned long idle_halt; @@ -50,8 +49,6 @@ void free_thread_xstate(struct task_struct *tsk) kmem_cache_free(task_xstate_cachep, tsk->thread.xstate); tsk->thread.xstate = NULL; } - - WARN(tsk->thread.ds_ctx, "leaking DS context\n"); } void free_thread_info(struct thread_info *ti) @@ -198,12 +195,6 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, prev = &prev_p->thread; next = &next_p->thread; - if (test_tsk_thread_flag(next_p, TIF_DS_AREA_MSR) || - test_tsk_thread_flag(prev_p, TIF_DS_AREA_MSR)) - ds_switch_to(prev_p, next_p); - else if (next->debugctlmsr != prev->debugctlmsr) - update_debugctlmsr(next->debugctlmsr); - if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ diff --git a/arch/x86/kernel/process_32.c b/arch/x86/kernel/process_32.c index f6c6266..75090c5 100644 --- a/arch/x86/kernel/process_32.c +++ b/arch/x86/kernel/process_32.c @@ -55,7 +55,6 @@ #include #include #include -#include #include asmlinkage void ret_from_fork(void) __asm__("ret_from_fork"); @@ -238,13 +237,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, kfree(p->thread.io_bitmap_ptr); p->thread.io_bitmap_max = 0; } - - clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); - p->thread.ds_ctx = NULL; - - clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); - p->thread.debugctlmsr = 0; - return err; } diff --git a/arch/x86/kernel/process_64.c b/arch/x86/kernel/process_64.c index dc9690b..cc4258f 100644 --- a/arch/x86/kernel/process_64.c +++ b/arch/x86/kernel/process_64.c @@ -49,7 +49,6 @@ #include #include #include -#include #include asmlinkage extern void ret_from_fork(void); @@ -313,13 +312,6 @@ int copy_thread(unsigned long clone_flags, unsigned long sp, if (err) goto out; } - - clear_tsk_thread_flag(p, TIF_DS_AREA_MSR); - p->thread.ds_ctx = NULL; - - clear_tsk_thread_flag(p, TIF_DEBUGCTLMSR); - p->thread.debugctlmsr = 0; - err = 0; out: if (err && p->thread.io_bitmap_ptr) { diff --git a/arch/x86/kernel/ptrace.c b/arch/x86/kernel/ptrace.c index a503b1f..f2fd3b8 100644 --- a/arch/x86/kernel/ptrace.c +++ b/arch/x86/kernel/ptrace.c @@ -2,9 +2,6 @@ /* * Pentium III FXSR, SSE support * Gareth Hughes , May 2000 - * - * BTS tracing - * Markus Metzger , Dec 2007 */ #include @@ -21,7 +18,6 @@ #include #include #include -#include #include #include @@ -35,7 +31,6 @@ #include #include #include -#include #include #include "tls.h" @@ -788,342 +783,6 @@ static int ioperm_get(struct task_struct *target, 0, IO_BITMAP_BYTES); } -#ifdef CONFIG_X86_PTRACE_BTS -/* - * A branch trace store context. - * - * Contexts may only be installed by ptrace_bts_config() and only for - * ptraced tasks. 
- * - * Contexts are destroyed when the tracee is detached from the tracer. - * The actual destruction work requires interrupts enabled, so the - * work is deferred and will be scheduled during __ptrace_unlink(). - * - * Contexts hold an additional task_struct reference on the traced - * task, as well as a reference on the tracer's mm. - * - * Ptrace already holds a task_struct for the duration of ptrace operations, - * but since destruction is deferred, it may be executed after both - * tracer and tracee exited. - */ -struct bts_context { - /* The branch trace handle. */ - struct bts_tracer *tracer; - - /* The buffer used to store the branch trace and its size. */ - void *buffer; - unsigned int size; - - /* The mm that paid for the above buffer. */ - struct mm_struct *mm; - - /* The task this context belongs to. */ - struct task_struct *task; - - /* The signal to send on a bts buffer overflow. */ - unsigned int bts_ovfl_signal; - - /* The work struct to destroy a context. */ - struct work_struct work; -}; - -static int alloc_bts_buffer(struct bts_context *context, unsigned int size) -{ - void *buffer = NULL; - int err = -ENOMEM; - - err = account_locked_memory(current->mm, current->signal->rlim, size); - if (err < 0) - return err; - - buffer = kzalloc(size, GFP_KERNEL); - if (!buffer) - goto out_refund; - - context->buffer = buffer; - context->size = size; - context->mm = get_task_mm(current); - - return 0; - - out_refund: - refund_locked_memory(current->mm, size); - return err; -} - -static inline void free_bts_buffer(struct bts_context *context) -{ - if (!context->buffer) - return; - - kfree(context->buffer); - context->buffer = NULL; - - refund_locked_memory(context->mm, context->size); - context->size = 0; - - mmput(context->mm); - context->mm = NULL; -} - -static void free_bts_context_work(struct work_struct *w) -{ - struct bts_context *context; - - context = container_of(w, struct bts_context, work); - - ds_release_bts(context->tracer); - put_task_struct(context->task); - free_bts_buffer(context); - kfree(context); -} - -static inline void free_bts_context(struct bts_context *context) -{ - INIT_WORK(&context->work, free_bts_context_work); - schedule_work(&context->work); -} - -static inline struct bts_context *alloc_bts_context(struct task_struct *task) -{ - struct bts_context *context = kzalloc(sizeof(*context), GFP_KERNEL); - if (context) { - context->task = task; - task->bts = context; - - get_task_struct(task); - } - - return context; -} - -static int ptrace_bts_read_record(struct task_struct *child, size_t index, - struct bts_struct __user *out) -{ - struct bts_context *context; - const struct bts_trace *trace; - struct bts_struct bts; - const unsigned char *at; - int error; - - context = child->bts; - if (!context) - return -ESRCH; - - trace = ds_read_bts(context->tracer); - if (!trace) - return -ESRCH; - - at = trace->ds.top - ((index + 1) * trace->ds.size); - if ((void *)at < trace->ds.begin) - at += (trace->ds.n * trace->ds.size); - - if (!trace->read) - return -EOPNOTSUPP; - - error = trace->read(context->tracer, at, &bts); - if (error < 0) - return error; - - if (copy_to_user(out, &bts, sizeof(bts))) - return -EFAULT; - - return sizeof(bts); -} - -static int ptrace_bts_drain(struct task_struct *child, - long size, - struct bts_struct __user *out) -{ - struct bts_context *context; - const struct bts_trace *trace; - const unsigned char *at; - int error, drained = 0; - - context = child->bts; - if (!context) - return -ESRCH; - - trace = ds_read_bts(context->tracer); - if 
(!trace) - return -ESRCH; - - if (!trace->read) - return -EOPNOTSUPP; - - if (size < (trace->ds.top - trace->ds.begin)) - return -EIO; - - for (at = trace->ds.begin; (void *)at < trace->ds.top; - out++, drained++, at += trace->ds.size) { - struct bts_struct bts; - - error = trace->read(context->tracer, at, &bts); - if (error < 0) - return error; - - if (copy_to_user(out, &bts, sizeof(bts))) - return -EFAULT; - } - - memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); - - error = ds_reset_bts(context->tracer); - if (error < 0) - return error; - - return drained; -} - -static int ptrace_bts_config(struct task_struct *child, - long cfg_size, - const struct ptrace_bts_config __user *ucfg) -{ - struct bts_context *context; - struct ptrace_bts_config cfg; - unsigned int flags = 0; - - if (cfg_size < sizeof(cfg)) - return -EIO; - - if (copy_from_user(&cfg, ucfg, sizeof(cfg))) - return -EFAULT; - - context = child->bts; - if (!context) - context = alloc_bts_context(child); - if (!context) - return -ENOMEM; - - if (cfg.flags & PTRACE_BTS_O_SIGNAL) { - if (!cfg.signal) - return -EINVAL; - - return -EOPNOTSUPP; - context->bts_ovfl_signal = cfg.signal; - } - - ds_release_bts(context->tracer); - context->tracer = NULL; - - if ((cfg.flags & PTRACE_BTS_O_ALLOC) && (cfg.size != context->size)) { - int err; - - free_bts_buffer(context); - if (!cfg.size) - return 0; - - err = alloc_bts_buffer(context, cfg.size); - if (err < 0) - return err; - } - - if (cfg.flags & PTRACE_BTS_O_TRACE) - flags |= BTS_USER; - - if (cfg.flags & PTRACE_BTS_O_SCHED) - flags |= BTS_TIMESTAMPS; - - context->tracer = - ds_request_bts_task(child, context->buffer, context->size, - NULL, (size_t)-1, flags); - if (unlikely(IS_ERR(context->tracer))) { - int error = PTR_ERR(context->tracer); - - free_bts_buffer(context); - context->tracer = NULL; - return error; - } - - return sizeof(cfg); -} - -static int ptrace_bts_status(struct task_struct *child, - long cfg_size, - struct ptrace_bts_config __user *ucfg) -{ - struct bts_context *context; - const struct bts_trace *trace; - struct ptrace_bts_config cfg; - - context = child->bts; - if (!context) - return -ESRCH; - - if (cfg_size < sizeof(cfg)) - return -EIO; - - trace = ds_read_bts(context->tracer); - if (!trace) - return -ESRCH; - - memset(&cfg, 0, sizeof(cfg)); - cfg.size = trace->ds.end - trace->ds.begin; - cfg.signal = context->bts_ovfl_signal; - cfg.bts_size = sizeof(struct bts_struct); - - if (cfg.signal) - cfg.flags |= PTRACE_BTS_O_SIGNAL; - - if (trace->ds.flags & BTS_USER) - cfg.flags |= PTRACE_BTS_O_TRACE; - - if (trace->ds.flags & BTS_TIMESTAMPS) - cfg.flags |= PTRACE_BTS_O_SCHED; - - if (copy_to_user(ucfg, &cfg, sizeof(cfg))) - return -EFAULT; - - return sizeof(cfg); -} - -static int ptrace_bts_clear(struct task_struct *child) -{ - struct bts_context *context; - const struct bts_trace *trace; - - context = child->bts; - if (!context) - return -ESRCH; - - trace = ds_read_bts(context->tracer); - if (!trace) - return -ESRCH; - - memset(trace->ds.begin, 0, trace->ds.n * trace->ds.size); - - return ds_reset_bts(context->tracer); -} - -static int ptrace_bts_size(struct task_struct *child) -{ - struct bts_context *context; - const struct bts_trace *trace; - - context = child->bts; - if (!context) - return -ESRCH; - - trace = ds_read_bts(context->tracer); - if (!trace) - return -ESRCH; - - return (trace->ds.top - trace->ds.begin) / trace->ds.size; -} - -/* - * Called from __ptrace_unlink() after the child has been moved back - * to its original parent. 
- */ -void ptrace_bts_untrace(struct task_struct *child) -{ - if (unlikely(child->bts)) { - free_bts_context(child->bts); - child->bts = NULL; - } -} -#endif /* CONFIG_X86_PTRACE_BTS */ - /* * Called by kernel/ptrace.c when detaching.. * @@ -1251,39 +910,6 @@ long arch_ptrace(struct task_struct *child, long request, long addr, long data) break; #endif - /* - * These bits need more cooking - not enabled yet: - */ -#ifdef CONFIG_X86_PTRACE_BTS - case PTRACE_BTS_CONFIG: - ret = ptrace_bts_config - (child, data, (struct ptrace_bts_config __user *)addr); - break; - - case PTRACE_BTS_STATUS: - ret = ptrace_bts_status - (child, data, (struct ptrace_bts_config __user *)addr); - break; - - case PTRACE_BTS_SIZE: - ret = ptrace_bts_size(child); - break; - - case PTRACE_BTS_GET: - ret = ptrace_bts_read_record - (child, data, (struct bts_struct __user *) addr); - break; - - case PTRACE_BTS_CLEAR: - ret = ptrace_bts_clear(child); - break; - - case PTRACE_BTS_DRAIN: - ret = ptrace_bts_drain - (child, data, (struct bts_struct __user *) addr); - break; -#endif /* CONFIG_X86_PTRACE_BTS */ - default: ret = ptrace_request(child, request, addr, data); break; @@ -1543,14 +1169,6 @@ long compat_arch_ptrace(struct task_struct *child, compat_long_t request, case PTRACE_GET_THREAD_AREA: case PTRACE_SET_THREAD_AREA: -#ifdef CONFIG_X86_PTRACE_BTS - case PTRACE_BTS_CONFIG: - case PTRACE_BTS_STATUS: - case PTRACE_BTS_SIZE: - case PTRACE_BTS_GET: - case PTRACE_BTS_CLEAR: - case PTRACE_BTS_DRAIN: -#endif /* CONFIG_X86_PTRACE_BTS */ return arch_ptrace(child, request, addr, data); default: diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 3149032..7beba07 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -158,22 +158,6 @@ static int enable_single_step(struct task_struct *child) } /* - * Install this value in MSR_IA32_DEBUGCTLMSR whenever child is running. - */ -static void write_debugctlmsr(struct task_struct *child, unsigned long val) -{ - if (child->thread.debugctlmsr == val) - return; - - child->thread.debugctlmsr = val; - - if (child != current) - return; - - update_debugctlmsr(val); -} - -/* * Enable single or block step. */ static void enable_step(struct task_struct *child, bool block) @@ -185,17 +169,9 @@ static void enable_step(struct task_struct *child, bool block) * So noone should try to use debugger block stepping in a program * that uses user-mode single stepping itself. */ - if (enable_single_step(child) && block) { - set_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - write_debugctlmsr(child, - child->thread.debugctlmsr | DEBUGCTLMSR_BTF); - } else { - write_debugctlmsr(child, - child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); - - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); - } + if (!enable_single_step(child)) + return; + /* XXX */ } void user_enable_single_step(struct task_struct *child) @@ -213,11 +189,7 @@ void user_disable_single_step(struct task_struct *child) /* * Make sure block stepping (BTF) is disabled. */ - write_debugctlmsr(child, - child->thread.debugctlmsr & ~DEBUGCTLMSR_BTF); - - if (!child->thread.debugctlmsr) - clear_tsk_thread_flag(child, TIF_DEBUGCTLMSR); + /* XXX */ /* Always clear TIF_SINGLESTEP... 
*/ clear_tsk_thread_flag(child, TIF_SINGLESTEP); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index 1168e44..e3da5d7 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -543,11 +543,6 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* DR6 may or may not be cleared by the CPU */ set_debugreg(0, 6); - /* - * The processor cleared BTF, so don't mark that we need it set. - */ - clear_tsk_thread_flag(tsk, TIF_DEBUGCTLMSR); - tsk->thread.debugctlmsr = 0; /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; -- cgit v1.1 From ea8e61b7bbc4a2faef77db34eb2db2a2c2372ff6 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Thu, 25 Mar 2010 14:51:51 +0100 Subject: x86, ptrace: Fix block-step Implement ptrace-block-step using TIF_BLOCKSTEP which will set DEBUGCTLMSR_BTF when set for a task while preserving any other DEBUGCTLMSR bits. Signed-off-by: Peter Zijlstra LKML-Reference: <20100325135414.017536066@chello.nl> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/processor.h | 4 ++-- arch/x86/include/asm/thread_info.h | 4 +++- arch/x86/kernel/kprobes.c | 14 ++++++++++++-- arch/x86/kernel/process.c | 11 +++++++++++ arch/x86/kernel/step.c | 24 ++++++++++++++++++++---- arch/x86/kernel/traps.c | 5 +++++ 6 files changed, 53 insertions(+), 9 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/processor.h b/arch/x86/include/asm/processor.h index 5bec21a..32428b4 100644 --- a/arch/x86/include/asm/processor.h +++ b/arch/x86/include/asm/processor.h @@ -799,7 +799,7 @@ extern void cpu_init(void); static inline unsigned long get_debugctlmsr(void) { - unsigned long debugctlmsr = 0; + unsigned long debugctlmsr = 0; #ifndef CONFIG_X86_DEBUGCTLMSR if (boot_cpu_data.x86 < 6) @@ -807,7 +807,7 @@ static inline unsigned long get_debugctlmsr(void) #endif rdmsrl(MSR_IA32_DEBUGCTLMSR, debugctlmsr); - return debugctlmsr; + return debugctlmsr; } static inline void update_debugctlmsr(unsigned long debugctlmsr) diff --git a/arch/x86/include/asm/thread_info.h b/arch/x86/include/asm/thread_info.h index dc85e12..d017ed55 100644 --- a/arch/x86/include/asm/thread_info.h +++ b/arch/x86/include/asm/thread_info.h @@ -92,6 +92,7 @@ struct thread_info { #define TIF_IO_BITMAP 22 /* uses I/O bitmap */ #define TIF_FREEZE 23 /* is freezing for suspend */ #define TIF_FORCED_TF 24 /* true if TF in eflags artificially */ +#define TIF_BLOCKSTEP 25 /* set when we want DEBUGCTLMSR_BTF */ #define TIF_LAZY_MMU_UPDATES 27 /* task is updating the mmu lazily */ #define TIF_SYSCALL_TRACEPOINT 28 /* syscall tracepoint instrumentation */ @@ -113,6 +114,7 @@ struct thread_info { #define _TIF_IO_BITMAP (1 << TIF_IO_BITMAP) #define _TIF_FREEZE (1 << TIF_FREEZE) #define _TIF_FORCED_TF (1 << TIF_FORCED_TF) +#define _TIF_BLOCKSTEP (1 << TIF_BLOCKSTEP) #define _TIF_LAZY_MMU_UPDATES (1 << TIF_LAZY_MMU_UPDATES) #define _TIF_SYSCALL_TRACEPOINT (1 << TIF_SYSCALL_TRACEPOINT) @@ -143,7 +145,7 @@ struct thread_info { /* flags to check in __switch_to() */ #define _TIF_WORK_CTXSW \ - (_TIF_IO_BITMAP|_TIF_NOTSC) + (_TIF_IO_BITMAP|_TIF_NOTSC|_TIF_BLOCKSTEP) #define _TIF_WORK_CTXSW_PREV (_TIF_WORK_CTXSW|_TIF_USER_RETURN_NOTIFY) #define _TIF_WORK_CTXSW_NEXT (_TIF_WORK_CTXSW|_TIF_DEBUG) diff --git a/arch/x86/kernel/kprobes.c b/arch/x86/kernel/kprobes.c index 7a880ad..f2f56c0 100644 --- a/arch/x86/kernel/kprobes.c +++ b/arch/x86/kernel/kprobes.c @@ -422,12 +422,22 @@ static void __kprobes set_current_kprobe(struct kprobe *p, struct pt_regs *regs, static void __kprobes 
clear_btf(void) { - /* XXX */ + if (test_thread_flag(TIF_BLOCKSTEP)) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl &= ~DEBUGCTLMSR_BTF; + update_debugctlmsr(debugctl); + } } static void __kprobes restore_btf(void) { - /* XXX */ + if (test_thread_flag(TIF_BLOCKSTEP)) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl |= DEBUGCTLMSR_BTF; + update_debugctlmsr(debugctl); + } } void __kprobes arch_prepare_kretprobe(struct kretprobe_instance *ri, diff --git a/arch/x86/kernel/process.c b/arch/x86/kernel/process.c index 1a60beb..8328009 100644 --- a/arch/x86/kernel/process.c +++ b/arch/x86/kernel/process.c @@ -195,6 +195,17 @@ void __switch_to_xtra(struct task_struct *prev_p, struct task_struct *next_p, prev = &prev_p->thread; next = &next_p->thread; + if (test_tsk_thread_flag(prev_p, TIF_BLOCKSTEP) ^ + test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl &= ~DEBUGCTLMSR_BTF; + if (test_tsk_thread_flag(next_p, TIF_BLOCKSTEP)) + debugctl |= DEBUGCTLMSR_BTF; + + update_debugctlmsr(debugctl); + } + if (test_tsk_thread_flag(prev_p, TIF_NOTSC) ^ test_tsk_thread_flag(next_p, TIF_NOTSC)) { /* prev and next are different */ diff --git a/arch/x86/kernel/step.c b/arch/x86/kernel/step.c index 7beba07..58de45e 100644 --- a/arch/x86/kernel/step.c +++ b/arch/x86/kernel/step.c @@ -169,9 +169,19 @@ static void enable_step(struct task_struct *child, bool block) * So noone should try to use debugger block stepping in a program * that uses user-mode single stepping itself. */ - if (!enable_single_step(child)) - return; - /* XXX */ + if (enable_single_step(child) && block) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl |= DEBUGCTLMSR_BTF; + update_debugctlmsr(debugctl); + set_tsk_thread_flag(child, TIF_BLOCKSTEP); + } else if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl &= ~DEBUGCTLMSR_BTF; + update_debugctlmsr(debugctl); + clear_tsk_thread_flag(child, TIF_BLOCKSTEP); + } } void user_enable_single_step(struct task_struct *child) @@ -189,7 +199,13 @@ void user_disable_single_step(struct task_struct *child) /* * Make sure block stepping (BTF) is disabled. */ - /* XXX */ + if (test_tsk_thread_flag(child, TIF_BLOCKSTEP)) { + unsigned long debugctl = get_debugctlmsr(); + + debugctl &= ~DEBUGCTLMSR_BTF; + update_debugctlmsr(debugctl); + clear_tsk_thread_flag(child, TIF_BLOCKSTEP); + } /* Always clear TIF_SINGLESTEP... */ clear_tsk_thread_flag(child, TIF_SINGLESTEP); diff --git a/arch/x86/kernel/traps.c b/arch/x86/kernel/traps.c index e3da5d7..36f1bd9 100644 --- a/arch/x86/kernel/traps.c +++ b/arch/x86/kernel/traps.c @@ -544,6 +544,11 @@ dotraplinkage void __kprobes do_debug(struct pt_regs *regs, long error_code) /* DR6 may or may not be cleared by the CPU */ set_debugreg(0, 6); + /* + * The processor cleared BTF, so don't mark that we need it set. + */ + clear_tsk_thread_flag(tsk, TIF_BLOCKSTEP); + /* Store the virtualized DR6 value */ tsk->thread.debugreg6 = dr6; -- cgit v1.1 From 11164cd4f6dab326a88bdf27f2f8f7c11977e91a Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Fri, 26 Mar 2010 14:08:44 +0100 Subject: perf, x86: Add Nehalem PMU programming errata workaround Implement the workaround for Intel Errata AAK100 and AAP53. Also, remove the Core-i7 name for Nehalem events since there are also Westmere based i7 chips.
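As an illustration only, the three "magic" non-counting event values programmed below are ordinary event selects with the USR, OS and ENABLE bits set. A minimal user-space sketch, using the ARCH_PERFMON_EVENTSEL_USR/OS/ENABLE bit positions from perf_event.h (the EVTSEL_* names here are local shorthand, not kernel symbols), reconstructs the values:

#include <stdio.h>

/* Bit positions taken from arch/x86/include/asm/perf_event.h. */
#define EVTSEL_USR	(1ULL << 16)	/* count user-mode events */
#define EVTSEL_OS	(1ULL << 17)	/* count kernel-mode events */
#define EVTSEL_ENABLE	(1ULL << 22)	/* enable the counter */

int main(void)
{
	/* Event codes of the three non-counting 'reset' events. */
	unsigned long long codes[] = { 0xD2, 0xB1, 0xB5 };
	int i;

	for (i = 0; i < 3; i++)
		printf("0x%06llX\n",
		       EVTSEL_ENABLE | EVTSEL_OS | EVTSEL_USR | codes[i]);

	return 0;	/* prints 0x4300D2, 0x4300B1, 0x4300B5 */
}

The workaround itself lives in intel_pmu_nhm_enable_all() in the diff below.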
Signed-off-by: Peter Zijlstra Cc: Stephane Eranian LKML-Reference: <1269608924.12097.147.camel@laptop> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 8 +++---- arch/x86/kernel/cpu/perf_event_intel.c | 43 ++++++++++++++++++++++++++++++---- arch/x86/kernel/cpu/perf_event_p4.c | 2 +- arch/x86/kernel/cpu/perf_event_p6.c | 2 +- 4 files changed, 45 insertions(+), 10 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index f571f51..6f66d4a 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -184,7 +184,7 @@ struct x86_pmu { int version; int (*handle_irq)(struct pt_regs *); void (*disable_all)(void); - void (*enable_all)(void); + void (*enable_all)(int added); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); int (*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc); @@ -576,7 +576,7 @@ void hw_perf_disable(void) x86_pmu.disable_all(); } -static void x86_pmu_enable_all(void) +static void x86_pmu_enable_all(int added) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; @@ -784,7 +784,7 @@ void hw_perf_enable(void) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); struct perf_event *event; struct hw_perf_event *hwc; - int i; + int i, added = cpuc->n_added; if (!x86_pmu_initialized()) return; @@ -836,7 +836,7 @@ void hw_perf_enable(void) cpuc->enabled = 1; barrier(); - x86_pmu.enable_all(); + x86_pmu.enable_all(added); } static inline void __x86_pmu_enable_event(struct hw_perf_event *hwc) diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 044b843..676aac2 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -483,7 +483,7 @@ static void intel_pmu_disable_all(void) intel_pmu_lbr_disable_all(); } -static void intel_pmu_enable_all(void) +static void intel_pmu_enable_all(int added) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -502,6 +502,40 @@ static void intel_pmu_enable_all(void) } } +/* + * Workaround for: + * Intel Errata AAK100 (model 26) + * Intel Errata AAP53 (model 30) + * + * These chips need to be 'reset' when adding counters by programming + * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 + * either in sequence on the same PMC or on different PMCs. 
+ */ +static void intel_pmu_nhm_enable_all(int added) +{ + if (added) { + struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); + int i; + + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 0, 0x4300D2); + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 1, 0x4300B1); + wrmsrl(MSR_ARCH_PERFMON_EVENTSEL0 + 2, 0x4300B5); + + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x3); + wrmsrl(MSR_CORE_PERF_GLOBAL_CTRL, 0x0); + + for (i = 0; i < 3; i++) { + struct perf_event *event = cpuc->events[i]; + + if (!event) + continue; + + __x86_pmu_enable_event(&event->hw); + } + } + intel_pmu_enable_all(added); +} + static inline u64 intel_pmu_get_status(void) { u64 status; @@ -658,7 +692,7 @@ static int intel_pmu_handle_irq(struct pt_regs *regs) intel_pmu_drain_bts_buffer(); status = intel_pmu_get_status(); if (!status) { - intel_pmu_enable_all(); + intel_pmu_enable_all(0); return 0; } @@ -707,7 +741,7 @@ again: goto again; done: - intel_pmu_enable_all(); + intel_pmu_enable_all(0); return 1; } @@ -920,7 +954,8 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_nhm(); x86_pmu.event_constraints = intel_nehalem_event_constraints; - pr_cont("Nehalem/Corei7 events, "); + x86_pmu.enable_all = intel_pmu_nhm_enable_all; + pr_cont("Nehalem events, "); break; case 28: /* Atom */ diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index f8fe069..0d1be36 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -535,7 +535,7 @@ static void p4_pmu_enable_event(struct perf_event *event) (cccr & ~P4_CCCR_RESERVED) | P4_CCCR_ENABLE); } -static void p4_pmu_enable_all(void) +static void p4_pmu_enable_all(int added) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 6ff4d01d..877182c 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -66,7 +66,7 @@ static void p6_pmu_disable_all(void) wrmsrl(MSR_P6_EVNTSEL0, val); } -static void p6_pmu_enable_all(void) +static void p6_pmu_enable_all(int added) { unsigned long val; -- cgit v1.1 From 948b1bb89a44561560531394c18da4a99215f772 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Mon, 29 Mar 2010 18:36:50 +0200 Subject: perf, x86: Undo some *_counter* -> *_event* renames The big rename: cdd6c48 perf: Do the big rename: Performance Counters -> Performance Events accidentally renamed some members of structs that were named after registers in the spec. To avoid confusion this patch reverts some changes. The related specs are MSR descriptions in AMD's BKDGs and the ARCHITECTURAL PERFORMANCE MONITORING section in the Intel 64 and IA-32 Architectures Software Developer's Manuals.
This patch does: $ sed -i -e 's:num_events:num_counters:g' \ arch/x86/include/asm/perf_event.h \ arch/x86/kernel/cpu/perf_event_amd.c \ arch/x86/kernel/cpu/perf_event.c \ arch/x86/kernel/cpu/perf_event_intel.c \ arch/x86/kernel/cpu/perf_event_p6.c \ arch/x86/kernel/cpu/perf_event_p4.c \ arch/x86/oprofile/op_model_ppro.c $ sed -i -e 's:event_bits:cntval_bits:g' -e 's:event_mask:cntval_mask:g' \ arch/x86/kernel/cpu/perf_event_amd.c \ arch/x86/kernel/cpu/perf_event.c \ arch/x86/kernel/cpu/perf_event_intel.c \ arch/x86/kernel/cpu/perf_event_p6.c \ arch/x86/kernel/cpu/perf_event_p4.c Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <1269880612-25800-2-git-send-email-robert.richter@amd.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 4 +- arch/x86/kernel/cpu/perf_event.c | 74 +++++++++++++++++----------------- arch/x86/kernel/cpu/perf_event_amd.c | 12 +++--- arch/x86/kernel/cpu/perf_event_intel.c | 16 ++++---- arch/x86/kernel/cpu/perf_event_p4.c | 14 +++---- arch/x86/kernel/cpu/perf_event_p6.c | 6 +-- arch/x86/oprofile/op_model_ppro.c | 4 +- 7 files changed, 65 insertions(+), 65 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 124dddd..987bf67 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -67,7 +67,7 @@ union cpuid10_eax { struct { unsigned int version_id:8; - unsigned int num_events:8; + unsigned int num_counters:8; unsigned int bit_width:8; unsigned int mask_length:8; } split; @@ -76,7 +76,7 @@ union cpuid10_eax { union cpuid10_edx { struct { - unsigned int num_events_fixed:4; + unsigned int num_counters_fixed:4; unsigned int reserved:28; } split; unsigned int full; diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index b534356..9daaa1e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -195,10 +195,10 @@ struct x86_pmu { u64 (*event_map)(int); u64 (*raw_event)(u64); int max_events; - int num_events; - int num_events_fixed; - int event_bits; - u64 event_mask; + int num_counters; + int num_counters_fixed; + int cntval_bits; + u64 cntval_mask; int apic; u64 max_period; struct event_constraint * @@ -268,7 +268,7 @@ static u64 x86_perf_event_update(struct perf_event *event) { struct hw_perf_event *hwc = &event->hw; - int shift = 64 - x86_pmu.event_bits; + int shift = 64 - x86_pmu.cntval_bits; u64 prev_raw_count, new_raw_count; int idx = hwc->idx; s64 delta; @@ -320,12 +320,12 @@ static bool reserve_pmc_hardware(void) if (nmi_watchdog == NMI_LOCAL_APIC) disable_lapic_nmi_watchdog(); - for (i = 0; i < x86_pmu.num_events; i++) { + for (i = 0; i < x86_pmu.num_counters; i++) { if (!reserve_perfctr_nmi(x86_pmu.perfctr + i)) goto perfctr_fail; } - for (i = 0; i < x86_pmu.num_events; i++) { + for (i = 0; i < x86_pmu.num_counters; i++) { if (!reserve_evntsel_nmi(x86_pmu.eventsel + i)) goto eventsel_fail; } @@ -336,7 +336,7 @@ eventsel_fail: for (i--; i >= 0; i--) release_evntsel_nmi(x86_pmu.eventsel + i); - i = x86_pmu.num_events; + i = x86_pmu.num_counters; perfctr_fail: for (i--; i >= 0; i--) @@ -352,7 +352,7 @@ static void release_pmc_hardware(void) { int i; - for (i = 0; i < x86_pmu.num_events; i++) { + for (i = 0; i < x86_pmu.num_counters; i++) { release_perfctr_nmi(x86_pmu.perfctr + i); release_evntsel_nmi(x86_pmu.eventsel + i); } @@ -547,7 +547,7 @@ static void x86_pmu_disable_all(void) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - for 
(idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { u64 val; if (!test_bit(idx, cpuc->active_mask)) @@ -582,7 +582,7 @@ static void x86_pmu_enable_all(int added) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - for (idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { struct perf_event *event = cpuc->events[idx]; u64 val; @@ -657,14 +657,14 @@ static int x86_schedule_events(struct cpu_hw_events *cpuc, int n, int *assign) * assign events to counters starting with most * constrained events. */ - wmax = x86_pmu.num_events; + wmax = x86_pmu.num_counters; /* * when fixed event counters are present, * wmax is incremented by 1 to account * for one more choice */ - if (x86_pmu.num_events_fixed) + if (x86_pmu.num_counters_fixed) wmax++; for (w = 1, num = n; num && w <= wmax; w++) { @@ -714,7 +714,7 @@ static int collect_events(struct cpu_hw_events *cpuc, struct perf_event *leader, struct perf_event *event; int n, max_count; - max_count = x86_pmu.num_events + x86_pmu.num_events_fixed; + max_count = x86_pmu.num_counters + x86_pmu.num_counters_fixed; /* current number of events already accepted */ n = cpuc->n_events; @@ -904,7 +904,7 @@ x86_perf_event_set_period(struct perf_event *event) atomic64_set(&hwc->prev_count, (u64)-left); wrmsrl(hwc->event_base + idx, - (u64)(-left) & x86_pmu.event_mask); + (u64)(-left) & x86_pmu.cntval_mask); perf_event_update_userpage(event); @@ -987,7 +987,7 @@ void perf_event_print_debug(void) unsigned long flags; int cpu, idx; - if (!x86_pmu.num_events) + if (!x86_pmu.num_counters) return; local_irq_save(flags); @@ -1011,7 +1011,7 @@ void perf_event_print_debug(void) } pr_info("CPU#%d: active: %016llx\n", cpu, *(u64 *)cpuc->active_mask); - for (idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { rdmsrl(x86_pmu.eventsel + idx, pmc_ctrl); rdmsrl(x86_pmu.perfctr + idx, pmc_count); @@ -1024,7 +1024,7 @@ void perf_event_print_debug(void) pr_info("CPU#%d: gen-PMC%d left: %016llx\n", cpu, idx, prev_left); } - for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { + for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) { rdmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, pmc_count); pr_info("CPU#%d: fixed-PMC%d count: %016llx\n", @@ -1089,7 +1089,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; @@ -1097,7 +1097,7 @@ static int x86_pmu_handle_irq(struct pt_regs *regs) hwc = &event->hw; val = x86_perf_event_update(event); - if (val & (1ULL << (x86_pmu.event_bits - 1))) + if (val & (1ULL << (x86_pmu.cntval_bits - 1))) continue; /* @@ -1401,46 +1401,46 @@ void __init init_hw_perf_events(void) if (x86_pmu.quirks) x86_pmu.quirks(); - if (x86_pmu.num_events > X86_PMC_MAX_GENERIC) { + if (x86_pmu.num_counters > X86_PMC_MAX_GENERIC) { WARN(1, KERN_ERR "hw perf events %d > max(%d), clipping!", - x86_pmu.num_events, X86_PMC_MAX_GENERIC); - x86_pmu.num_events = X86_PMC_MAX_GENERIC; + x86_pmu.num_counters, X86_PMC_MAX_GENERIC); + x86_pmu.num_counters = X86_PMC_MAX_GENERIC; } - x86_pmu.intel_ctrl = (1 << x86_pmu.num_events) - 1; - perf_max_events = x86_pmu.num_events; + x86_pmu.intel_ctrl = (1 << x86_pmu.num_counters) - 1; + perf_max_events = x86_pmu.num_counters; - if (x86_pmu.num_events_fixed > X86_PMC_MAX_FIXED) { + if (x86_pmu.num_counters_fixed > 
X86_PMC_MAX_FIXED) { WARN(1, KERN_ERR "hw perf events fixed %d > max(%d), clipping!", - x86_pmu.num_events_fixed, X86_PMC_MAX_FIXED); - x86_pmu.num_events_fixed = X86_PMC_MAX_FIXED; + x86_pmu.num_counters_fixed, X86_PMC_MAX_FIXED); + x86_pmu.num_counters_fixed = X86_PMC_MAX_FIXED; } x86_pmu.intel_ctrl |= - ((1LL << x86_pmu.num_events_fixed)-1) << X86_PMC_IDX_FIXED; + ((1LL << x86_pmu.num_counters_fixed)-1) << X86_PMC_IDX_FIXED; perf_events_lapic_init(); register_die_notifier(&perf_event_nmi_notifier); unconstrained = (struct event_constraint) - __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_events) - 1, - 0, x86_pmu.num_events); + __EVENT_CONSTRAINT(0, (1ULL << x86_pmu.num_counters) - 1, + 0, x86_pmu.num_counters); if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { if (c->cmask != INTEL_ARCH_FIXED_MASK) continue; - c->idxmsk64 |= (1ULL << x86_pmu.num_events) - 1; - c->weight += x86_pmu.num_events; + c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; + c->weight += x86_pmu.num_counters; } } pr_info("... version: %d\n", x86_pmu.version); - pr_info("... bit width: %d\n", x86_pmu.event_bits); - pr_info("... generic registers: %d\n", x86_pmu.num_events); - pr_info("... value mask: %016Lx\n", x86_pmu.event_mask); + pr_info("... bit width: %d\n", x86_pmu.cntval_bits); + pr_info("... generic registers: %d\n", x86_pmu.num_counters); + pr_info("... value mask: %016Lx\n", x86_pmu.cntval_mask); pr_info("... max period: %016Lx\n", x86_pmu.max_period); - pr_info("... fixed-purpose events: %d\n", x86_pmu.num_events_fixed); + pr_info("... fixed-purpose events: %d\n", x86_pmu.num_counters_fixed); pr_info("... event mask: %016Lx\n", x86_pmu.intel_ctrl); perf_cpu_notifier(x86_pmu_notifier); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 285623b..7753a5c 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -165,7 +165,7 @@ static void amd_put_event_constraints(struct cpu_hw_events *cpuc, * be removed on one CPU at a time AND PMU is disabled * when we come here */ - for (i = 0; i < x86_pmu.num_events; i++) { + for (i = 0; i < x86_pmu.num_counters; i++) { if (nb->owners[i] == event) { cmpxchg(nb->owners+i, event, NULL); break; @@ -215,7 +215,7 @@ amd_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event) struct hw_perf_event *hwc = &event->hw; struct amd_nb *nb = cpuc->amd_nb; struct perf_event *old = NULL; - int max = x86_pmu.num_events; + int max = x86_pmu.num_counters; int i, j, k = -1; /* @@ -293,7 +293,7 @@ static struct amd_nb *amd_alloc_nb(int cpu, int nb_id) /* * initialize all possible NB constraints */ - for (i = 0; i < x86_pmu.num_events; i++) { + for (i = 0; i < x86_pmu.num_counters; i++) { __set_bit(i, nb->event_constraints[i].idxmsk); nb->event_constraints[i].weight = 1; } @@ -385,9 +385,9 @@ static __initconst struct x86_pmu amd_pmu = { .event_map = amd_pmu_event_map, .raw_event = amd_pmu_raw_event, .max_events = ARRAY_SIZE(amd_perfmon_event_map), - .num_events = 4, - .event_bits = 48, - .event_mask = (1ULL << 48) - 1, + .num_counters = 4, + .cntval_bits = 48, + .cntval_mask = (1ULL << 48) - 1, .apic = 1, /* use highest bit to detect overflow */ .max_period = (1ULL << 47) - 1, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 676aac2..cc4d90a 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -653,20 +653,20 @@ static void intel_pmu_reset(void) unsigned 
long flags; int idx; - if (!x86_pmu.num_events) + if (!x86_pmu.num_counters) return; local_irq_save(flags); printk("clearing PMU state on CPU#%d\n", smp_processor_id()); - for (idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { checking_wrmsrl(x86_pmu.eventsel + idx, 0ull); checking_wrmsrl(x86_pmu.perfctr + idx, 0ull); } - for (idx = 0; idx < x86_pmu.num_events_fixed; idx++) { + for (idx = 0; idx < x86_pmu.num_counters_fixed; idx++) checking_wrmsrl(MSR_ARCH_PERFMON_FIXED_CTR0 + idx, 0ull); - } + if (ds) ds->bts_index = ds->bts_buffer_base; @@ -901,16 +901,16 @@ static __init int intel_pmu_init(void) x86_pmu = intel_pmu; x86_pmu.version = version; - x86_pmu.num_events = eax.split.num_events; - x86_pmu.event_bits = eax.split.bit_width; - x86_pmu.event_mask = (1ULL << eax.split.bit_width) - 1; + x86_pmu.num_counters = eax.split.num_counters; + x86_pmu.cntval_bits = eax.split.bit_width; + x86_pmu.cntval_mask = (1ULL << eax.split.bit_width) - 1; /* * Quirk: v2 perfmon does not report fixed-purpose events, so * assume at least 3 events: */ if (version > 1) - x86_pmu.num_events_fixed = max((int)edx.split.num_events_fixed, 3); + x86_pmu.num_counters_fixed = max((int)edx.split.num_counters_fixed, 3); /* * v2 and above have a perf capabilities MSR diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 0d1be36..4139100 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -483,7 +483,7 @@ static void p4_pmu_disable_all(void) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - for (idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { struct perf_event *event = cpuc->events[idx]; if (!test_bit(idx, cpuc->active_mask)) continue; @@ -540,7 +540,7 @@ static void p4_pmu_enable_all(int added) struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); int idx; - for (idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { struct perf_event *event = cpuc->events[idx]; if (!test_bit(idx, cpuc->active_mask)) continue; @@ -562,7 +562,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) cpuc = &__get_cpu_var(cpu_hw_events); - for (idx = 0; idx < x86_pmu.num_events; idx++) { + for (idx = 0; idx < x86_pmu.num_counters; idx++) { if (!test_bit(idx, cpuc->active_mask)) continue; @@ -579,7 +579,7 @@ static int p4_pmu_handle_irq(struct pt_regs *regs) p4_pmu_clear_cccr_ovf(hwc); val = x86_perf_event_update(event); - if (val & (1ULL << (x86_pmu.event_bits - 1))) + if (val & (1ULL << (x86_pmu.cntval_bits - 1))) continue; /* @@ -794,10 +794,10 @@ static __initconst struct x86_pmu p4_pmu = { * though leave it restricted at moment assuming * HT is on */ - .num_events = ARCH_P4_MAX_CCCR, + .num_counters = ARCH_P4_MAX_CCCR, .apic = 1, - .event_bits = 40, - .event_mask = (1ULL << 40) - 1, + .cntval_bits = 40, + .cntval_mask = (1ULL << 40) - 1, .max_period = (1ULL << 39) - 1, .hw_config = p4_hw_config, .schedule_events = p4_pmu_schedule_events, diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 877182c..b26fbc7e 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -119,7 +119,7 @@ static __initconst struct x86_pmu p6_pmu = { .apic = 1, .max_period = (1ULL << 31) - 1, .version = 0, - .num_events = 2, + .num_counters = 2, /* * Events have 40 bits implemented. 
However they are designed such * that bits [32-39] are sign extensions of bit 31. As such the @@ -127,8 +127,8 @@ static __initconst struct x86_pmu p6_pmu = { * * See IA-32 Intel Architecture Software developer manual Vol 3B */ - .event_bits = 32, - .event_mask = (1ULL << 32) - 1, + .cntval_bits = 32, + .cntval_mask = (1ULL << 32) - 1, .get_event_constraints = x86_get_event_constraints, .event_constraints = p6_event_constraints, }; diff --git a/arch/x86/oprofile/op_model_ppro.c b/arch/x86/oprofile/op_model_ppro.c index 2bf90fa..c8abc4d 100644 --- a/arch/x86/oprofile/op_model_ppro.c +++ b/arch/x86/oprofile/op_model_ppro.c @@ -239,11 +239,11 @@ static void arch_perfmon_setup_counters(void) if (eax.split.version_id == 0 && current_cpu_data.x86 == 6 && current_cpu_data.x86_model == 15) { eax.split.version_id = 2; - eax.split.num_events = 2; + eax.split.num_counters = 2; eax.split.bit_width = 40; } - num_counters = eax.split.num_events; + num_counters = eax.split.num_counters; op_arch_perfmon_spec.num_counters = num_counters; op_arch_perfmon_spec.num_controls = num_counters; -- cgit v1.1 From a098f4484bc7dae23f5b62360954007b99b64600 Mon Sep 17 00:00:00 2001 From: Robert Richter Date: Tue, 30 Mar 2010 11:28:21 +0200 Subject: perf, x86: implement ARCH_PERFMON_EVENTSEL bit masks ARCH_PERFMON_EVENTSEL bit masks are often used in the kernel. This patch adds macros for the bit masks and removes local defines. The function intel_pmu_raw_event() becomes x86_pmu_raw_event() which is generic for x86 models and same also for p6. Duplicate code is removed. Signed-off-by: Robert Richter Signed-off-by: Peter Zijlstra LKML-Reference: <20100330092821.GH11907@erda.amd.com> Signed-off-by: Ingo Molnar --- arch/x86/include/asm/perf_event.h | 58 +++++++++++++++------------------- arch/x86/kernel/cpu/perf_event.c | 19 +++++++++-- arch/x86/kernel/cpu/perf_event_amd.c | 15 +-------- arch/x86/kernel/cpu/perf_event_intel.c | 22 ++----------- arch/x86/kernel/cpu/perf_event_p6.c | 20 +----------- 5 files changed, 45 insertions(+), 89 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index 987bf67..f6d43db 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -18,39 +18,31 @@ #define MSR_ARCH_PERFMON_EVENTSEL0 0x186 #define MSR_ARCH_PERFMON_EVENTSEL1 0x187 -#define ARCH_PERFMON_EVENTSEL_ENABLE (1 << 22) -#define ARCH_PERFMON_EVENTSEL_ANY (1 << 21) -#define ARCH_PERFMON_EVENTSEL_INT (1 << 20) -#define ARCH_PERFMON_EVENTSEL_OS (1 << 17) -#define ARCH_PERFMON_EVENTSEL_USR (1 << 16) - -/* - * Includes eventsel and unit mask as well: - */ - - -#define INTEL_ARCH_EVTSEL_MASK 0x000000FFULL -#define INTEL_ARCH_UNIT_MASK 0x0000FF00ULL -#define INTEL_ARCH_EDGE_MASK 0x00040000ULL -#define INTEL_ARCH_INV_MASK 0x00800000ULL -#define INTEL_ARCH_CNT_MASK 0xFF000000ULL -#define INTEL_ARCH_EVENT_MASK (INTEL_ARCH_UNIT_MASK|INTEL_ARCH_EVTSEL_MASK) - -/* - * filter mask to validate fixed counter events. - * the following filters disqualify for fixed counters: - * - inv - * - edge - * - cnt-mask - * The other filters are supported by fixed counters. - * The any-thread option is supported starting with v3. 
- */ -#define INTEL_ARCH_FIXED_MASK \ - (INTEL_ARCH_CNT_MASK| \ - INTEL_ARCH_INV_MASK| \ - INTEL_ARCH_EDGE_MASK|\ - INTEL_ARCH_UNIT_MASK|\ - INTEL_ARCH_EVENT_MASK) +#define ARCH_PERFMON_EVENTSEL_EVENT 0x000000FFULL +#define ARCH_PERFMON_EVENTSEL_UMASK 0x0000FF00ULL +#define ARCH_PERFMON_EVENTSEL_USR (1ULL << 16) +#define ARCH_PERFMON_EVENTSEL_OS (1ULL << 17) +#define ARCH_PERFMON_EVENTSEL_EDGE (1ULL << 18) +#define ARCH_PERFMON_EVENTSEL_INT (1ULL << 20) +#define ARCH_PERFMON_EVENTSEL_ANY (1ULL << 21) +#define ARCH_PERFMON_EVENTSEL_ENABLE (1ULL << 22) +#define ARCH_PERFMON_EVENTSEL_INV (1ULL << 23) +#define ARCH_PERFMON_EVENTSEL_CMASK 0xFF000000ULL + +#define AMD64_EVENTSEL_EVENT \ + (ARCH_PERFMON_EVENTSEL_EVENT | (0x0FULL << 32)) +#define INTEL_ARCH_EVENT_MASK \ + (ARCH_PERFMON_EVENTSEL_UMASK | ARCH_PERFMON_EVENTSEL_EVENT) + +#define X86_RAW_EVENT_MASK \ + (ARCH_PERFMON_EVENTSEL_EVENT | \ + ARCH_PERFMON_EVENTSEL_UMASK | \ + ARCH_PERFMON_EVENTSEL_EDGE | \ + ARCH_PERFMON_EVENTSEL_INV | \ + ARCH_PERFMON_EVENTSEL_CMASK) +#define AMD64_RAW_EVENT_MASK \ + (X86_RAW_EVENT_MASK | \ + AMD64_EVENTSEL_EVENT) #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_SEL 0x3c #define ARCH_PERFMON_UNHALTED_CORE_CYCLES_UMASK (0x00 << 8) diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 9daaa1e..1dd42c1 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -143,13 +143,21 @@ struct cpu_hw_events { * Constraint on the Event code. */ #define INTEL_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, n, INTEL_ARCH_EVTSEL_MASK) + EVENT_CONSTRAINT(c, n, ARCH_PERFMON_EVENTSEL_EVENT) /* * Constraint on the Event code + UMask + fixed-mask + * + * filter mask to validate fixed counter events. + * the following filters disqualify for fixed counters: + * - inv + * - edge + * - cnt-mask + * The other filters are supported by fixed counters. + * The any-thread option is supported starting with v3. 
*/ #define FIXED_EVENT_CONSTRAINT(c, n) \ - EVENT_CONSTRAINT(c, (1ULL << (32+n)), INTEL_ARCH_FIXED_MASK) + EVENT_CONSTRAINT(c, (1ULL << (32+n)), X86_RAW_EVENT_MASK) /* * Constraint on the Event code + UMask @@ -437,6 +445,11 @@ static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc return 0; } +static u64 x86_pmu_raw_event(u64 hw_event) +{ + return hw_event & X86_RAW_EVENT_MASK; +} + /* * Setup the hardware configuration for a given attr_type */ @@ -1427,7 +1440,7 @@ void __init init_hw_perf_events(void) if (x86_pmu.event_constraints) { for_each_event_constraint(c, x86_pmu.event_constraints) { - if (c->cmask != INTEL_ARCH_FIXED_MASK) + if (c->cmask != X86_RAW_EVENT_MASK) continue; c->idxmsk64 |= (1ULL << x86_pmu.num_counters) - 1; diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 7753a5c..37e9517 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -113,20 +113,7 @@ static u64 amd_pmu_event_map(int hw_event) static u64 amd_pmu_raw_event(u64 hw_event) { -#define K7_EVNTSEL_EVENT_MASK 0xF000000FFULL -#define K7_EVNTSEL_UNIT_MASK 0x00000FF00ULL -#define K7_EVNTSEL_EDGE_MASK 0x000040000ULL -#define K7_EVNTSEL_INV_MASK 0x000800000ULL -#define K7_EVNTSEL_REG_MASK 0x0FF000000ULL - -#define K7_EVNTSEL_MASK \ - (K7_EVNTSEL_EVENT_MASK | \ - K7_EVNTSEL_UNIT_MASK | \ - K7_EVNTSEL_EDGE_MASK | \ - K7_EVNTSEL_INV_MASK | \ - K7_EVNTSEL_REG_MASK) - - return hw_event & K7_EVNTSEL_MASK; + return hw_event & AMD64_RAW_EVENT_MASK; } /* diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index cc4d90a..dfdd6f9 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -452,24 +452,6 @@ static __initconst u64 atom_hw_cache_event_ids }, }; -static u64 intel_pmu_raw_event(u64 hw_event) -{ -#define CORE_EVNTSEL_EVENT_MASK 0x000000FFULL -#define CORE_EVNTSEL_UNIT_MASK 0x0000FF00ULL -#define CORE_EVNTSEL_EDGE_MASK 0x00040000ULL -#define CORE_EVNTSEL_INV_MASK 0x00800000ULL -#define CORE_EVNTSEL_REG_MASK 0xFF000000ULL - -#define CORE_EVNTSEL_MASK \ - (INTEL_ARCH_EVTSEL_MASK | \ - INTEL_ARCH_UNIT_MASK | \ - INTEL_ARCH_EDGE_MASK | \ - INTEL_ARCH_INV_MASK | \ - INTEL_ARCH_CNT_MASK) - - return hw_event & CORE_EVNTSEL_MASK; -} - static void intel_pmu_disable_all(void) { struct cpu_hw_events *cpuc = &__get_cpu_var(cpu_hw_events); @@ -788,7 +770,7 @@ static __initconst struct x86_pmu core_pmu = { .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, .event_map = intel_pmu_event_map, - .raw_event = intel_pmu_raw_event, + .raw_event = x86_pmu_raw_event, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, /* @@ -827,7 +809,7 @@ static __initconst struct x86_pmu intel_pmu = { .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, .event_map = intel_pmu_event_map, - .raw_event = intel_pmu_raw_event, + .raw_event = x86_pmu_raw_event, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, /* diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index b26fbc7e..03c139a 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -27,24 +27,6 @@ static u64 p6_pmu_event_map(int hw_event) */ #define P6_NOP_EVENT 0x0000002EULL -static u64 p6_pmu_raw_event(u64 hw_event) -{ -#define P6_EVNTSEL_EVENT_MASK 0x000000FFULL -#define P6_EVNTSEL_UNIT_MASK 0x0000FF00ULL -#define P6_EVNTSEL_EDGE_MASK 0x00040000ULL -#define 
P6_EVNTSEL_INV_MASK 0x00800000ULL -#define P6_EVNTSEL_REG_MASK 0xFF000000ULL - -#define P6_EVNTSEL_MASK \ - (P6_EVNTSEL_EVENT_MASK | \ - P6_EVNTSEL_UNIT_MASK | \ - P6_EVNTSEL_EDGE_MASK | \ - P6_EVNTSEL_INV_MASK | \ - P6_EVNTSEL_REG_MASK) - - return hw_event & P6_EVNTSEL_MASK; -} - static struct event_constraint p6_event_constraints[] = { INTEL_EVENT_CONSTRAINT(0xc1, 0x1), /* FLOPS */ @@ -114,7 +96,7 @@ static __initconst struct x86_pmu p6_pmu = { .eventsel = MSR_P6_EVNTSEL0, .perfctr = MSR_P6_PERFCTR0, .event_map = p6_pmu_event_map, - .raw_event = p6_pmu_raw_event, + .raw_event = x86_pmu_raw_event, .max_events = ARRAY_SIZE(p6_perfmon_event_map), .apic = 1, .max_period = (1ULL << 31) - 1, -- cgit v1.1 From b4cdc5c264b35c67007800dec3928e9547a9d70b Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Tue, 30 Mar 2010 17:00:06 +0200 Subject: perf, x86: Fix up the ANY flag stuff Stephane noticed that the ANY flag was in generic arch code, and Cyrill reported that it broke the P4 code. Solve this by merging x86_pmu::raw_event into x86_pmu::hw_config and provide intel_pmu and amd_pmu specific versions of this callback. The intel_pmu one deals with the ANY flag, the amd_pmu adds the few extra event bits AMD64 has. Reported-by: Stephane Eranian Reported-by: Cyrill Gorcunov Acked-by: Robert Richter Acked-by: Cyrill Gorcunov Acked-by: Stephane Eranian Signed-off-by: Peter Zijlstra LKML-Reference: <1269968113.5258.442.camel@laptop> Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 35 ++++++++++------------------- arch/x86/kernel/cpu/perf_event_amd.c | 17 +++++++++++---- arch/x86/kernel/cpu/perf_event_intel.c | 30 +++++++++++++++++++++---- arch/x86/kernel/cpu/perf_event_p4.c | 40 +++++++++++++++++++--------------- arch/x86/kernel/cpu/perf_event_p6.c | 3 +-- 5 files changed, 74 insertions(+), 51 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 1dd42c1..65e9c5e 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -196,12 +196,11 @@ struct x86_pmu { void (*enable_all)(int added); void (*enable)(struct perf_event *); void (*disable)(struct perf_event *); - int (*hw_config)(struct perf_event_attr *attr, struct hw_perf_event *hwc); + int (*hw_config)(struct perf_event *event); int (*schedule_events)(struct cpu_hw_events *cpuc, int n, int *assign); unsigned eventsel; unsigned perfctr; u64 (*event_map)(int); - u64 (*raw_event)(u64); int max_events; int num_counters; int num_counters_fixed; @@ -426,28 +425,26 @@ set_ext_hw_attr(struct hw_perf_event *hwc, struct perf_event_attr *attr) return 0; } -static int x86_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) +static int x86_pmu_hw_config(struct perf_event *event) { /* * Generate PMC IRQs: * (keep 'enabled' bit clear for now) */ - hwc->config = ARCH_PERFMON_EVENTSEL_INT; + event->hw.config = ARCH_PERFMON_EVENTSEL_INT; /* * Count user and OS events unless requested not to */ - if (!attr->exclude_user) - hwc->config |= ARCH_PERFMON_EVENTSEL_USR; - if (!attr->exclude_kernel) - hwc->config |= ARCH_PERFMON_EVENTSEL_OS; + if (!event->attr.exclude_user) + event->hw.config |= ARCH_PERFMON_EVENTSEL_USR; + if (!event->attr.exclude_kernel) + event->hw.config |= ARCH_PERFMON_EVENTSEL_OS; - return 0; -} + if (event->attr.type == PERF_TYPE_RAW) + event->hw.config |= event->attr.config & X86_RAW_EVENT_MASK; -static u64 x86_pmu_raw_event(u64 hw_event) -{ - return hw_event & X86_RAW_EVENT_MASK; + return 0; } /* @@ -489,7 +486,7 @@ static 
int __hw_perf_event_init(struct perf_event *event) hwc->last_tag = ~0ULL; /* Processor specifics */ - err = x86_pmu.hw_config(attr, hwc); + err = x86_pmu.hw_config(event); if (err) return err; @@ -508,16 +505,8 @@ static int __hw_perf_event_init(struct perf_event *event) return -EOPNOTSUPP; } - /* - * Raw hw_event type provide the config in the hw_event structure - */ - if (attr->type == PERF_TYPE_RAW) { - hwc->config |= x86_pmu.raw_event(attr->config); - if ((hwc->config & ARCH_PERFMON_EVENTSEL_ANY) && - perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) - return -EACCES; + if (attr->type == PERF_TYPE_RAW) return 0; - } if (attr->type == PERF_TYPE_HW_CACHE) return set_ext_hw_attr(hwc, attr); diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index 37e9517..bbd7339 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -111,9 +111,19 @@ static u64 amd_pmu_event_map(int hw_event) return amd_perfmon_event_map[hw_event]; } -static u64 amd_pmu_raw_event(u64 hw_event) +static int amd_pmu_hw_config(struct perf_event *event) { - return hw_event & AMD64_RAW_EVENT_MASK; + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + if (event->attr.type != PERF_TYPE_RAW) + return 0; + + event->hw.config |= event->attr.config & AMD64_RAW_EVENT_MASK; + + return 0; } /* @@ -365,12 +375,11 @@ static __initconst struct x86_pmu amd_pmu = { .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, .disable = x86_pmu_disable_event, - .hw_config = x86_hw_config, + .hw_config = amd_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_K7_EVNTSEL0, .perfctr = MSR_K7_PERFCTR0, .event_map = amd_pmu_event_map, - .raw_event = amd_pmu_raw_event, .max_events = ARRAY_SIZE(amd_perfmon_event_map), .num_counters = 4, .cntval_bits = 48, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index dfdd6f9..30bf10c 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -758,6 +758,30 @@ intel_get_event_constraints(struct cpu_hw_events *cpuc, struct perf_event *event return x86_get_event_constraints(cpuc, event); } +static int intel_pmu_hw_config(struct perf_event *event) +{ + int ret = x86_pmu_hw_config(event); + + if (ret) + return ret; + + if (event->attr.type != PERF_TYPE_RAW) + return 0; + + if (!(event->attr.config & ARCH_PERFMON_EVENTSEL_ANY)) + return 0; + + if (x86_pmu.version < 3) + return -EINVAL; + + if (perf_paranoid_cpu() && !capable(CAP_SYS_ADMIN)) + return -EACCES; + + event->hw.config |= ARCH_PERFMON_EVENTSEL_ANY; + + return 0; +} + static __initconst struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, @@ -765,12 +789,11 @@ static __initconst struct x86_pmu core_pmu = { .enable_all = x86_pmu_enable_all, .enable = x86_pmu_enable_event, .disable = x86_pmu_disable_event, - .hw_config = x86_hw_config, + .hw_config = x86_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, .perfctr = MSR_ARCH_PERFMON_PERFCTR0, .event_map = intel_pmu_event_map, - .raw_event = x86_pmu_raw_event, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, /* @@ -804,12 +827,11 @@ static __initconst struct x86_pmu intel_pmu = { .enable_all = intel_pmu_enable_all, .enable = intel_pmu_enable_event, .disable = intel_pmu_disable_event, - .hw_config = x86_hw_config, + .hw_config = intel_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_ARCH_PERFMON_EVENTSEL0, 
.perfctr = MSR_ARCH_PERFMON_PERFCTR0, .event_map = intel_pmu_event_map, - .raw_event = x86_pmu_raw_event, .max_events = ARRAY_SIZE(intel_perfmon_event_map), .apic = 1, /* diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index 4139100..acd237d 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -419,20 +419,7 @@ static u64 p4_pmu_event_map(int hw_event) return config; } -/* - * We don't control raw events so it's up to the caller - * to pass sane values (and we don't count the thread number - * on HT machine but allow HT-compatible specifics to be - * passed on) - */ -static u64 p4_pmu_raw_event(u64 hw_event) -{ - return hw_event & - (p4_config_pack_escr(P4_ESCR_MASK_HT) | - p4_config_pack_cccr(P4_CCCR_MASK_HT)); -} - -static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) +static int p4_hw_config(struct perf_event *event) { int cpu = raw_smp_processor_id(); u32 escr, cccr; @@ -444,11 +431,29 @@ static int p4_hw_config(struct perf_event_attr *attr, struct hw_perf_event *hwc) */ cccr = p4_default_cccr_conf(cpu); - escr = p4_default_escr_conf(cpu, attr->exclude_kernel, attr->exclude_user); - hwc->config = p4_config_pack_escr(escr) | p4_config_pack_cccr(cccr); + escr = p4_default_escr_conf(cpu, event->attr.exclude_kernel, + event->attr.exclude_user); + event->hw.config = p4_config_pack_escr(escr) | + p4_config_pack_cccr(cccr); if (p4_ht_active() && p4_ht_thread(cpu)) - hwc->config = p4_set_ht_bit(hwc->config); + event->hw.config = p4_set_ht_bit(event->hw.config); + + if (event->attr.type != PERF_TYPE_RAW) + return 0; + + /* + * We don't control raw events so it's up to the caller + * to pass sane values (and we don't count the thread number + * on HT machine but allow HT-compatible specifics to be + * passed on) + * + * XXX: HT wide things should check perf_paranoid_cpu() && + * CAP_SYS_ADMIN + */ + event->hw.config |= event->attr.config & + (p4_config_pack_escr(P4_ESCR_MASK_HT) | + p4_config_pack_cccr(P4_CCCR_MASK_HT)); return 0; } @@ -785,7 +790,6 @@ static __initconst struct x86_pmu p4_pmu = { .eventsel = MSR_P4_BPU_CCCR0, .perfctr = MSR_P4_BPU_PERFCTR0, .event_map = p4_pmu_event_map, - .raw_event = p4_pmu_raw_event, .max_events = ARRAY_SIZE(p4_general_events), .get_event_constraints = x86_get_event_constraints, /* diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 03c139a..9123e8e 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -91,12 +91,11 @@ static __initconst struct x86_pmu p6_pmu = { .enable_all = p6_pmu_enable_all, .enable = p6_pmu_enable_event, .disable = p6_pmu_disable_event, - .hw_config = x86_hw_config, + .hw_config = x86_pmu_hw_config, .schedule_events = x86_schedule_events, .eventsel = MSR_P6_EVNTSEL0, .perfctr = MSR_P6_PERFCTR0, .event_map = p6_pmu_event_map, - .raw_event = x86_pmu_raw_event, .max_events = ARRAY_SIZE(p6_perfmon_event_map), .apic = 1, .max_period = (1ULL << 31) - 1, -- cgit v1.1 From caaa8be3b6707cb9664e573a28b00f845ce9f32e Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 29 Mar 2010 13:09:53 +0200 Subject: perf, x86: Fix __initconst vs const All variables that have __initconst should also be const. 
Suggested-by: Stephen Rothwell Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_amd.c | 4 ++-- arch/x86/kernel/cpu/perf_event_intel.c | 12 ++++++------ arch/x86/kernel/cpu/perf_event_p4.c | 4 ++-- arch/x86/kernel/cpu/perf_event_p6.c | 2 +- 4 files changed, 11 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_amd.c b/arch/x86/kernel/cpu/perf_event_amd.c index bbd7339..611df11 100644 --- a/arch/x86/kernel/cpu/perf_event_amd.c +++ b/arch/x86/kernel/cpu/perf_event_amd.c @@ -2,7 +2,7 @@ static DEFINE_RAW_SPINLOCK(amd_nb_lock); -static __initconst u64 amd_hw_cache_event_ids +static __initconst const u64 amd_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -368,7 +368,7 @@ static void amd_pmu_cpu_dead(int cpu) raw_spin_unlock(&amd_nb_lock); } -static __initconst struct x86_pmu amd_pmu = { +static __initconst const struct x86_pmu amd_pmu = { .name = "AMD", .handle_irq = x86_pmu_handle_irq, .disable_all = x86_pmu_disable_all, diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 30bf10c..1957e3f 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -88,7 +88,7 @@ static u64 intel_pmu_event_map(int hw_event) return intel_perfmon_event_map[hw_event]; } -static __initconst u64 westmere_hw_cache_event_ids +static __initconst const u64 westmere_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -179,7 +179,7 @@ static __initconst u64 westmere_hw_cache_event_ids }, }; -static __initconst u64 nehalem_hw_cache_event_ids +static __initconst const u64 nehalem_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -270,7 +270,7 @@ static __initconst u64 nehalem_hw_cache_event_ids }, }; -static __initconst u64 core2_hw_cache_event_ids +static __initconst const u64 core2_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -361,7 +361,7 @@ static __initconst u64 core2_hw_cache_event_ids }, }; -static __initconst u64 atom_hw_cache_event_ids +static __initconst const u64 atom_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -782,7 +782,7 @@ static int intel_pmu_hw_config(struct perf_event *event) return 0; } -static __initconst struct x86_pmu core_pmu = { +static __initconst const struct x86_pmu core_pmu = { .name = "core", .handle_irq = x86_pmu_handle_irq, .disable_all = x86_pmu_disable_all, @@ -820,7 +820,7 @@ static void intel_pmu_cpu_dying(int cpu) fini_debug_store_on_cpu(cpu); } -static __initconst struct x86_pmu intel_pmu = { +static __initconst const struct x86_pmu intel_pmu = { .name = "Intel", .handle_irq = intel_pmu_handle_irq, .disable_all = intel_pmu_disable_all, diff --git a/arch/x86/kernel/cpu/perf_event_p4.c b/arch/x86/kernel/cpu/perf_event_p4.c index acd237d..15367cc 100644 --- a/arch/x86/kernel/cpu/perf_event_p4.c +++ b/arch/x86/kernel/cpu/perf_event_p4.c @@ -287,7 +287,7 @@ static struct p4_event_bind p4_event_bind_map[] = { p4_config_pack_cccr(cache_event | \ P4_CCCR_ESEL(P4_OPCODE_ESEL(P4_OPCODE(event)))) -static __initconst u64 p4_hw_cache_event_ids +static __initconst const u64 p4_hw_cache_event_ids [PERF_COUNT_HW_CACHE_MAX] [PERF_COUNT_HW_CACHE_OP_MAX] [PERF_COUNT_HW_CACHE_RESULT_MAX] = @@ -780,7 
+780,7 @@ done: return num ? -ENOSPC : 0; } -static __initconst struct x86_pmu p4_pmu = { +static __initconst const struct x86_pmu p4_pmu = { .name = "Netburst P4/Xeon", .handle_irq = p4_pmu_handle_irq, .disable_all = p4_pmu_disable_all, diff --git a/arch/x86/kernel/cpu/perf_event_p6.c b/arch/x86/kernel/cpu/perf_event_p6.c index 9123e8e..34ba07b 100644 --- a/arch/x86/kernel/cpu/perf_event_p6.c +++ b/arch/x86/kernel/cpu/perf_event_p6.c @@ -84,7 +84,7 @@ static void p6_pmu_enable_event(struct perf_event *event) (void)checking_wrmsrl(hwc->config_base + hwc->idx, val); } -static __initconst struct x86_pmu p6_pmu = { +static __initconst const struct x86_pmu p6_pmu = { .name = "p6", .handle_irq = x86_pmu_handle_irq, .disable_all = p6_pmu_disable_all, -- cgit v1.1 From 40b91cd10f000b4c4934e48e2e5c0bec66def144 Mon Sep 17 00:00:00 2001 From: Peter Zijlstra Date: Mon, 29 Mar 2010 16:37:17 +0200 Subject: perf, x86: Add Nehalem programming quirk to Westmere According to the Xeon-5600 errata, the Westmere suffers the same PMU programming bug as the original Nehalem did. Signed-off-by: Peter Zijlstra LKML-Reference: Signed-off-by: Ingo Molnar --- arch/x86/kernel/cpu/perf_event_intel.c | 2 ++ 1 file changed, 2 insertions(+) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event_intel.c b/arch/x86/kernel/cpu/perf_event_intel.c index 1957e3f..f168b40 100644 --- a/arch/x86/kernel/cpu/perf_event_intel.c +++ b/arch/x86/kernel/cpu/perf_event_intel.c @@ -488,6 +488,7 @@ static void intel_pmu_enable_all(int added) * Workaround for: * Intel Errata AAK100 (model 26) * Intel Errata AAP53 (model 30) + * Intel Errata BD53 (model 44) * * These chips need to be 'reset' when adding counters by programming * the magic three (non counting) events 0x4300D2, 0x4300B1 and 0x4300B5 @@ -980,6 +981,7 @@ static __init int intel_pmu_init(void) intel_pmu_lbr_init_nhm(); x86_pmu.event_constraints = intel_westmere_event_constraints; + x86_pmu.enable_all = intel_pmu_nhm_enable_all; pr_cont("Westmere events, "); break; -- cgit v1.1 From 6f4dee06fbf0133917f3d76fa3fb50e18b10c1f5 Mon Sep 17 00:00:00 2001 From: Frederic Weisbecker Date: Thu, 18 Mar 2010 23:47:01 +0100 Subject: perf: Drop the frame reliability check It is useless now that we have a pure stack frame walker, as the given addresses are always reliable. Signed-off-by: Frederic Weisbecker Cc: Peter Zijlstra Cc: Stephane Eranian Cc: Arnaldo Carvalho de Melo Cc: Paul Mackerras Cc: Ingo Molnar --- arch/x86/kernel/cpu/perf_event.c | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 65e9c5e..353a174 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1602,8 +1602,7 @@ static void backtrace_address(void *data, unsigned long addr, int reliable) { struct perf_callchain_entry *entry = data; - if (reliable) - callchain_store(entry, addr); + callchain_store(entry, addr); } static const struct stacktrace_ops backtrace_ops = { -- cgit v1.1 From 39447b386c846bbf1c56f6403c5282837486200f Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Mon, 19 Apr 2010 13:32:41 +0800 Subject: perf: Enhance perf to allow for guest statistic collection from host This patch introduces perf_guest_info_callbacks and related register/unregister functions, and adds PERF_RECORD_MISC_XXX bits for guest kernel and guest user space.
Signed-off-by: Zhang Yanmin Signed-off-by: Avi Kivity --- arch/x86/include/asm/perf_event.h | 15 ++++----------- arch/x86/kernel/cpu/perf_event.c | 31 +++++++++++++++++++++++++++++++ 2 files changed, 35 insertions(+), 11 deletions(-) (limited to 'arch/x86') diff --git a/arch/x86/include/asm/perf_event.h b/arch/x86/include/asm/perf_event.h index f6d43db..254883d 100644 --- a/arch/x86/include/asm/perf_event.h +++ b/arch/x86/include/asm/perf_event.h @@ -135,17 +135,10 @@ extern void perf_events_lapic_init(void); */ #define PERF_EFLAGS_EXACT (1UL << 3) -#define perf_misc_flags(regs) \ -({ int misc = 0; \ - if (user_mode(regs)) \ - misc |= PERF_RECORD_MISC_USER; \ - else \ - misc |= PERF_RECORD_MISC_KERNEL; \ - if (regs->flags & PERF_EFLAGS_EXACT) \ - misc |= PERF_RECORD_MISC_EXACT; \ - misc; }) - -#define perf_instruction_pointer(regs) ((regs)->ip) +struct pt_regs; +extern unsigned long perf_instruction_pointer(struct pt_regs *regs); +extern unsigned long perf_misc_flags(struct pt_regs *regs); +#define perf_misc_flags(regs) perf_misc_flags(regs) #else static inline void init_hw_perf_events(void) { } diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c index 626154a..2ea78ab 100644 --- a/arch/x86/kernel/cpu/perf_event.c +++ b/arch/x86/kernel/cpu/perf_event.c @@ -1720,6 +1720,11 @@ struct perf_callchain_entry *perf_callchain(struct pt_regs *regs) { struct perf_callchain_entry *entry; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + /* TODO: We don't support guest os callchain now */ + return NULL; + } + if (in_nmi()) entry = &__get_cpu_var(pmc_nmi_entry); else @@ -1743,3 +1748,29 @@ void perf_arch_fetch_caller_regs(struct pt_regs *regs, unsigned long ip, int ski regs->cs = __KERNEL_CS; local_save_flags(regs->flags); } + +unsigned long perf_instruction_pointer(struct pt_regs *regs) +{ + unsigned long ip; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) + ip = perf_guest_cbs->get_guest_ip(); + else + ip = instruction_pointer(regs); + return ip; +} + +unsigned long perf_misc_flags(struct pt_regs *regs) +{ + int misc = 0; + if (perf_guest_cbs && perf_guest_cbs->is_in_guest()) { + misc |= perf_guest_cbs->is_user_mode() ? + PERF_RECORD_MISC_GUEST_USER : + PERF_RECORD_MISC_GUEST_KERNEL; + } else + misc |= user_mode(regs) ? PERF_RECORD_MISC_USER : + PERF_RECORD_MISC_KERNEL; + if (regs->flags & PERF_EFLAGS_EXACT) + misc |= PERF_RECORD_MISC_EXACT; + + return misc; +} -- cgit v1.1 From ff9d07a0e7ce756a183e7c2e483aec452ee6b574 Mon Sep 17 00:00:00 2001 From: "Zhang, Yanmin" Date: Mon, 19 Apr 2010 13:32:45 +0800 Subject: KVM: Implement perf callbacks for guest sampling Below patch implements the perf_guest_info_callbacks on kvm. 
Signed-off-by: Zhang Yanmin Signed-off-by: Avi Kivity --- arch/x86/kvm/vmx.c | 5 ++++- arch/x86/kvm/x86.c | 46 ++++++++++++++++++++++++++++++++++++++++++++++ arch/x86/kvm/x86.h | 3 +++ 3 files changed, 53 insertions(+), 1 deletion(-) (limited to 'arch/x86') diff --git a/arch/x86/kvm/vmx.c b/arch/x86/kvm/vmx.c index 686492ed..82be6da 100644 --- a/arch/x86/kvm/vmx.c +++ b/arch/x86/kvm/vmx.c @@ -3654,8 +3654,11 @@ static void vmx_complete_interrupts(struct vcpu_vmx *vmx) /* We need to handle NMIs before interrupts are enabled */ if ((exit_intr_info & INTR_INFO_INTR_TYPE_MASK) == INTR_TYPE_NMI_INTR && - (exit_intr_info & INTR_INFO_VALID_MASK)) + (exit_intr_info & INTR_INFO_VALID_MASK)) { + kvm_before_handle_nmi(&vmx->vcpu); asm("int $2"); + kvm_after_handle_nmi(&vmx->vcpu); + } idtv_info_valid = idt_vectoring_info & VECTORING_INFO_VALID_MASK; diff --git a/arch/x86/kvm/x86.c b/arch/x86/kvm/x86.c index 24cd0ee..c3a33b2 100644 --- a/arch/x86/kvm/x86.c +++ b/arch/x86/kvm/x86.c @@ -40,6 +40,7 @@ #include #include #include +#include #include #undef TRACE_INCLUDE_FILE #define CREATE_TRACE_POINTS @@ -3765,6 +3766,47 @@ static void kvm_timer_init(void) } } +static DEFINE_PER_CPU(struct kvm_vcpu *, current_vcpu); + +static int kvm_is_in_guest(void) +{ + return percpu_read(current_vcpu) != NULL; +} + +static int kvm_is_user_mode(void) +{ + int user_mode = 3; + if (percpu_read(current_vcpu)) + user_mode = kvm_x86_ops->get_cpl(percpu_read(current_vcpu)); + return user_mode != 0; +} + +static unsigned long kvm_get_guest_ip(void) +{ + unsigned long ip = 0; + if (percpu_read(current_vcpu)) + ip = kvm_rip_read(percpu_read(current_vcpu)); + return ip; +} + +static struct perf_guest_info_callbacks kvm_guest_cbs = { + .is_in_guest = kvm_is_in_guest, + .is_user_mode = kvm_is_user_mode, + .get_guest_ip = kvm_get_guest_ip, +}; + +void kvm_before_handle_nmi(struct kvm_vcpu *vcpu) +{ + percpu_write(current_vcpu, vcpu); +} +EXPORT_SYMBOL_GPL(kvm_before_handle_nmi); + +void kvm_after_handle_nmi(struct kvm_vcpu *vcpu) +{ + percpu_write(current_vcpu, NULL); +} +EXPORT_SYMBOL_GPL(kvm_after_handle_nmi); + int kvm_arch_init(void *opaque) { int r; @@ -3801,6 +3843,8 @@ int kvm_arch_init(void *opaque) kvm_timer_init(); + perf_register_guest_info_callbacks(&kvm_guest_cbs); + return 0; out: @@ -3809,6 +3853,8 @@ out: void kvm_arch_exit(void) { + perf_unregister_guest_info_callbacks(&kvm_guest_cbs); + if (!boot_cpu_has(X86_FEATURE_CONSTANT_TSC)) cpufreq_unregister_notifier(&kvmclock_cpufreq_notifier_block, CPUFREQ_TRANSITION_NOTIFIER); diff --git a/arch/x86/kvm/x86.h b/arch/x86/kvm/x86.h index 2d101639..b7a4047 100644 --- a/arch/x86/kvm/x86.h +++ b/arch/x86/kvm/x86.h @@ -65,4 +65,7 @@ static inline int is_paging(struct kvm_vcpu *vcpu) return kvm_read_cr0_bits(vcpu, X86_CR0_PG); } +void kvm_before_handle_nmi(struct kvm_vcpu *vcpu); +void kvm_after_handle_nmi(struct kvm_vcpu *vcpu); + #endif -- cgit v1.1
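
For illustration only (not part of the series above): the sketch below shows how a hypothetical hypervisor module could hook into the perf_guest_info_callbacks interface introduced by the last two patches. The structure members and the perf_register/unregister_guest_info_callbacks calls mirror the kvm_guest_cbs usage in arch/x86/kvm/x86.c above; the my_hv_* names and the stubbed callback bodies are assumptions, not code from the series.

/*
 * Illustrative sketch of a guest-info provider for perf, modelled on the
 * KVM registration in the preceding patch. All my_hv_* symbols are
 * hypothetical; the stubs only document what a real hypervisor would return.
 */
#include <linux/init.h>
#include <linux/module.h>
#include <linux/perf_event.h>

static int my_hv_is_in_guest(void)
{
	/* A real implementation would test a per-CPU "current vCPU" marker. */
	return 0;
}

static int my_hv_is_user_mode(void)
{
	/* Non-zero if the interrupted guest context was running at CPL > 0. */
	return 0;
}

static unsigned long my_hv_get_guest_ip(void)
{
	/* The guest instruction pointer captured at VM-exit time. */
	return 0;
}

static struct perf_guest_info_callbacks my_hv_guest_cbs = {
	.is_in_guest	= my_hv_is_in_guest,
	.is_user_mode	= my_hv_is_user_mode,
	.get_guest_ip	= my_hv_get_guest_ip,
};

static int __init my_hv_init(void)
{
	perf_register_guest_info_callbacks(&my_hv_guest_cbs);
	return 0;
}

static void __exit my_hv_exit(void)
{
	perf_unregister_guest_info_callbacks(&my_hv_guest_cbs);
}

module_init(my_hv_init);
module_exit(my_hv_exit);
MODULE_LICENSE("GPL");

As the perf_instruction_pointer()/perf_misc_flags() hunks above show, perf only consults these callbacks while is_in_guest() reports a running guest; otherwise it falls back to the host pt_regs, and guest callchains are still skipped entirely.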