From b39f88acd7d989b6b247ba87c480fc24ed71d9c5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 19 Oct 2010 14:08:29 +0200
Subject: perf, x86: Extract PEBS/BTS buffer free routines

So that we may grow additional call-sites..

Signed-off-by: Peter Zijlstra
Acked-by: Stephane Eranian
LKML-Reference: <20101019134808.196793164@chello.nl>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 30 +++++++++++++++++++++++++-----
 1 file changed, 25 insertions(+), 5 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 4977f9c..1bc1351 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,28 @@ static void fini_debug_store_on_cpu(int cpu)
 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 }

+static void release_pebs_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+	if (!ds || !x86_pmu.pebs)
+		return;
+
+	kfree((void *)(unsigned long)ds->pebs_buffer_base);
+	ds->pebs_buffer_base = 0;
+}
+
+static void release_bts_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+	if (!ds || !x86_pmu.bts)
+		return;
+
+	kfree((void *)(unsigned long)ds->bts_buffer_base);
+	ds->bts_buffer_base = 0;
+}
+
 static void release_ds_buffers(void)
 {
 	int cpu;
@@ -82,7 +104,6 @@ static void release_ds_buffers(void)
 		return;

 	get_online_cpus();
-
 	for_each_online_cpu(cpu)
 		fini_debug_store_on_cpu(cpu);

@@ -92,13 +113,12 @@ static void release_ds_buffers(void)
 		if (!ds)
 			continue;

-		per_cpu(cpu_hw_events, cpu).ds = NULL;
+		release_pebs_buffer(cpu);
+		release_bts_buffer(cpu);

-		kfree((void *)(unsigned long)ds->pebs_buffer_base);
-		kfree((void *)(unsigned long)ds->bts_buffer_base);
+		per_cpu(cpu_hw_events, cpu).ds = NULL;
 		kfree(ds);
 	}
-
 	put_online_cpus();
 }
--
cgit v1.1
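
The two new routines also make an idiom worth spelling out: debug_store keeps its buffer addresses in u64 fields regardless of pointer width, so freeing has to narrow the value through unsigned long before casting back to a pointer. A stand-alone sketch of the round-trip (hypothetical struct and user-space allocators, not kernel code):

#include <stdint.h>
#include <stdlib.h>

/* Stand-in for the hardware-defined debug store: addresses are
 * kept as 64-bit values even on a 32-bit build. */
struct fake_debug_store {
	uint64_t pebs_buffer_base;
};

static void fake_release_pebs_buffer(struct fake_debug_store *ds)
{
	/* Narrow to the native word first, then to a pointer; casting
	 * u64 straight to void * would warn on 32-bit targets. */
	free((void *)(unsigned long)ds->pebs_buffer_base);
	ds->pebs_buffer_base = 0;
}

int main(void)
{
	struct fake_debug_store ds;

	ds.pebs_buffer_base = (uint64_t)(unsigned long)calloc(1, 4096);
	fake_release_pebs_buffer(&ds);
	return 0;
}

Because kfree(NULL) is a no-op and the base is zeroed after freeing, the routines stay safe when called for CPUs whose buffers were never allocated, something the allocation-failure rework later in this series relies on.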
From 5ee25c87318fa3722026fd77089fa7ba0db8d447 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 19 Oct 2010 14:15:04 +0200
Subject: perf, x86: Extract PEBS/BTS allocation functions

Mostly a cleanup.. it reduces code indentation and makes the code flow
of reserve_ds_buffers() clearer.

Signed-off-by: Peter Zijlstra
Acked-by: Stephane Eranian
LKML-Reference: <20101019134808.253453452@chello.nl>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 90 +++++++++++++++++++------------
 1 file changed, 56 insertions(+), 34 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 1bc1351..14d98bd 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -74,6 +74,32 @@ static void fini_debug_store_on_cpu(int cpu)
 	wrmsr_on_cpu(cpu, MSR_IA32_DS_AREA, 0, 0);
 }

+static int alloc_pebs_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	int max, thresh = 1; /* always use a single PEBS record */
+	void *buffer;
+
+	if (!x86_pmu.pebs)
+		return 0;
+
+	buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
+	if (unlikely(!buffer))
+		return -ENOMEM;
+
+	max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
+
+	ds->pebs_buffer_base = (u64)(unsigned long)buffer;
+	ds->pebs_index = ds->pebs_buffer_base;
+	ds->pebs_absolute_maximum = ds->pebs_buffer_base +
+		max * x86_pmu.pebs_record_size;
+
+	ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
+		thresh * x86_pmu.pebs_record_size;
+
+	return 0;
+}
+
 static void release_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -85,6 +111,32 @@ static void release_pebs_buffer(int cpu)
 	ds->pebs_buffer_base = 0;
 }

+static int alloc_bts_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	int max, thresh;
+	void *buffer;
+
+	if (!x86_pmu.bts)
+		return 0;
+
+	buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
+	if (unlikely(!buffer))
+		return -ENOMEM;
+
+	max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
+	thresh = max / 16;
+
+	ds->bts_buffer_base = (u64)(unsigned long)buffer;
+	ds->bts_index = ds->bts_buffer_base;
+	ds->bts_absolute_maximum = ds->bts_buffer_base +
+		max * BTS_RECORD_SIZE;
+	ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
+		thresh * BTS_RECORD_SIZE;
+
+	return 0;
+}
+
 static void release_bts_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
@@ -133,8 +185,6 @@ static int reserve_ds_buffers(void)

 	for_each_possible_cpu(cpu) {
 		struct debug_store *ds;
-		void *buffer;
-		int max, thresh;

 		err = -ENOMEM;
 		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
@@ -142,39 +192,11 @@ static int reserve_ds_buffers(void)
 			break;
 		per_cpu(cpu_hw_events, cpu).ds = ds;

-		if (x86_pmu.bts) {
-			buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
-			if (unlikely(!buffer))
-				break;
-
-			max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;
-			thresh = max / 16;
-
-			ds->bts_buffer_base = (u64)(unsigned long)buffer;
-			ds->bts_index = ds->bts_buffer_base;
-			ds->bts_absolute_maximum = ds->bts_buffer_base +
-				max * BTS_RECORD_SIZE;
-			ds->bts_interrupt_threshold = ds->bts_absolute_maximum -
-				thresh * BTS_RECORD_SIZE;
-		}
+		if (alloc_bts_buffer(cpu))
+			break;

-		if (x86_pmu.pebs) {
-			buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
-			if (unlikely(!buffer))
-				break;
-
-			max = PEBS_BUFFER_SIZE / x86_pmu.pebs_record_size;
-
-			ds->pebs_buffer_base = (u64)(unsigned long)buffer;
-			ds->pebs_index = ds->pebs_buffer_base;
-			ds->pebs_absolute_maximum = ds->pebs_buffer_base +
-				max * x86_pmu.pebs_record_size;
-			/*
-			 * Always use single record PEBS
-			 */
-			ds->pebs_interrupt_threshold = ds->pebs_buffer_base +
-				x86_pmu.pebs_record_size;
-		}
+		if (alloc_pebs_buffer(cpu))
+			break;

 		err = 0;
 	}
--
cgit v1.1
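
The buffer geometry set up by the two allocators is easy to check with concrete numbers. The constants themselves are not quoted in this series apart from the 64K BTS buffer mentioned in a later patch, so the values below are assumptions for illustration: a 24-byte BTS record (from, to, flags as three u64), a one-page PEBS buffer, and the 144-byte pre-Nehalem PEBS record format:

#include <stdio.h>

/* Assumed values; the real constants live in perf_event_intel_ds.c
 * and the PEBS record size depends on the CPU model. */
#define BTS_BUFFER_SIZE		(64 * 1024)	/* 64K, per the series */
#define BTS_RECORD_SIZE		24		/* from, to, flags: 3 * u64 */
#define PEBS_BUFFER_SIZE	4096		/* assumed: one page */
#define PEBS_RECORD_SIZE	144		/* assumed: core format */

int main(void)
{
	int bts_max = BTS_BUFFER_SIZE / BTS_RECORD_SIZE;	/* 2730 records */
	int bts_thresh = bts_max / 16;				/* 170 records */
	int pebs_max = PEBS_BUFFER_SIZE / PEBS_RECORD_SIZE;	/* 28 records */
	int pebs_thresh = 1;	/* always use a single PEBS record */

	printf("BTS: %d records, PMI %d records before the end\n",
	       bts_max, bts_thresh);
	printf("PEBS: %d records, PMI after %d record(s)\n",
	       pebs_max, pebs_thresh);
	return 0;
}

The BTS threshold sits a sixteenth of the buffer short of the absolute maximum so the interrupt has room to drain records before the hardware runs out of space, while PEBS is deliberately thresholded at a single record.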
From 65af94baca56beb3514d6cfce782634db9cf676d Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 19 Oct 2010 14:37:23 +0200
Subject: perf, x86: Extract DS alloc/free functions

Again, mostly a cleanup to unclutter the reserve_ds_buffers() code.

Signed-off-by: Peter Zijlstra
Acked-by: Stephane Eranian
LKML-Reference: <20101019134808.304495776@chello.nl>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 40 ++++++++++++++++++++-----------
 1 file changed, 26 insertions(+), 14 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 14d98bd..3c86f4d 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -148,6 +148,30 @@ static void release_bts_buffer(int cpu)
 	ds->bts_buffer_base = 0;
 }

+static int alloc_ds_buffer(int cpu)
+{
+	struct debug_store *ds;
+
+	ds = kzalloc(sizeof(*ds), GFP_KERNEL);
+	if (unlikely(!ds))
+		return -ENOMEM;
+
+	per_cpu(cpu_hw_events, cpu).ds = ds;
+
+	return 0;
+}
+
+static void release_ds_buffer(int cpu)
+{
+	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+
+	if (!ds)
+		return;
+
+	per_cpu(cpu_hw_events, cpu).ds = NULL;
+	kfree(ds);
+}
+
 static void release_ds_buffers(void)
 {
 	int cpu;
@@ -160,16 +184,9 @@ static void release_ds_buffers(void)
 		fini_debug_store_on_cpu(cpu);

 	for_each_possible_cpu(cpu) {
-		struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
-
-		if (!ds)
-			continue;
-
 		release_pebs_buffer(cpu);
 		release_bts_buffer(cpu);
-
-		per_cpu(cpu_hw_events, cpu).ds = NULL;
-		kfree(ds);
+		release_ds_buffer(cpu);
 	}
 	put_online_cpus();
 }
@@ -184,13 +201,8 @@ static int reserve_ds_buffers(void)
 	get_online_cpus();

 	for_each_possible_cpu(cpu) {
-		struct debug_store *ds;
-
-		err = -ENOMEM;
-		ds = kzalloc(sizeof(*ds), GFP_KERNEL);
-		if (unlikely(!ds))
+		if (alloc_ds_buffer(cpu))
 			break;
-		per_cpu(cpu_hw_events, cpu).ds = ds;

 		if (alloc_bts_buffer(cpu))
 			break;
--
cgit v1.1

From 5553be2620ac901c21a25657bd5b59f73254e6d5 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 19 Oct 2010 14:38:11 +0200
Subject: perf, x86: Fixup the precise_ip computation

In case we don't have PEBS, the LBR fixup doesn't make sense.

Signed-off-by: Peter Zijlstra
Acked-by: Stephane Eranian
LKML-Reference: <20101019134808.354429461@chello.nl>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event.c | 9 +++++----
 1 file changed, 5 insertions(+), 4 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index fe73c18..f369c53 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -497,12 +497,13 @@ static int x86_pmu_hw_config(struct perf_event *event)
 		int precise = 0;

 		/* Support for constant skid */
-		if (x86_pmu.pebs)
+		if (x86_pmu.pebs) {
 			precise++;

-		/* Support for IP fixup */
-		if (x86_pmu.lbr_nr)
-			precise++;
+			/* Support for IP fixup */
+			if (x86_pmu.lbr_nr)
+				precise++;
+		}

 		if (event->attr.precise_ip > precise)
 			return -EOPNOTSUPP;
--
cgit v1.1
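
The precise value computed here is the ceiling that event creation enforces on attr.precise_ip: one level for constant-skid sampling (PEBS present) plus one for IP fixup (LBR present), and after this fix the LBR level no longer counts without PEBS underneath it. User space typically probes downward until the kernel accepts the request; a sketch of that convention (ordinary perf_event_open() usage, not code from this series):

#include <linux/perf_event.h>
#include <sys/syscall.h>
#include <sys/types.h>
#include <unistd.h>
#include <string.h>
#include <errno.h>

/* Open a cycles counter at the highest precise_ip level the PMU
 * accepts, stepping down one level on each EOPNOTSUPP, the same
 * ladder the perf tool walks for :ppp/:pp/:p modifiers. */
static int open_precise_cycles(pid_t pid, int cpu)
{
	struct perf_event_attr attr;
	int precise, fd;

	for (precise = 3; precise >= 0; precise--) {
		memset(&attr, 0, sizeof(attr));
		attr.size = sizeof(attr);
		attr.type = PERF_TYPE_HARDWARE;
		attr.config = PERF_COUNT_HW_CPU_CYCLES;
		attr.sample_period = 100000;
		attr.precise_ip = precise;	/* 0: arbitrary skid .. 3: zero skid */

		fd = syscall(__NR_perf_event_open, &attr, pid, cpu, -1, 0);
		if (fd >= 0 || errno != EOPNOTSUPP)
			return fd;	/* success, or a real error */
	}
	return -1;
}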
From 6809b6ea73f7291f2e495d40397f1172c9caa77e Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 19 Oct 2010 14:22:50 +0200
Subject: perf, x86: Less disastrous PEBS/BTS buffer allocation failure

Currently PEBS/BTS buffers are allocated when we instantiate the first
event; when this fails, everything fails. This is a problem because
especially BTS tries to allocate a rather large buffer (64K), which can
easily fail.

This patch changes the logic such that when either buffer allocation
fails, we simply don't allow events that would use these facilities,
but continue functioning for all other events.

This logic comes from a much larger patch proposed by Stephane.

Suggested-by: Stephane Eranian
Signed-off-by: Peter Zijlstra
Acked-by: Stephane Eranian
LKML-Reference: <20101019134808.354429461@chello.nl>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event.c          |  5 +--
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 58 +++++++++++++++++++++++--------
 2 files changed, 47 insertions(+), 16 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index f369c53..61e78f6 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -238,6 +238,7 @@ struct x86_pmu {
 	 * Intel DebugStore bits
 	 */
 	int		bts, pebs;
+	int		bts_active, pebs_active;
 	int		pebs_record_size;
 	void		(*drain_pebs)(struct pt_regs *regs);
 	struct event_constraint *pebs_constraints;
@@ -478,7 +479,7 @@ static int x86_setup_perfctr(struct perf_event *event)
 	if ((attr->config == PERF_COUNT_HW_BRANCH_INSTRUCTIONS) &&
 	    (hwc->sample_period == 1)) {
 		/* BTS is not supported by this architecture. */
-		if (!x86_pmu.bts)
+		if (!x86_pmu.bts_active)
 			return -EOPNOTSUPP;

 		/* BTS is currently only allowed for user-mode. */
@@ -497,7 +498,7 @@ static int x86_pmu_hw_config(struct perf_event *event)
 		int precise = 0;

 		/* Support for constant skid */
-		if (x86_pmu.pebs) {
+		if (x86_pmu.pebs_active) {
 			precise++;

 			/* Support for IP fixup */
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 3c86f4d..05c7db6 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -193,36 +193,66 @@ static void release_ds_buffers(void)

 static int reserve_ds_buffers(void)
 {
-	int cpu, err = 0;
+	int bts_err = 0, pebs_err = 0;
+	int cpu;
+
+	x86_pmu.bts_active = 0;
+	x86_pmu.pebs_active = 0;

 	if (!x86_pmu.bts && !x86_pmu.pebs)
 		return 0;

+	if (!x86_pmu.bts)
+		bts_err = 1;
+
+	if (!x86_pmu.pebs)
+		pebs_err = 1;
+
 	get_online_cpus();

 	for_each_possible_cpu(cpu) {
-		if (alloc_ds_buffer(cpu))
-			break;
+		if (alloc_ds_buffer(cpu)) {
+			bts_err = 1;
+			pebs_err = 1;
+		}

-		if (alloc_bts_buffer(cpu))
-			break;
+		if (!bts_err && alloc_bts_buffer(cpu))
+			bts_err = 1;
+
+		if (!pebs_err && alloc_pebs_buffer(cpu))
+			pebs_err = 1;

-		if (alloc_pebs_buffer(cpu))
+		if (bts_err && pebs_err)
 			break;
+	}
+
+	if (bts_err) {
+		for_each_possible_cpu(cpu)
+			release_bts_buffer(cpu);
+	}

-		err = 0;
+	if (pebs_err) {
+		for_each_possible_cpu(cpu)
+			release_pebs_buffer(cpu);
 	}

-	if (err)
-		release_ds_buffers();
-	else {
+	if (bts_err && pebs_err) {
+		for_each_possible_cpu(cpu)
+			release_ds_buffer(cpu);
+	} else {
+		if (x86_pmu.bts && !bts_err)
+			x86_pmu.bts_active = 1;
+
+		if (x86_pmu.pebs && !pebs_err)
+			x86_pmu.pebs_active = 1;
+
 		for_each_online_cpu(cpu)
 			init_debug_store_on_cpu(cpu);
 	}

 	put_online_cpus();

-	return err;
+	return 0;
 }

 /*
@@ -287,7 +317,7 @@ static int intel_pmu_drain_bts_buffer(void)
 	if (!event)
 		return 0;

-	if (!ds)
+	if (!x86_pmu.bts_active)
 		return 0;

 	at = (struct bts_record *)(unsigned long)ds->bts_buffer_base;
@@ -557,7 +587,7 @@ static void intel_pmu_drain_pebs_core(struct pt_regs *iregs)
 	struct pebs_record_core *at, *top;
 	int n;

-	if (!ds || !x86_pmu.pebs)
+	if (!x86_pmu.pebs_active)
 		return;

 	at = (struct pebs_record_core *)(unsigned long)ds->pebs_buffer_base;
@@ -599,7 +629,7 @@ static void intel_pmu_drain_pebs_nhm(struct pt_regs *iregs)
 	u64 status = 0;
 	int bit, n;

-	if (!ds || !x86_pmu.pebs)
+	if (!x86_pmu.pebs_active)
 		return;

 	at = (struct pebs_record_nhm *)(unsigned long)ds->pebs_buffer_base;
--
cgit v1.1
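
Stripped of the perf specifics, the reworked reserve_ds_buffers() is a general degrade-don't-fail pattern: each feature's error flag starts from "unsupported", an allocation failure poisons only that feature, and a feature becomes active only when it is both supported and fully allocated. A minimal skeleton of the idea (generic sketch; the real function also frees whatever a dead feature managed to allocate, which is omitted here):

#include <stdbool.h>

/* A feature is usable only if the hardware supports it AND all of
 * its per-CPU buffers came up; callers gate on ->active, never on
 * ->supported alone. */
struct feature {
	bool supported;
	bool active;
};

static bool alloc_buffers(struct feature *f, int ncpus,
			  bool (*alloc_one)(int cpu))
{
	int cpu;

	if (!f->supported)
		return false;

	for (cpu = 0; cpu < ncpus; cpu++) {
		if (!alloc_one(cpu))
			return false;	/* degrade this feature only */
	}
	return true;
}

void reserve(struct feature *bts, struct feature *pebs, int ncpus,
	     bool (*alloc_bts)(int cpu), bool (*alloc_pebs)(int cpu))
{
	/* A failed 64K BTS allocation no longer takes PEBS down too. */
	bts->active = alloc_buffers(bts, ncpus, alloc_bts);
	pebs->active = alloc_buffers(pebs, ncpus, alloc_pebs);
}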
From f80c9e304b8e8062230b0cda2c2fdd586149c771 Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 19 Oct 2010 14:50:02 +0200
Subject: perf, x86: Clean up reserve_ds_buffers() signature

Now that reserve_ds_buffers() never fails, change it to return void and
remove all code dealing with the error return.

Signed-off-by: Peter Zijlstra
Acked-by: Stephane Eranian
LKML-Reference: <20101019134808.462621937@chello.nl>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event.c          | 9 +++------
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 9 +++------
 2 files changed, 6 insertions(+), 12 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/perf_event.c b/arch/x86/kernel/cpu/perf_event.c
index 61e78f6..a333bf9 100644
--- a/arch/x86/kernel/cpu/perf_event.c
+++ b/arch/x86/kernel/cpu/perf_event.c
@@ -382,7 +382,7 @@ static void release_pmc_hardware(void) {}

 #endif

-static int reserve_ds_buffers(void);
+static void reserve_ds_buffers(void);
 static void release_ds_buffers(void);

 static void hw_perf_event_destroy(struct perf_event *event)
@@ -546,11 +546,8 @@ static int __x86_pmu_event_init(struct perf_event *event)
 	if (atomic_read(&active_events) == 0) {
 		if (!reserve_pmc_hardware())
 			err = -EBUSY;
-		else {
-			err = reserve_ds_buffers();
-			if (err)
-				release_pmc_hardware();
-		}
+		else
+			reserve_ds_buffers();
 	}
 	if (!err)
 		atomic_inc(&active_events);
diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 05c7db6..8a7f81c 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -191,7 +191,7 @@ static void release_ds_buffers(void)
 	put_online_cpus();
 }

-static int reserve_ds_buffers(void)
+static void reserve_ds_buffers(void)
 {
 	int bts_err = 0, pebs_err = 0;
 	int cpu;
@@ -200,7 +200,7 @@ static void reserve_ds_buffers(void)
 	x86_pmu.pebs_active = 0;

 	if (!x86_pmu.bts && !x86_pmu.pebs)
-		return 0;
+		return;

 	if (!x86_pmu.bts)
 		bts_err = 1;
@@ -251,8 +251,6 @@ static void reserve_ds_buffers(void)
 	}

 	put_online_cpus();
-
-	return 0;
 }

 /*
@@ -714,9 +712,8 @@ static void intel_ds_init(void)

 #else /* CONFIG_CPU_SUP_INTEL */

-static int reserve_ds_buffers(void)
+static void reserve_ds_buffers(void)
 {
-	return 0;
 }

 static void release_ds_buffers(void)
--
cgit v1.1
From 96681fc3c9e7d1f89ab64e5eec40b6467c97680f Mon Sep 17 00:00:00 2001
From: Peter Zijlstra
Date: Tue, 19 Oct 2010 14:55:33 +0200
Subject: perf, x86: Use NUMA aware allocations for PEBS/BTS/DS allocations

For performance reasons it's best to use memory node local memory for
per-cpu buffers.

This logic comes from a much larger patch proposed by Stephane.

Suggested-by: Stephane Eranian
Signed-off-by: Peter Zijlstra
Acked-by: Stephane Eranian
LKML-Reference: <20101019134808.514465326@chello.nl>
Signed-off-by: Ingo Molnar
---
 arch/x86/kernel/cpu/perf_event_intel_ds.c | 9 ++++++---
 1 file changed, 6 insertions(+), 3 deletions(-)

(limited to 'arch/x86/kernel/cpu')

diff --git a/arch/x86/kernel/cpu/perf_event_intel_ds.c b/arch/x86/kernel/cpu/perf_event_intel_ds.c
index 8a7f81c..b7dcd9f 100644
--- a/arch/x86/kernel/cpu/perf_event_intel_ds.c
+++ b/arch/x86/kernel/cpu/perf_event_intel_ds.c
@@ -77,13 +77,14 @@ static void fini_debug_store_on_cpu(int cpu)
 static int alloc_pebs_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	int node = cpu_to_node(cpu);
 	int max, thresh = 1; /* always use a single PEBS record */
 	void *buffer;

 	if (!x86_pmu.pebs)
 		return 0;

-	buffer = kzalloc(PEBS_BUFFER_SIZE, GFP_KERNEL);
+	buffer = kmalloc_node(PEBS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;

@@ -114,13 +115,14 @@ static void release_pebs_buffer(int cpu)
 static int alloc_bts_buffer(int cpu)
 {
 	struct debug_store *ds = per_cpu(cpu_hw_events, cpu).ds;
+	int node = cpu_to_node(cpu);
 	int max, thresh;
 	void *buffer;

 	if (!x86_pmu.bts)
 		return 0;

-	buffer = kzalloc(BTS_BUFFER_SIZE, GFP_KERNEL);
+	buffer = kmalloc_node(BTS_BUFFER_SIZE, GFP_KERNEL | __GFP_ZERO, node);
 	if (unlikely(!buffer))
 		return -ENOMEM;

@@ -150,9 +152,10 @@ static void release_bts_buffer(int cpu)

 static int alloc_ds_buffer(int cpu)
 {
+	int node = cpu_to_node(cpu);
 	struct debug_store *ds;

-	ds = kzalloc(sizeof(*ds), GFP_KERNEL);
+	ds = kmalloc_node(sizeof(*ds), GFP_KERNEL | __GFP_ZERO, node);
 	if (unlikely(!ds))
 		return -ENOMEM;

--
cgit v1.1
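
The change is mechanical: kzalloc(size, GFP_KERNEL) becomes kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, cpu_to_node(cpu)), the same zeroed allocation but placed on the allocating CPU's own memory node, so the hardware's DS writes and the drain paths avoid cross-node traffic. The same pattern in isolation (a sketch against in-kernel APIs; the static buffer table and lack of unwinding are illustrative only):

#include <linux/cpumask.h>
#include <linux/errno.h>
#include <linux/slab.h>
#include <linux/topology.h>

static void *per_cpu_buf[NR_CPUS];

/* Allocate one zeroed, node-local buffer per possible CPU. */
static int alloc_node_local_buffers(size_t size)
{
	int cpu;

	for_each_possible_cpu(cpu) {
		int node = cpu_to_node(cpu);

		/* GFP_KERNEL | __GFP_ZERO on a given node is kzalloc()
		 * semantics, backed by that node's memory if available;
		 * kmalloc_node() still falls back to other nodes rather
		 * than failing outright. */
		per_cpu_buf[cpu] = kmalloc_node(size, GFP_KERNEL | __GFP_ZERO, node);
		if (!per_cpu_buf[cpu])
			return -ENOMEM;
	}
	return 0;
}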