summaryrefslogtreecommitdiffstats
path: root/sys/dev
diff options
context:
space:
mode:
author jtl <jtl@FreeBSD.org> 2015-11-14 01:45:55 +0000
committer jtl <jtl@FreeBSD.org> 2015-11-14 01:45:55 +0000
commit 73114f99e8a93e8b08a4f6b70bf873e4bf690cb7 (patch)
tree 932a83cc25dfb5f711825a12282d96b319e5a5ef /sys/dev
parent 8805597c1bb76cccf4d083d7338fad0c0b643be9 (diff)
download FreeBSD-src-73114f99e8a93e8b08a4f6b70bf873e4bf690cb7.zip
FreeBSD-src-73114f99e8a93e8b08a4f6b70bf873e4bf690cb7.tar.gz
Optimizations to the way hwpmc gathers user callchains
Changes to the code to gather user stacks:
* Delay setting pmc_cpumask until we actually have the stack.
* When recording user stack traces, only walk the portion of the ring that should have samples for us.

Sponsored by: Juniper Networks
Approved by: gnn (mentor)
MFC after: 1 month
Diffstat (limited to 'sys/dev')
-rw-r--r-- sys/dev/hwpmc/hwpmc_mod.c 29
1 files changed, 20 insertions, 9 deletions
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index 16f5fb3..024a54f 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -4096,6 +4096,7 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
cpu, pm, (void *) tf, inuserspace,
(int) (psb->ps_write - psb->ps_samples),
(int) (psb->ps_read - psb->ps_samples));
+ callchaindepth = 1;
error = ENOMEM;
goto done;
}
@@ -4153,7 +4154,8 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
done:
/* mark CPU as needing processing */
- CPU_SET_ATOMIC(cpu, &pmc_cpumask);
+ if (callchaindepth != PMC_SAMPLE_INUSE)
+ CPU_SET_ATOMIC(cpu, &pmc_cpumask);
return (error);
}
@@ -4167,10 +4169,9 @@ pmc_process_interrupt(int cpu, int ring, struct pmc *pm, struct trapframe *tf,
static void
pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf)
{
- int i;
struct pmc *pm;
struct thread *td;
- struct pmc_sample *ps;
+ struct pmc_sample *ps, *ps_end;
struct pmc_samplebuffer *psb;
#ifdef INVARIANTS
int ncallchains;
@@ -4189,15 +4190,17 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf)
/*
* Iterate through all deferred callchain requests.
+ * Walk from the current read pointer to the current
+ * write pointer.
*/
- ps = psb->ps_samples;
- for (i = 0; i < pmc_nsamples; i++, ps++) {
-
+ ps = psb->ps_read;
+ ps_end = psb->ps_write;
+ do {
if (ps->ps_nsamples != PMC_SAMPLE_INUSE)
- continue;
+ goto next;
if (ps->ps_td != td)
- continue;
+ goto next;
KASSERT(ps->ps_cpu == cpu,
("[pmc,%d] cpu mismatch ps_cpu=%d pcpu=%d", __LINE__,
@@ -4222,7 +4225,12 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf)
#ifdef INVARIANTS
ncallchains++;
#endif
- }
+
+next:
+ /* increment the pointer, modulo sample ring size */
+ if (++ps == psb->ps_fence)
+ ps = psb->ps_samples;
+ } while (ps != ps_end);
KASSERT(ncallchains > 0,
("[pmc,%d] cpu %d didn't find a sample to collect", __LINE__,
@@ -4232,6 +4240,9 @@ pmc_capture_user_callchain(int cpu, int ring, struct trapframe *tf)
("[pmc,%d] invalid td_pinned value", __LINE__));
sched_unpin(); /* Can migrate safely now. */
+ /* mark CPU as needing processing */
+ CPU_SET_ATOMIC(cpu, &pmc_cpumask);
+
return;
}
OpenPOWER on IntegriCloud