summaryrefslogtreecommitdiffstats
path: root/sys/dev/hwpmc
diff options
context:
space:
mode:
authorjkoshy <jkoshy@FreeBSD.org>2007-12-07 08:20:17 +0000
committerjkoshy <jkoshy@FreeBSD.org>2007-12-07 08:20:17 +0000
commit72c27d71d82569aec187c30f6ff208631abc02f4 (patch)
treefa5327bc9f466a5a1b4e68e43a72d22b1b35f5e5 /sys/dev/hwpmc
parent12b5f9c8c99a01b1d40e88aaa1a58ce757e68d5e (diff)
downloadFreeBSD-src-72c27d71d82569aec187c30f6ff208631abc02f4.zip
FreeBSD-src-72c27d71d82569aec187c30f6ff208631abc02f4.tar.gz
Kernel and hwpmc(4) support for callchain capture.
Sponsored by: FreeBSD Foundation and Google Inc.
Diffstat (limited to 'sys/dev/hwpmc')
-rw-r--r--sys/dev/hwpmc/hwpmc_amd.c28
-rw-r--r--sys/dev/hwpmc/hwpmc_logging.c53
-rw-r--r--sys/dev/hwpmc/hwpmc_mod.c347
-rw-r--r--sys/dev/hwpmc/hwpmc_piv.c30
-rw-r--r--sys/dev/hwpmc/hwpmc_ppro.c10
-rw-r--r--sys/dev/hwpmc/hwpmc_x86.c191
6 files changed, 547 insertions, 112 deletions
diff --git a/sys/dev/hwpmc/hwpmc_amd.c b/sys/dev/hwpmc/hwpmc_amd.c
index b7cdee8..3576234 100644
--- a/sys/dev/hwpmc/hwpmc_amd.c
+++ b/sys/dev/hwpmc/hwpmc_amd.c
@@ -1,7 +1,11 @@
/*-
- * Copyright (c) 2003-2005 Joseph Koshy
+ * Copyright (c) 2003-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -38,9 +42,9 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
+#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
-#include <machine/pmc_mdep.h>
#include <machine/specialreg.h>
#ifdef DEBUG
@@ -667,7 +671,7 @@ amd_stop_pmc(int cpu, int ri)
*/
static int
-amd_intr(int cpu, uintptr_t eip, int usermode)
+amd_intr(int cpu, struct trapframe *tf)
{
int i, error, retval, ri;
uint32_t config, evsel, perfctr;
@@ -679,8 +683,8 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
KASSERT(cpu >= 0 && cpu < mp_ncpus,
("[amd,%d] out of range CPU %d", __LINE__, cpu));
- PMCDBG(MDP,INT,1, "cpu=%d eip=%p um=%d", cpu, (void *) eip,
- usermode);
+ PMCDBG(MDP,INT,1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
+ TRAPF_USERMODE(tf));
retval = 0;
@@ -696,8 +700,8 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
*
* If multiple PMCs interrupt at the same time, the AMD64
* processor appears to deliver as many NMIs as there are
- * outstanding PMC interrupts. Thus we need to only process
- * one interrupt at a time.
+ * outstanding PMC interrupts. So we process only one NMI
+ * interrupt at a time.
*/
for (i = 0; retval == 0 && i < AMD_NPMCS-1; i++) {
@@ -717,9 +721,9 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
continue;
}
- retval = 1; /* found an interrupting PMC */
+ retval = 1; /* Found an interrupting PMC. */
- /* stop the PMC, reload count */
+ /* Stop the PMC, reload count. */
evsel = AMD_PMC_EVSEL_0 + i;
perfctr = AMD_PMC_PERFCTR_0 + i;
v = pm->pm_sc.pm_reloadcount;
@@ -733,8 +737,8 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
wrmsr(evsel, config & ~AMD_PMC_ENABLE);
wrmsr(perfctr, AMD_RELOAD_COUNT_TO_PERFCTR_VALUE(v));
- /* restart the counter if there was no error during logging */
- error = pmc_process_interrupt(cpu, pm, eip, usermode);
+ /* Restart the counter if logging succeeded. */
+ error = pmc_process_interrupt(cpu, pm, tf, TRAPF_USERMODE(tf));
if (error == 0)
wrmsr(evsel, config | AMD_PMC_ENABLE);
}
@@ -742,7 +746,7 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
- return retval;
+ return (retval);
}
/*
diff --git a/sys/dev/hwpmc/hwpmc_logging.c b/sys/dev/hwpmc/hwpmc_logging.c
index 7be4776..77417af 100644
--- a/sys/dev/hwpmc/hwpmc_logging.c
+++ b/sys/dev/hwpmc/hwpmc_logging.c
@@ -1,7 +1,11 @@
/*-
- * Copyright (c) 2005 Joseph Koshy
+ * Copyright (c) 2005-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -64,7 +68,6 @@ TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "logbuffersize", &pmclog_buffer_size);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFLAG_TUN|CTLFLAG_RD,
&pmclog_buffer_size, 0, "size of log buffers in kilobytes");
-
/*
* kern.hwpmc.nbuffer -- number of global log buffers
*/
@@ -96,7 +99,6 @@ static struct mtx pmc_kthread_mtx; /* sleep lock */
/*
* Log file record constructors.
*/
-
#define _PMCLOG_TO_HEADER(T,L) \
((PMCLOG_HEADER_MAGIC << 24) | \
(PMCLOG_TYPE_ ## T << 16) | \
@@ -135,6 +137,8 @@ static struct mtx pmc_kthread_mtx; /* sleep lock */
* Assertions about the log file format.
*/
+CTASSERT(sizeof(struct pmclog_callchain) == 6*4 +
+ PMC_CALLCHAIN_DEPTH_MAX*sizeof(uintfptr_t));
CTASSERT(sizeof(struct pmclog_closelog) == 3*4);
CTASSERT(sizeof(struct pmclog_dropnotify) == 3*4);
CTASSERT(sizeof(struct pmclog_map_in) == PATH_MAX +
@@ -710,9 +714,28 @@ pmclog_flush(struct pmc_owner *po)
}
-/*
- * Send a 'close log' event to the log file.
- */
+void
+pmclog_process_callchain(struct pmc *pm, struct pmc_sample *ps)
+{
+ int n, recordlen;
+ uint32_t flags;
+ struct pmc_owner *po;
+
+ PMCDBG(LOG,SAM,1,"pm=%p pid=%d n=%d", pm, ps->ps_pid,
+ ps->ps_nsamples);
+
+ recordlen = offsetof(struct pmclog_callchain, pl_pc) +
+ ps->ps_nsamples * sizeof(uintfptr_t);
+ po = pm->pm_owner;
+ flags = PMC_CALLCHAIN_TO_CPUFLAGS(ps->ps_cpu,ps->ps_flags);
+ PMCLOG_RESERVE(po, CALLCHAIN, recordlen);
+ PMCLOG_EMIT32(ps->ps_pid);
+ PMCLOG_EMIT32(pm->pm_id);
+ PMCLOG_EMIT32(flags);
+ for (n = 0; n < ps->ps_nsamples; n++)
+ PMCLOG_EMITADDR(ps->ps_pc[n]);
+ PMCLOG_DESPATCH(po);
+}
void
pmclog_process_closelog(struct pmc_owner *po)
@@ -761,24 +784,6 @@ pmclog_process_map_out(struct pmc_owner *po, pid_t pid, uintfptr_t start,
}
void
-pmclog_process_pcsample(struct pmc *pm, struct pmc_sample *ps)
-{
- struct pmc_owner *po;
-
- PMCDBG(LOG,SAM,1,"pm=%p pid=%d pc=%p", pm, ps->ps_pid,
- (void *) ps->ps_pc);
-
- po = pm->pm_owner;
-
- PMCLOG_RESERVE(po, PCSAMPLE, sizeof(struct pmclog_pcsample));
- PMCLOG_EMIT32(ps->ps_pid);
- PMCLOG_EMITADDR(ps->ps_pc);
- PMCLOG_EMIT32(pm->pm_id);
- PMCLOG_EMIT32(ps->ps_usermode);
- PMCLOG_DESPATCH(po);
-}
-
-void
pmclog_process_pmcallocate(struct pmc *pm)
{
struct pmc_owner *po;
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index 6c0e0ea..0bba092 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -1,7 +1,11 @@
/*-
- * Copyright (c) 2003-2006 Joseph Koshy
+ * Copyright (c) 2003-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -166,6 +170,7 @@ static int pmc_attach_one_process(struct proc *p, struct pmc *pm);
static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
int cpu);
static int pmc_can_attach(struct pmc *pm, struct proc *p);
+static void pmc_capture_user_callchain(int cpu, struct trapframe *tf);
static void pmc_cleanup(void);
static int pmc_detach_process(struct proc *p, struct pmc *pm);
static int pmc_detach_one_process(struct proc *p, struct pmc *pm,
@@ -180,6 +185,9 @@ static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
static void pmc_force_context_switch(void);
static void pmc_link_target_process(struct pmc *pm,
struct pmc_process *pp);
+static void pmc_log_all_process_mappings(struct pmc_owner *po);
+static void pmc_log_kernel_mappings(struct pmc *pm);
+static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p);
static void pmc_maybe_remove_owner(struct pmc_owner *po);
static void pmc_process_csw_in(struct thread *td);
static void pmc_process_csw_out(struct thread *td);
@@ -205,6 +213,11 @@ static void pmc_unlink_target_process(struct pmc *pmc,
SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");
+static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
+TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "callchaindepth", &pmc_callchaindepth);
+SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_TUN|CTLFLAG_RD,
+ &pmc_callchaindepth, 0, "depth of call chain records");
+
#ifdef DEBUG
struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
char pmc_debugstr[PMC_DEBUG_STRSIZE];
@@ -226,7 +239,7 @@ SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_TUN|CTLFLAG_RD,
&pmc_hashsize, 0, "rows in hash tables");
/*
- * kern.hwpmc.nsamples --- number of PC samples per CPU
+ * kern.hwpmc.nsamples --- number of PC samples/callchain stacks per CPU
*/
static int pmc_nsamples = PMC_NSAMPLES;
@@ -234,6 +247,7 @@ TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nsamples", &pmc_nsamples);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_TUN|CTLFLAG_RD,
&pmc_nsamples, 0, "number of PC samples per CPU");
+
/*
* kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
*/
@@ -957,6 +971,8 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
pmclog_process_pmcattach(pm, p->p_pid, fullpath);
if (freepath)
FREE(freepath, M_TEMP);
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+ pmc_log_process_mappings(pm->pm_owner, p);
}
/* mark process as using HWPMCs */
PROC_LOCK(p);
@@ -1449,7 +1465,7 @@ pmc_process_kld_unload(struct pmckern_map_out *pkm)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_out(po, (pid_t) -1,
pkm->pm_address, pkm->pm_address + pkm->pm_size);
-
+
/*
* TODO: Notify owners of process-sampling PMCs.
*/
@@ -1528,6 +1544,88 @@ pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm)
}
/*
+ * Log mapping information about the kernel.
+ */
+
+static void
+pmc_log_kernel_mappings(struct pmc *pm)
+{
+ struct pmc_owner *po;
+ struct pmckern_map_in *km, *kmbase;
+
+ sx_assert(&pmc_sx, SX_LOCKED);
+ KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
+ ("[pmc,%d] non-sampling PMC (%p) desires mapping information",
+ __LINE__, (void *) pm));
+
+ po = pm->pm_owner;
+
+ if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE)
+ return;
+
+ /*
+ * Log the current set of kernel modules.
+ */
+ kmbase = linker_hwpmc_list_objects();
+ for (km = kmbase; km->pm_file != NULL; km++) {
+ PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file,
+ (void *) km->pm_address);
+ pmclog_process_map_in(po, (pid_t) -1, km->pm_address,
+ km->pm_file);
+ }
+ FREE(kmbase, M_LINKER);
+
+ po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE;
+}
+
+/*
+ * Log the mappings for a single process.
+ */
+
+static void
+pmc_log_process_mappings(struct pmc_owner *po, struct proc *p)
+{
+}
+
+/*
+ * Log mappings for all processes in the system.
+ */
+
+static void
+pmc_log_all_process_mappings(struct pmc_owner *po)
+{
+ struct proc *p, *top;
+
+ sx_assert(&pmc_sx, SX_XLOCKED);
+
+ if ((p = pfind(1)) == NULL)
+ panic("[pmc,%d] Cannot find init", __LINE__);
+
+ PROC_UNLOCK(p);
+
+ sx_slock(&proctree_lock);
+
+ top = p;
+
+ for (;;) {
+ pmc_log_process_mappings(po, p);
+ if (!LIST_EMPTY(&p->p_children))
+ p = LIST_FIRST(&p->p_children);
+ else for (;;) {
+ if (p == top)
+ goto done;
+ if (LIST_NEXT(p, p_sibling)) {
+ p = LIST_NEXT(p, p_sibling);
+ break;
+ }
+ p = p->p_pptr;
+ }
+ }
+ done:
+ sx_sunlock(&proctree_lock);
+}
+
+/*
* The 'hook' invoked from the kernel proper
*/
@@ -1543,7 +1641,8 @@ const char *pmc_hooknames[] = {
"KLDLOAD",
"KLDUNLOAD",
"MMAP",
- "MUNMAP"
+ "MUNMAP",
+ "CALLCHAIN"
};
#endif
@@ -1726,6 +1825,14 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
pmc_process_munmap(td, (struct pmckern_map_out *) arg);
break;
+ case PMC_FN_USER_CALLCHAIN:
+ /*
+ * Record a call chain.
+ */
+ pmc_capture_user_callchain(PCPU_GET(cpuid),
+ (struct trapframe *) arg);
+ break;
+
default:
#ifdef DEBUG
KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
@@ -2321,6 +2428,21 @@ pmc_start(struct pmc *pm)
po = pm->pm_owner;
+ /*
+ * Disallow PMCSTART if a logfile is required but has not been
+ * configured yet.
+ */
+ if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
+ (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
+ return EDOOFUS; /* programming error */
+
+ /*
+ * If this is a sampling mode PMC, log mapping information for
+ * the kernel modules that are currently loaded.
+ */
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+ pmc_log_kernel_mappings(pm);
+
if (PMC_IS_VIRTUAL_MODE(mode)) {
/*
@@ -2333,15 +2455,6 @@ pmc_start(struct pmc *pm)
pmc_attach_process(po->po_owner, pm);
/*
- * Disallow PMCSTART if a logfile is required but has not
- * been configured yet.
- */
-
- if (error == 0 && (pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
- (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
- error = EDOOFUS;
-
- /*
* If the PMC is attached to its owner, then force a context
* switch to ensure that the MD state gets set correctly.
*/
@@ -2358,13 +2471,7 @@ pmc_start(struct pmc *pm)
/*
* A system-wide PMC.
- */
-
- if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
- (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
- return EDOOFUS; /* programming error */
-
- /*
+ *
* Add the owner to the global list if this is a system-wide
* sampling PMC.
*/
@@ -2378,7 +2485,8 @@ pmc_start(struct pmc *pm)
po->po_sscount++;
}
- /* TODO: dump system wide process mappings to the log? */
+ /* Log mapping information for all processes in the system. */
+ pmc_log_all_process_mappings(po);
/*
* Move to the CPU associated with this
@@ -2554,7 +2662,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
struct proc *p;
struct pmc *pm;
struct pmc_owner *po;
- struct pmckern_map_in *km, *kmbase;
struct pmc_op_configurelog cl;
sx_assert(&pmc_sx, SX_XLOCKED);
@@ -2593,18 +2700,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
if (error)
break;
-
- /*
- * Log the current set of kernel modules.
- */
- kmbase = linker_hwpmc_list_objects();
- for (km = kmbase; km->pm_file != NULL; km++) {
- PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file,
- (void *) km->pm_address);
- pmclog_process_map_in(po, (pid_t) -1, km->pm_address,
- km->pm_file);
- }
- FREE(kmbase, M_LINKER);
}
break;
@@ -2945,7 +3040,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
*/
if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW |
- PMC_F_LOG_PROCEXIT)) != 0) {
+ PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) {
error = EINVAL;
break;
}
@@ -3633,56 +3728,117 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
/*
+ * Mark the thread as needing callchain capture and post an AST. The
+ * actual callchain capture will be done in a context where it is safe
+ * to take page faults.
+ */
+
+static void
+pmc_post_callchain_ast(void)
+{
+ struct thread *td;
+
+ td = curthread;
+
+ /*
+ * Mark this thread as needing processing in ast().
+ * td->td_pflags will be safe to touch as the process was in
+ * user space when it was interrupted.
+ */
+ td->td_pflags |= TDP_CALLCHAIN;
+
+ /*
+ * Again, since we've entered this function directly from
+ * userland, `td' is guaranteed to be not locked by this CPU,
+ * so its safe to try acquire the thread lock even though we
+ * are executing in an NMI context. We need to acquire this
+ * lock before touching `td_flags' because other CPUs may be
+ * in the process of touching this field.
+ */
+ thread_lock(td);
+ td->td_flags |= TDF_ASTPENDING;
+ thread_unlock(td);
+
+ return;
+}
+
+/*
* Interrupt processing.
*
- * Find a free slot in the per-cpu array of PC samples and write the
- * current (PMC,PID,PC) triple to it. If an event was successfully
- * added, a bit is set in mask 'pmc_cpumask' denoting that the
- * DO_SAMPLES hook needs to be invoked from the clock handler.
+ * Find a free slot in the per-cpu array of samples and capture the
+ * current callchain there. If a sample was successfully added, a bit
+ * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook
+ * needs to be invoked from the clock handler.
*
* This function is meant to be called from an NMI handler. It cannot
* use any of the locking primitives supplied by the OS.
*/
int
-pmc_process_interrupt(int cpu, struct pmc *pm, uintfptr_t pc, int usermode)
+pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf,
+ int inuserspace)
{
- int error, ri;
+ int error, callchaindepth;
struct thread *td;
struct pmc_sample *ps;
struct pmc_samplebuffer *psb;
error = 0;
- ri = PMC_TO_ROWINDEX(pm);
+ /*
+ * Allocate space for a sample buffer.
+ */
psb = pmc_pcpu[cpu]->pc_sb;
ps = psb->ps_write;
- if (ps->ps_pc) { /* in use, reader hasn't caught up */
+ if (ps->ps_nsamples) { /* in use, reader hasn't caught up */
pm->pm_stalled = 1;
atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1);
- PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d",
- cpu, pm, (uint64_t) pc, usermode,
+ PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
+ cpu, pm, (void *) tf, inuserspace,
(int) (psb->ps_write - psb->ps_samples),
(int) (psb->ps_read - psb->ps_samples));
error = ENOMEM;
goto done;
}
- /* fill in entry */
- PMCDBG(SAM,INT,1,"cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d", cpu, pm,
- (uint64_t) pc, usermode,
+
+ /* Fill in entry. */
+ PMCDBG(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
+ (void *) tf, inuserspace,
(int) (psb->ps_write - psb->ps_samples),
(int) (psb->ps_read - psb->ps_samples));
- atomic_add_rel_32(&pm->pm_runcount, 1); /* hold onto PMC */
+ atomic_add_rel_32(&pm->pm_runcount, 1); /* hold onto PMC */
ps->ps_pmc = pm;
if ((td = curthread) && td->td_proc)
ps->ps_pid = td->td_proc->p_pid;
else
ps->ps_pid = -1;
- ps->ps_usermode = usermode;
- ps->ps_pc = pc; /* mark entry as in use */
+ ps->ps_cpu = cpu;
+ ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;
+
+ callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
+ pmc_callchaindepth : 1;
+
+ if (callchaindepth == 1)
+ ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf);
+ else {
+ /*
+ * Kernel stack traversals can be done immediately,
+ * while we defer to an AST for user space traversals.
+ */
+ if (!inuserspace)
+ callchaindepth =
+ pmc_save_kernel_callchain(ps->ps_pc,
+ callchaindepth, tf);
+ else {
+ pmc_post_callchain_ast();
+ callchaindepth = PMC_SAMPLE_INUSE;
+ }
+ }
+
+ ps->ps_nsamples = callchaindepth; /* mark entry as in use */
/* increment write pointer, modulo ring buffer size */
ps++;
@@ -3695,7 +3851,50 @@ pmc_process_interrupt(int cpu, struct pmc *pm, uintfptr_t pc, int usermode)
/* mark CPU as needing processing */
atomic_set_rel_int(&pmc_cpumask, (1 << cpu));
- return error;
+ return (error);
+}
+
+/*
+ * Capture a user call chain. This function will be called from ast()
+ * before control returns to userland and before the process gets
+ * rescheduled.
+ */
+
+static void
+pmc_capture_user_callchain(int cpu, struct trapframe *tf)
+{
+ int i;
+ struct pmc *pm;
+ struct pmc_sample *ps;
+ struct pmc_samplebuffer *psb;
+
+ psb = pmc_pcpu[cpu]->pc_sb;
+
+ /*
+ * Iterate through all deferred callchain requests.
+ */
+
+ for (i = 0; i < pmc_nsamples; i++) {
+
+ ps = &psb->ps_samples[i];
+ if (ps->ps_nsamples != PMC_SAMPLE_INUSE)
+ continue;
+
+ pm = ps->ps_pmc;
+
+ KASSERT(pm->pm_flags & PMC_F_CALLCHAIN,
+ ("[pmc,%d] Retrieving callchain for PMC that doesn't "
+ "want it", __LINE__));
+
+ /*
+ * Retrieve the callchain and mark the sample buffer
+ * as 'processable' by the timer tick sweep code.
+ */
+ ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc,
+ pmc_callchaindepth, tf);
+ }
+
+ return;
}
@@ -3722,8 +3921,13 @@ pmc_process_samples(int cpu)
for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
ps = psb->ps_read;
- if (ps->ps_pc == (uintfptr_t) 0) /* no data */
+ if (ps->ps_nsamples == PMC_SAMPLE_FREE)
+ break;
+ if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
+ /* Need a rescan at a later time. */
+ atomic_set_rel_int(&pmc_cpumask, (1 << cpu));
break;
+ }
pm = ps->ps_pmc;
po = pm->pm_owner;
@@ -3736,8 +3940,8 @@ pmc_process_samples(int cpu)
if (pm->pm_state != PMC_STATE_RUNNING)
goto entrydone;
- PMCDBG(SAM,OPS,1,"cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d", cpu,
- pm, (uint64_t) ps->ps_pc, ps->ps_usermode,
+ PMCDBG(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu,
+ pm, ps->ps_nsamples, ps->ps_flags,
(int) (psb->ps_write - psb->ps_samples),
(int) (psb->ps_read - psb->ps_samples));
@@ -3748,9 +3952,9 @@ pmc_process_samples(int cpu)
* would have done.
*/
if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) {
- if (ps->ps_usermode) {
+ if (ps->ps_flags & PMC_CC_F_USERSPACE) {
td = FIRST_THREAD_IN_PROC(po->po_owner);
- addupc_intr(td, ps->ps_pc, 1);
+ addupc_intr(td, ps->ps_pc[0], 1);
}
goto entrydone;
}
@@ -3762,10 +3966,10 @@ pmc_process_samples(int cpu)
* entry to the PMC's owner process.
*/
- pmclog_process_pcsample(pm, ps);
+ pmclog_process_callchain(pm, ps);
entrydone:
- ps->ps_pc = (uintfptr_t) 0; /* mark entry as free */
+ ps->ps_nsamples = 0; /* mark entry as free */
atomic_subtract_rel_32(&pm->pm_runcount, 1);
/* increment read pointer, modulo sample size */
@@ -4087,6 +4291,7 @@ pmc_initialize(void)
{
int cpu, error, n;
struct pmc_binding pb;
+ struct pmc_sample *ps;
struct pmc_samplebuffer *sb;
md = NULL;
@@ -4119,17 +4324,24 @@ pmc_initialize(void)
*/
if (pmc_hashsize <= 0) {
- (void) printf("hwpmc: tunable hashsize=%d must be greater "
- "than zero.\n", pmc_hashsize);
+ (void) printf("hwpmc: tunable \"hashsize\"=%d must be "
+ "greater than zero.\n", pmc_hashsize);
pmc_hashsize = PMC_HASH_SIZE;
}
if (pmc_nsamples <= 0 || pmc_nsamples > 65535) {
- (void) printf("hwpmc: tunable nsamples=%d out of range.\n",
- pmc_nsamples);
+ (void) printf("hwpmc: tunable \"nsamples\"=%d out of "
+ "range.\n", pmc_nsamples);
pmc_nsamples = PMC_NSAMPLES;
}
+ if (pmc_callchaindepth <= 0 ||
+ pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) {
+ (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of "
+ "range.\n", pmc_callchaindepth);
+ pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
+ }
+
md = pmc_md_initialize();
if (md == NULL || md->pmd_init == NULL)
@@ -4171,6 +4383,14 @@ pmc_initialize(void)
KASSERT(pmc_pcpu[cpu] != NULL,
("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
+ MALLOC(sb->ps_callchains, uintptr_t *,
+ pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t),
+ M_PMC, M_WAITOK|M_ZERO);
+
+ for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
+ ps->ps_pc = sb->ps_callchains +
+ (n * pmc_callchaindepth);
+
pmc_pcpu[cpu]->pc_sb = sb;
}
@@ -4327,6 +4547,7 @@ pmc_cleanup(void)
KASSERT(pmc_pcpu[cpu]->pc_sb != NULL,
("[pmc,%d] Null cpu sample buffer cpu=%d", __LINE__,
cpu));
+ FREE(pmc_pcpu[cpu]->pc_sb->ps_callchains, M_PMC);
FREE(pmc_pcpu[cpu]->pc_sb, M_PMC);
pmc_pcpu[cpu]->pc_sb = NULL;
}
diff --git a/sys/dev/hwpmc/hwpmc_piv.c b/sys/dev/hwpmc/hwpmc_piv.c
index a6cd3fe..7994330 100644
--- a/sys/dev/hwpmc/hwpmc_piv.c
+++ b/sys/dev/hwpmc/hwpmc_piv.c
@@ -1,7 +1,11 @@
/*-
- * Copyright (c) 2003-2005 Joseph Koshy
+ * Copyright (c) 2003-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -35,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
+#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
@@ -1478,7 +1483,7 @@ p4_stop_pmc(int cpu, int ri)
*
* On HTT machines, this PMC may be in use by two threads
* running on two logical CPUS. Thus we look at the
- * 'pm_runcount' field and only turn off the appropriate TO/T1
+ * 'runcount' field and only turn off the appropriate TO/T1
* bits (and keep the PMC running) if two logical CPUs were
* using the PMC.
*
@@ -1562,16 +1567,17 @@ p4_stop_pmc(int cpu, int ri)
*/
static int
-p4_intr(int cpu, uintptr_t eip, int usermode)
+p4_intr(int cpu, struct trapframe *tf)
{
- int i, did_interrupt, error, ri;
uint32_t cccrval, ovf_mask, ovf_partner;
- struct p4_cpu *pc;
+ int i, did_interrupt, error, ri;
struct pmc_hw *phw;
+ struct p4_cpu *pc;
struct pmc *pm;
pmc_value_t v;
- PMCDBG(MDP,INT, 1, "cpu=%d eip=%p um=%d", cpu, (void *) eip, usermode);
+ PMCDBG(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
+ TRAPF_USERMODE(tf));
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
@@ -1579,8 +1585,8 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
ovf_mask |= P4_CCCR_OVF;
if (p4_system_has_htt)
- ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ? P4_CCCR_OVF_PMI_T0 :
- P4_CCCR_OVF_PMI_T1;
+ ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ?
+ P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1;
else
ovf_partner = 0;
did_interrupt = 0;
@@ -1617,7 +1623,8 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
continue;
}
- (void) pmc_process_interrupt(cpu, pm, eip, usermode);
+ (void) pmc_process_interrupt(cpu, pm, tf,
+ TRAPF_USERMODE(tf));
continue;
}
@@ -1667,7 +1674,8 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
* Process the interrupt. Re-enable the PMC if
* processing was successful.
*/
- error = pmc_process_interrupt(cpu, pm, eip, usermode);
+ error = pmc_process_interrupt(cpu, pm, tf,
+ TRAPF_USERMODE(tf));
/*
* Only the first processor executing the NMI handler
@@ -1698,7 +1706,7 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
- return did_interrupt;
+ return (did_interrupt);
}
/*
diff --git a/sys/dev/hwpmc/hwpmc_ppro.c b/sys/dev/hwpmc/hwpmc_ppro.c
index 993a2bf..979c04e 100644
--- a/sys/dev/hwpmc/hwpmc_ppro.c
+++ b/sys/dev/hwpmc/hwpmc_ppro.c
@@ -1,7 +1,11 @@
/*-
* Copyright (c) 2003-2005 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -35,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
+#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/pmc_mdep.h>
@@ -710,7 +715,7 @@ p6_stop_pmc(int cpu, int ri)
}
static int
-p6_intr(int cpu, uintptr_t eip, int usermode)
+p6_intr(int cpu, struct trapframe *tf)
{
int i, error, retval, ri;
uint32_t perf0cfg;
@@ -745,7 +750,8 @@ p6_intr(int cpu, uintptr_t eip, int usermode)
retval = 1;
- error = pmc_process_interrupt(cpu, pm, eip, usermode);
+ error = pmc_process_interrupt(cpu, pm, tf,
+ TRAPF_USERMODE(tf));
if (error)
P6_MARK_STOPPED(pc,ri);
diff --git a/sys/dev/hwpmc/hwpmc_x86.c b/sys/dev/hwpmc/hwpmc_x86.c
index cb6db23..2fc7cd9 100644
--- a/sys/dev/hwpmc/hwpmc_x86.c
+++ b/sys/dev/hwpmc/hwpmc_x86.c
@@ -1,7 +1,11 @@
/*-
* Copyright (c) 2005, Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -30,12 +34,18 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/pmc.h>
+#include <sys/proc.h>
#include <sys/systm.h>
+#include <machine/cpu.h>
#include <machine/apicreg.h>
#include <machine/pmc_mdep.h>
#include <machine/md_var.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
extern volatile lapic_t *lapic;
void
@@ -48,6 +58,187 @@ pmc_x86_lapic_enable_pmc_interrupt(void)
lapic->lvt_pcint = value;
}
+/*
+ * Attempt to walk a user call stack using a too-simple algorithm.
+ * In the general case we need unwind information associated with
+ * the executable to be able to walk the user stack.
+ *
+ * We are handed a trap frame laid down at the time the PMC interrupt
+ * was taken. If the application is using frame pointers, the saved
+ * PC value could be:
+ * a. at the beginning of a function before the stack frame is laid
+ * down,
+ * b. just before a 'ret', after the stack frame has been taken off,
+ * c. somewhere else in the function with a valid stack frame being
+ * present,
+ *
+ * If the application is not using frame pointers, this algorithm will
+ * fail to yield an interesting call chain.
+ *
+ * TODO: figure out a way to use unwind information.
+ */
+
+int
+pmc_save_user_callchain(uintptr_t *cc, int nframes, struct trapframe *tf)
+{
+ int n;
+ uint32_t instr;
+ uintptr_t fp, oldfp, pc, r, sp;
+
+ KASSERT(TRAPF_USERMODE(tf), ("[x86,%d] Not a user trap frame tf=%p",
+ __LINE__, (void *) tf));
+
+ pc = PMC_TRAPFRAME_TO_PC(tf);
+ oldfp = fp = PMC_TRAPFRAME_TO_FP(tf);
+ sp = PMC_TRAPFRAME_TO_SP(tf);
+
+ *cc++ = pc; n = 1;
+
+ r = fp + sizeof(uintptr_t); /* points to return address */
+
+ if (!PMC_IN_USERSPACE(pc))
+ return (n);
+
+ if (copyin((void *) pc, &instr, sizeof(instr)) != 0)
+ return (n);
+
+ if (PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(instr) ||
+ PMC_AT_FUNCTION_EPILOGUE_RET(instr)) { /* ret */
+ if (copyin((void *) sp, &pc, sizeof(pc)) != 0)
+ return (n);
+ } else if (PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(instr)) {
+ sp += sizeof(uintptr_t);
+ if (copyin((void *) sp, &pc, sizeof(pc)) != 0)
+ return (n);
+	} else if (copyin((void *) r, &pc, sizeof(pc)) != 0 ||
+	    copyin((void *) fp, &fp, sizeof(fp)) != 0)
+ return (n);
+
+ for (; n < nframes;) {
+ if (pc == 0 || !PMC_IN_USERSPACE(pc))
+ break;
+
+ *cc++ = pc; n++;
+
+ if (fp < oldfp)
+ break;
+
+ r = fp + sizeof(uintptr_t); /* address of return address */
+ oldfp = fp;
+
+ if (copyin((void *) r, &pc, sizeof(pc)) != 0 ||
+ copyin((void *) fp, &fp, sizeof(fp)) != 0)
+ break;
+ }
+
+ return (n);
+}
+
+/*
+ * Walking the kernel call stack.
+ *
+ * We are handed the trap frame laid down at the time the PMC
+ * interrupt was taken. The saved PC could be:
+ * a. in the lowlevel trap handler, meaning that there isn't a C stack
+ * to traverse,
+ * b. at the beginning of a function before the stack frame is laid
+ * down,
+ * c. just before a 'ret', after the stack frame has been taken off,
+ * d. somewhere else in a function with a valid stack frame being
+ * present.
+ *
+ * In case (d), the previous frame pointer is at [%ebp]/[%rbp] and
+ * the return address is at [%ebp+4]/[%rbp+8].
+ *
+ * For cases (b) and (c), the return address is at [%esp]/[%rsp] and
+ * the frame pointer doesn't need to be changed when going up one
+ * level in the stack.
+ *
+ * For case (a), we check if the PC lies in low-level trap handling
+ * code, and if so we terminate our trace.
+ */
+
+int
+pmc_save_kernel_callchain(uintptr_t *cc, int nframes, struct trapframe *tf)
+{
+ int n;
+ uint32_t instr;
+ uintptr_t fp, pc, r, sp, stackstart, stackend;
+ struct thread *td;
+
+ KASSERT(TRAPF_USERMODE(tf) == 0,("[x86,%d] not a kernel backtrace",
+ __LINE__));
+
+ pc = PMC_TRAPFRAME_TO_PC(tf);
+ fp = PMC_TRAPFRAME_TO_FP(tf);
+ sp = PMC_TRAPFRAME_TO_SP(tf);
+
+ *cc++ = pc;
+ r = fp + sizeof(uintptr_t); /* points to return address */
+
+ if ((td = curthread) == NULL)
+ return (1);
+
+ if (nframes <= 1)
+ return (1);
+
+ stackstart = (uintptr_t) td->td_kstack;
+ stackend = (uintptr_t) td->td_kstack + td->td_kstack_pages * PAGE_SIZE;
+
+ if (PMC_IN_TRAP_HANDLER(pc) ||
+ !PMC_IN_KERNEL(pc) || !PMC_IN_KERNEL(r) ||
+ !PMC_IN_KERNEL_STACK(sp, stackstart, stackend) ||
+ !PMC_IN_KERNEL_STACK(fp, stackstart, stackend))
+ return (1);
+
+ instr = *(uint32_t *) pc;
+
+ /*
+ * Determine whether the interrupted function was in the
+ * processing of either laying down its stack frame or taking
+ * it off.
+ *
+ * If we haven't started laying down a stack frame, or are
+ * just about to return, then our caller's address is at
+ * *sp, and we don't have a frame to unwind.
+ */
+ if (PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(instr) ||
+ PMC_AT_FUNCTION_EPILOGUE_RET(instr))
+ pc = *(uintptr_t *) sp;
+ else if (PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(instr)) {
+ /*
+ * The code was midway through laying down a frame.
+ * At this point sp[0] has a frame back pointer,
+ * and the caller's address is therefore at sp[1].
+ */
+ sp += sizeof(uintptr_t);
+ if (!PMC_IN_KERNEL_STACK(sp, stackstart, stackend))
+ return (1);
+ pc = *(uintptr_t *) sp;
+ } else {
+ /*
+ * Not in the function prologue or epilogue.
+ */
+ pc = *(uintptr_t *) r;
+ fp = *(uintptr_t *) fp;
+ }
+
+ for (n = 1; n < nframes; n++) {
+ *cc++ = pc;
+
+ if (PMC_IN_TRAP_HANDLER(pc))
+ break;
+
+ r = fp + sizeof(uintptr_t);
+ if (!PMC_IN_KERNEL_STACK(fp, stackstart, stackend) ||
+ !PMC_IN_KERNEL(r))
+ break;
+ pc = *(uintptr_t *) r;
+ fp = *(uintptr_t *) fp;
+ }
+
+ return (n);
+}
static struct pmc_mdep *
pmc_intel_initialize(void)
OpenPOWER on IntegriCloud