summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--sys/amd64/amd64/exception.S69
-rw-r--r--sys/amd64/amd64/trap.c3
-rw-r--r--sys/amd64/include/pmc_mdep.h30
-rw-r--r--sys/conf/Makefile.amd643
-rw-r--r--sys/dev/hwpmc/hwpmc_amd.c28
-rw-r--r--sys/dev/hwpmc/hwpmc_logging.c53
-rw-r--r--sys/dev/hwpmc/hwpmc_mod.c347
-rw-r--r--sys/dev/hwpmc/hwpmc_piv.c30
-rw-r--r--sys/dev/hwpmc/hwpmc_ppro.c10
-rw-r--r--sys/dev/hwpmc/hwpmc_x86.c191
-rw-r--r--sys/i386/i386/exception.s52
-rw-r--r--sys/i386/i386/trap.c3
-rw-r--r--sys/i386/include/pmc_mdep.h27
-rw-r--r--sys/kern/kern_pmc.c18
-rw-r--r--sys/kern/subr_trap.c13
-rw-r--r--sys/sys/pmc.h30
-rw-r--r--sys/sys/pmckern.h9
-rw-r--r--sys/sys/pmclog.h2
18 files changed, 783 insertions, 135 deletions
diff --git a/sys/amd64/amd64/exception.S b/sys/amd64/amd64/exception.S
index da4918a..e6249c5 100644
--- a/sys/amd64/amd64/exception.S
+++ b/sys/amd64/amd64/exception.S
@@ -1,8 +1,12 @@
/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -32,6 +36,7 @@
#include "opt_atpic.h"
#include "opt_compat.h"
+#include "opt_hwpmc_hooks.h"
#include <machine/asmacros.h>
#include <machine/psl.h>
@@ -40,6 +45,9 @@
#include "assym.s"
.text
+#ifdef HWPMC_HOOKS
+ ENTRY(start_exceptions)
+#endif
/*****************************************************************************/
/* Trap handling */
@@ -348,6 +356,9 @@ IDTVEC(fast_syscall32)
* execute the NMI handler with interrupts disabled to prevent a
* nested interrupt from executing an 'iretq' instruction and
* inadvertently taking the processor out of NMI mode.
+ *
+ * Third, the NMI handler runs on its own stack (tss_ist1), shared
+ * with the double fault handler.
*/
IDTVEC(nmi)
@@ -386,6 +397,61 @@ nmi_calltrap:
movq %rsp, %rdi
call trap
MEXITCOUNT
+#ifdef HWPMC_HOOKS
+ /*
+ * Check if the current trap was from user mode and if so
+ * whether the current thread needs a user call chain to be
+ * captured. We are still in NMI mode at this point.
+ */
+ testb $SEL_RPL_MASK,TF_CS(%rsp)
+ jz nocallchain
+ movq PCPU(CURTHREAD),%rax /* curthread present? */
+ orq %rax,%rax
+ jz nocallchain
+ testl $TDP_CALLCHAIN,TD_PFLAGS(%rax) /* flagged for capture? */
+ jz nocallchain
+ /*
+ * A user callchain is to be captured, so:
+ * - Move execution to the regular kernel stack, to allow for
+ * nested NMI interrupts.
+ * - Take the processor out of "NMI" mode by faking an "iret".
+ * - Enable interrupts, so that copyin() can work.
+ */
+ movq %rsp,%rsi /* source stack pointer */
+ movq $TF_SIZE,%rcx
+ movq PCPU(RSP0),%rbx
+ subq %rcx,%rbx
+ movq %rbx,%rdi /* destination stack pointer */
+
+ shrq $3,%rcx /* trap frame size in long words */
+ cld
+ rep
+ movsq /* copy trapframe */
+
+ movl %ss,%eax
+ pushq %rax /* tf_ss */
+ pushq %rbx /* tf_rsp (on kernel stack) */
+ pushfq /* tf_rflags */
+ movl %cs,%eax
+ pushq %rax /* tf_cs */
+ pushq $outofnmi /* tf_rip */
+ iretq
+outofnmi:
+ /*
+ * At this point the processor has exited NMI mode and is running
+ * with interrupts turned off on the normal kernel stack.
+ * We turn interrupts back on, and take the usual 'doreti' exit
+ * path.
+ *
+ * If a pending NMI gets recognized at or after this point, it
+ * will cause a kernel callchain to be traced. Since this path
+ * is only taken for NMI interrupts from user space, our `swapgs'
+ * state is correct for taking the doreti path.
+ */
+ sti
+ jmp doreti
+nocallchain:
+#endif
testl %ebx,%ebx
jz nmi_restoreregs
swapgs
@@ -556,3 +622,6 @@ doreti_iret_fault:
movq $0,TF_ADDR(%rsp)
FAKE_MCOUNT(TF_RIP(%rsp))
jmp calltrap
+#ifdef HWPMC_HOOKS
+ ENTRY(end_exceptions)
+#endif
diff --git a/sys/amd64/amd64/trap.c b/sys/amd64/amd64/trap.c
index 7e95cbc..38da3f6 100644
--- a/sys/amd64/amd64/trap.c
+++ b/sys/amd64/amd64/trap.c
@@ -195,8 +195,7 @@ trap(struct trapframe *frame)
* the NMI was handled by it and we can return immediately.
*/
if (type == T_NMI && pmc_intr &&
- (*pmc_intr)(PCPU_GET(cpuid), (uintptr_t) frame->tf_rip,
- TRAPF_USERMODE(frame)))
+ (*pmc_intr)(PCPU_GET(cpuid), frame))
goto out;
#endif
diff --git a/sys/amd64/include/pmc_mdep.h b/sys/amd64/include/pmc_mdep.h
index 4a67658..e391c54 100644
--- a/sys/amd64/include/pmc_mdep.h
+++ b/sys/amd64/include/pmc_mdep.h
@@ -1,7 +1,11 @@
/*-
- * Copyright (c) 2003-2005 Joseph Koshy
+ * Copyright (c) 2003-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -53,10 +57,34 @@ union pmc_md_pmc {
struct pmc;
+#define PMC_TRAPFRAME_TO_PC(TF) ((TF)->tf_rip)
+#define PMC_TRAPFRAME_TO_FP(TF) ((TF)->tf_rbp)
+#define PMC_TRAPFRAME_TO_SP(TF) ((TF)->tf_rsp)
+
+#define PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(I) \
+ (((I) & 0xffffffff) == 0xe5894855) /* pushq %rbp; movq %rsp,%rbp */
+#define PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(I) \
+ (((I) & 0x00ffffff) == 0x00e58948) /* movq %rsp,%rbp */
+#define PMC_AT_FUNCTION_EPILOGUE_RET(I) \
+ (((I) & 0xFF) == 0xC3) /* ret */
+
+#define PMC_IN_TRAP_HANDLER(PC) \
+ ((PC) >= (uintptr_t) start_exceptions && \
+ (PC) < (uintptr_t) end_exceptions)
+
+#define PMC_IN_KERNEL_STACK(S,START,END) \
+ ((S) >= (START) && (S) < (END))
+#define PMC_IN_KERNEL(va) (((va) >= DMAP_MIN_ADDRESS && \
+ (va) < DMAP_MAX_ADDRESS) || ((va) >= KERNBASE && \
+ (va) < VM_MAX_KERNEL_ADDRESS))
+
+#define PMC_IN_USERSPACE(va) ((va) <= VM_MAXUSER_ADDRESS)
+
/*
* Prototypes
*/
+void start_exceptions(void), end_exceptions(void);
void pmc_x86_lapic_enable_pmc_interrupt(void);
#endif
diff --git a/sys/conf/Makefile.amd64 b/sys/conf/Makefile.amd64
index 1032a51..6f35c9d3 100644
--- a/sys/conf/Makefile.amd64
+++ b/sys/conf/Makefile.amd64
@@ -32,7 +32,8 @@ S= ../../..
.include "$S/conf/kern.pre.mk"
DDB_ENABLED!= grep DDB opt_ddb.h || true
-.if !empty(DDB_ENABLED)
+HWPMC_ENABLED!= grep HWPMC opt_hwpmc_hooks.h || true
+.if !empty(DDB_ENABLED) || !empty(HWPMC_ENABLED)
CFLAGS+= -fno-omit-frame-pointer
.endif
diff --git a/sys/dev/hwpmc/hwpmc_amd.c b/sys/dev/hwpmc/hwpmc_amd.c
index b7cdee8..3576234 100644
--- a/sys/dev/hwpmc/hwpmc_amd.c
+++ b/sys/dev/hwpmc/hwpmc_amd.c
@@ -1,7 +1,11 @@
/*-
- * Copyright (c) 2003-2005 Joseph Koshy
+ * Copyright (c) 2003-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -38,9 +42,9 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
+#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
-#include <machine/pmc_mdep.h>
#include <machine/specialreg.h>
#ifdef DEBUG
@@ -667,7 +671,7 @@ amd_stop_pmc(int cpu, int ri)
*/
static int
-amd_intr(int cpu, uintptr_t eip, int usermode)
+amd_intr(int cpu, struct trapframe *tf)
{
int i, error, retval, ri;
uint32_t config, evsel, perfctr;
@@ -679,8 +683,8 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
KASSERT(cpu >= 0 && cpu < mp_ncpus,
("[amd,%d] out of range CPU %d", __LINE__, cpu));
- PMCDBG(MDP,INT,1, "cpu=%d eip=%p um=%d", cpu, (void *) eip,
- usermode);
+ PMCDBG(MDP,INT,1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
+ TRAPF_USERMODE(tf));
retval = 0;
@@ -696,8 +700,8 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
*
* If multiple PMCs interrupt at the same time, the AMD64
* processor appears to deliver as many NMIs as there are
- * outstanding PMC interrupts. Thus we need to only process
- * one interrupt at a time.
+ * outstanding PMC interrupts. So we process only one NMI
+ * interrupt at a time.
*/
for (i = 0; retval == 0 && i < AMD_NPMCS-1; i++) {
@@ -717,9 +721,9 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
continue;
}
- retval = 1; /* found an interrupting PMC */
+ retval = 1; /* Found an interrupting PMC. */
- /* stop the PMC, reload count */
+ /* Stop the PMC, reload count. */
evsel = AMD_PMC_EVSEL_0 + i;
perfctr = AMD_PMC_PERFCTR_0 + i;
v = pm->pm_sc.pm_reloadcount;
@@ -733,8 +737,8 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
wrmsr(evsel, config & ~AMD_PMC_ENABLE);
wrmsr(perfctr, AMD_RELOAD_COUNT_TO_PERFCTR_VALUE(v));
- /* restart the counter if there was no error during logging */
- error = pmc_process_interrupt(cpu, pm, eip, usermode);
+ /* Restart the counter if logging succeeded. */
+ error = pmc_process_interrupt(cpu, pm, tf, TRAPF_USERMODE(tf));
if (error == 0)
wrmsr(evsel, config | AMD_PMC_ENABLE);
}
@@ -742,7 +746,7 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
atomic_add_int(retval ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
- return retval;
+ return (retval);
}
/*
diff --git a/sys/dev/hwpmc/hwpmc_logging.c b/sys/dev/hwpmc/hwpmc_logging.c
index 7be4776..77417af 100644
--- a/sys/dev/hwpmc/hwpmc_logging.c
+++ b/sys/dev/hwpmc/hwpmc_logging.c
@@ -1,7 +1,11 @@
/*-
- * Copyright (c) 2005 Joseph Koshy
+ * Copyright (c) 2005-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -64,7 +68,6 @@ TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "logbuffersize", &pmclog_buffer_size);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFLAG_TUN|CTLFLAG_RD,
&pmclog_buffer_size, 0, "size of log buffers in kilobytes");
-
/*
* kern.hwpmc.nbuffer -- number of global log buffers
*/
@@ -96,7 +99,6 @@ static struct mtx pmc_kthread_mtx; /* sleep lock */
/*
* Log file record constructors.
*/
-
#define _PMCLOG_TO_HEADER(T,L) \
((PMCLOG_HEADER_MAGIC << 24) | \
(PMCLOG_TYPE_ ## T << 16) | \
@@ -135,6 +137,8 @@ static struct mtx pmc_kthread_mtx; /* sleep lock */
* Assertions about the log file format.
*/
+CTASSERT(sizeof(struct pmclog_callchain) == 6*4 +
+ PMC_CALLCHAIN_DEPTH_MAX*sizeof(uintfptr_t));
CTASSERT(sizeof(struct pmclog_closelog) == 3*4);
CTASSERT(sizeof(struct pmclog_dropnotify) == 3*4);
CTASSERT(sizeof(struct pmclog_map_in) == PATH_MAX +
@@ -710,9 +714,28 @@ pmclog_flush(struct pmc_owner *po)
}
-/*
- * Send a 'close log' event to the log file.
- */
+void
+pmclog_process_callchain(struct pmc *pm, struct pmc_sample *ps)
+{ /* Write sample 'ps' of PMC 'pm' to the owner's log as a CALLCHAIN record. */
+ int n, recordlen;
+ uint32_t flags;
+ struct pmc_owner *po;
+
+ PMCDBG(LOG,SAM,1,"pm=%p pid=%d n=%d", pm, ps->ps_pid,
+ ps->ps_nsamples);
+
+ recordlen = offsetof(struct pmclog_callchain, pl_pc) +
+ ps->ps_nsamples * sizeof(uintfptr_t); /* part before pl_pc + one slot per PC */
+ po = pm->pm_owner;
+ flags = PMC_CALLCHAIN_TO_CPUFLAGS(ps->ps_cpu,ps->ps_flags);
+ PMCLOG_RESERVE(po, CALLCHAIN, recordlen);
+ PMCLOG_EMIT32(ps->ps_pid); /* emitted in order: pid, PMC id, cpu/flags word */
+ PMCLOG_EMIT32(pm->pm_id);
+ PMCLOG_EMIT32(flags);
+ for (n = 0; n < ps->ps_nsamples; n++) /* then one address per captured PC */
+ PMCLOG_EMITADDR(ps->ps_pc[n]);
+ PMCLOG_DESPATCH(po);
+}
void
pmclog_process_closelog(struct pmc_owner *po)
@@ -761,24 +784,6 @@ pmclog_process_map_out(struct pmc_owner *po, pid_t pid, uintfptr_t start,
}
void
-pmclog_process_pcsample(struct pmc *pm, struct pmc_sample *ps)
-{
- struct pmc_owner *po;
-
- PMCDBG(LOG,SAM,1,"pm=%p pid=%d pc=%p", pm, ps->ps_pid,
- (void *) ps->ps_pc);
-
- po = pm->pm_owner;
-
- PMCLOG_RESERVE(po, PCSAMPLE, sizeof(struct pmclog_pcsample));
- PMCLOG_EMIT32(ps->ps_pid);
- PMCLOG_EMITADDR(ps->ps_pc);
- PMCLOG_EMIT32(pm->pm_id);
- PMCLOG_EMIT32(ps->ps_usermode);
- PMCLOG_DESPATCH(po);
-}
-
-void
pmclog_process_pmcallocate(struct pmc *pm)
{
struct pmc_owner *po;
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index 6c0e0ea..0bba092 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -1,7 +1,11 @@
/*-
- * Copyright (c) 2003-2006 Joseph Koshy
+ * Copyright (c) 2003-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -166,6 +170,7 @@ static int pmc_attach_one_process(struct proc *p, struct pmc *pm);
static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
int cpu);
static int pmc_can_attach(struct pmc *pm, struct proc *p);
+static void pmc_capture_user_callchain(int cpu, struct trapframe *tf);
static void pmc_cleanup(void);
static int pmc_detach_process(struct proc *p, struct pmc *pm);
static int pmc_detach_one_process(struct proc *p, struct pmc *pm,
@@ -180,6 +185,9 @@ static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
static void pmc_force_context_switch(void);
static void pmc_link_target_process(struct pmc *pm,
struct pmc_process *pp);
+static void pmc_log_all_process_mappings(struct pmc_owner *po);
+static void pmc_log_kernel_mappings(struct pmc *pm);
+static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p);
static void pmc_maybe_remove_owner(struct pmc_owner *po);
static void pmc_process_csw_in(struct thread *td);
static void pmc_process_csw_out(struct thread *td);
@@ -205,6 +213,11 @@ static void pmc_unlink_target_process(struct pmc *pmc,
SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");
+static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
+TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "callchaindepth", &pmc_callchaindepth);
+SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_TUN|CTLFLAG_RD,
+ &pmc_callchaindepth, 0, "depth of call chain records");
+
#ifdef DEBUG
struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
char pmc_debugstr[PMC_DEBUG_STRSIZE];
@@ -226,7 +239,7 @@ SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_TUN|CTLFLAG_RD,
&pmc_hashsize, 0, "rows in hash tables");
/*
- * kern.hwpmc.nsamples --- number of PC samples per CPU
+ * kern.hwpmc.nsamples --- number of PC samples/callchain stacks per CPU
*/
static int pmc_nsamples = PMC_NSAMPLES;
@@ -234,6 +247,7 @@ TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nsamples", &pmc_nsamples);
SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_TUN|CTLFLAG_RD,
&pmc_nsamples, 0, "number of PC samples per CPU");
+
/*
* kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
*/
@@ -957,6 +971,8 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
pmclog_process_pmcattach(pm, p->p_pid, fullpath);
if (freepath)
FREE(freepath, M_TEMP);
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+ pmc_log_process_mappings(pm->pm_owner, p);
}
/* mark process as using HWPMCs */
PROC_LOCK(p);
@@ -1449,7 +1465,7 @@ pmc_process_kld_unload(struct pmckern_map_out *pkm)
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmclog_process_map_out(po, (pid_t) -1,
pkm->pm_address, pkm->pm_address + pkm->pm_size);
-
+
/*
* TODO: Notify owners of process-sampling PMCs.
*/
@@ -1528,6 +1544,88 @@ pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm)
}
/*
+ * Log mapping information about the kernel.
+ *
+ * For the owner of sampling-mode PMC 'pm', emit one map-in record
+ * (with a pid of -1) for every entry of the list returned by
+ * linker_hwpmc_list_objects(); the list ends at the first entry
+ * whose 'pm_file' is NULL and is freed once it has been logged.
+ *
+ * The owner is then flagged with PMC_PO_INITIAL_MAPPINGS_DONE, and
+ * the function returns early when that flag is already set, so the
+ * records are written at most once per owner.
+ *
+ * The caller must hold 'pmc_sx'.
+ */
+
+static void
+pmc_log_kernel_mappings(struct pmc *pm)
+{
+ struct pmc_owner *po;
+ struct pmckern_map_in *km, *kmbase;
+
+ sx_assert(&pmc_sx, SX_LOCKED);
+ KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
+ ("[pmc,%d] non-sampling PMC (%p) desires mapping information",
+ __LINE__, (void *) pm));
+
+ po = pm->pm_owner;
+
+ if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE)
+ return;
+
+ /*
+ * Log the current set of kernel modules.
+ */
+ kmbase = linker_hwpmc_list_objects();
+ for (km = kmbase; km->pm_file != NULL; km++) {
+ PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file,
+ (void *) km->pm_address);
+ pmclog_process_map_in(po, (pid_t) -1, km->pm_address,
+ km->pm_file);
+ }
+ FREE(kmbase, M_LINKER);
+
+ po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE;
+}
+
+/*
+ * Log the mappings for a single process.
+ *
+ * NOTE: this is currently an empty placeholder: nothing is logged
+ * for process 'p' on behalf of owner 'po', and calls to it have no
+ * effect.
+ */
+
+static void
+pmc_log_process_mappings(struct pmc_owner *po, struct proc *p)
+{
+}
+
+/*
+ * Log mappings for all processes in the system.
+ *
+ * Starting at the process with pid 1, walk the process tree through
+ * the 'p_children', 'p_sibling' and 'p_pptr' links and call
+ * pmc_log_process_mappings() for owner 'po' on every process
+ * visited.  A process is visited before its children.
+ *
+ * The walk runs with 'proctree_lock' held shared.  The caller must
+ * hold 'pmc_sx' exclusively.  Panics if pid 1 cannot be found.
+ */
+
+static void
+pmc_log_all_process_mappings(struct pmc_owner *po)
+{
+ struct proc *p, *top;
+
+ sx_assert(&pmc_sx, SX_XLOCKED);
+
+ if ((p = pfind(1)) == NULL)
+ panic("[pmc,%d] Cannot find init", __LINE__);
+
+ PROC_UNLOCK(p); /* presumably pfind() returned 'p' locked -- confirm */
+
+ sx_slock(&proctree_lock);
+
+ top = p;
+
+ for (;;) {
+ pmc_log_process_mappings(po, p);
+ if (!LIST_EMPTY(&p->p_children))
+ p = LIST_FIRST(&p->p_children); /* descend to the first child */
+ else for (;;) { /* no children: move to a sibling, climbing as needed */
+ if (p == top)
+ goto done; /* back at the starting process: walk complete */
+ if (LIST_NEXT(p, p_sibling)) {
+ p = LIST_NEXT(p, p_sibling);
+ break;
+ }
+ p = p->p_pptr;
+ }
+ }
+ done:
+ sx_sunlock(&proctree_lock);
+}
+
+/*
* The 'hook' invoked from the kernel proper
*/
@@ -1543,7 +1641,8 @@ const char *pmc_hooknames[] = {
"KLDLOAD",
"KLDUNLOAD",
"MMAP",
- "MUNMAP"
+ "MUNMAP",
+ "CALLCHAIN"
};
#endif
@@ -1726,6 +1825,14 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
pmc_process_munmap(td, (struct pmckern_map_out *) arg);
break;
+ case PMC_FN_USER_CALLCHAIN:
+ /*
+ * Record a call chain.
+ */
+ pmc_capture_user_callchain(PCPU_GET(cpuid),
+ (struct trapframe *) arg);
+ break;
+
default:
#ifdef DEBUG
KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function));
@@ -2321,6 +2428,21 @@ pmc_start(struct pmc *pm)
po = pm->pm_owner;
+ /*
+ * Disallow PMCSTART if a logfile is required but has not been
+ * configured yet.
+ */
+ if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
+ (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
+ return EDOOFUS; /* programming error */
+
+ /*
+ * If this is a sampling mode PMC, log mapping information for
+ * the kernel modules that are currently loaded.
+ */
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+ pmc_log_kernel_mappings(pm);
+
if (PMC_IS_VIRTUAL_MODE(mode)) {
/*
@@ -2333,15 +2455,6 @@ pmc_start(struct pmc *pm)
pmc_attach_process(po->po_owner, pm);
/*
- * Disallow PMCSTART if a logfile is required but has not
- * been configured yet.
- */
-
- if (error == 0 && (pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
- (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
- error = EDOOFUS;
-
- /*
* If the PMC is attached to its owner, then force a context
* switch to ensure that the MD state gets set correctly.
*/
@@ -2358,13 +2471,7 @@ pmc_start(struct pmc *pm)
/*
* A system-wide PMC.
- */
-
- if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
- (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
- return EDOOFUS; /* programming error */
-
- /*
+ *
* Add the owner to the global list if this is a system-wide
* sampling PMC.
*/
@@ -2378,7 +2485,8 @@ pmc_start(struct pmc *pm)
po->po_sscount++;
}
- /* TODO: dump system wide process mappings to the log? */
+ /* Log mapping information for all processes in the system. */
+ pmc_log_all_process_mappings(po);
/*
* Move to the CPU associated with this
@@ -2554,7 +2662,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
struct proc *p;
struct pmc *pm;
struct pmc_owner *po;
- struct pmckern_map_in *km, *kmbase;
struct pmc_op_configurelog cl;
sx_assert(&pmc_sx, SX_XLOCKED);
@@ -2593,18 +2700,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
if (error)
break;
-
- /*
- * Log the current set of kernel modules.
- */
- kmbase = linker_hwpmc_list_objects();
- for (km = kmbase; km->pm_file != NULL; km++) {
- PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file,
- (void *) km->pm_address);
- pmclog_process_map_in(po, (pid_t) -1, km->pm_address,
- km->pm_file);
- }
- FREE(kmbase, M_LINKER);
}
break;
@@ -2945,7 +3040,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
*/
if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW |
- PMC_F_LOG_PROCEXIT)) != 0) {
+ PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) {
error = EINVAL;
break;
}
@@ -3633,56 +3728,117 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
/*
+ * Mark the thread as needing callchain capture and post an AST. The
+ * actual callchain capture will be done in a context where it is safe
+ * to take page faults.
+ *
+ * Concretely, this sets TDP_CALLCHAIN in the current thread's
+ * 'td_pflags' and, under the thread lock, TDF_ASTPENDING in its
+ * 'td_flags'.
+ */
+
+static void
+pmc_post_callchain_ast(void)
+{
+ struct thread *td;
+
+ td = curthread;
+
+ /*
+ * Mark this thread as needing processing in ast().
+ * td->td_pflags will be safe to touch as the process was in
+ * user space when it was interrupted.
+ */
+ td->td_pflags |= TDP_CALLCHAIN;
+
+ /*
+ * Again, since we've entered this function directly from
+ * userland, `td' is guaranteed not to be locked by this CPU,
+ * so it's safe to try to acquire the thread lock even though
+ * we are executing in an NMI context. We need to acquire this
+ * lock before touching `td_flags' because other CPUs may be
+ * in the process of touching this field.
+ */
+ thread_lock(td);
+ td->td_flags |= TDF_ASTPENDING;
+ thread_unlock(td);
+
+ return;
+}
+
+/*
* Interrupt processing.
*
- * Find a free slot in the per-cpu array of PC samples and write the
- * current (PMC,PID,PC) triple to it. If an event was successfully
- * added, a bit is set in mask 'pmc_cpumask' denoting that the
- * DO_SAMPLES hook needs to be invoked from the clock handler.
+ * Find a free slot in the per-cpu array of samples and capture the
+ * current callchain there. If a sample was successfully added, a bit
+ * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook
+ * needs to be invoked from the clock handler.
*
* This function is meant to be called from an NMI handler. It cannot
* use any of the locking primitives supplied by the OS.
*/
int
-pmc_process_interrupt(int cpu, struct pmc *pm, uintfptr_t pc, int usermode)
+pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf,
+ int inuserspace)
{
- int error, ri;
+ int error, callchaindepth;
struct thread *td;
struct pmc_sample *ps;
struct pmc_samplebuffer *psb;
error = 0;
- ri = PMC_TO_ROWINDEX(pm);
+ /*
+ * Allocate space for a sample buffer.
+ */
psb = pmc_pcpu[cpu]->pc_sb;
ps = psb->ps_write;
- if (ps->ps_pc) { /* in use, reader hasn't caught up */
+ if (ps->ps_nsamples) { /* in use, reader hasn't caught up */
pm->pm_stalled = 1;
atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1);
- PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d",
- cpu, pm, (uint64_t) pc, usermode,
+ PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d",
+ cpu, pm, (void *) tf, inuserspace,
(int) (psb->ps_write - psb->ps_samples),
(int) (psb->ps_read - psb->ps_samples));
error = ENOMEM;
goto done;
}
- /* fill in entry */
- PMCDBG(SAM,INT,1,"cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d", cpu, pm,
- (uint64_t) pc, usermode,
+
+ /* Fill in entry. */
+ PMCDBG(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm,
+ (void *) tf, inuserspace,
(int) (psb->ps_write - psb->ps_samples),
(int) (psb->ps_read - psb->ps_samples));
- atomic_add_rel_32(&pm->pm_runcount, 1); /* hold onto PMC */
+ atomic_add_rel_32(&pm->pm_runcount, 1); /* hold onto PMC */
ps->ps_pmc = pm;
if ((td = curthread) && td->td_proc)
ps->ps_pid = td->td_proc->p_pid;
else
ps->ps_pid = -1;
- ps->ps_usermode = usermode;
- ps->ps_pc = pc; /* mark entry as in use */
+ ps->ps_cpu = cpu;
+ ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0;
+
+ callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ?
+ pmc_callchaindepth : 1;
+
+ if (callchaindepth == 1)
+ ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf);
+ else {
+ /*
+ * Kernel stack traversals can be done immediately,
+ * while we defer to an AST for user space traversals.
+ */
+ if (!inuserspace)
+ callchaindepth =
+ pmc_save_kernel_callchain(ps->ps_pc,
+ callchaindepth, tf);
+ else {
+ pmc_post_callchain_ast();
+ callchaindepth = PMC_SAMPLE_INUSE;
+ }
+ }
+
+ ps->ps_nsamples = callchaindepth; /* mark entry as in use */
/* increment write pointer, modulo ring buffer size */
ps++;
@@ -3695,7 +3851,50 @@ pmc_process_interrupt(int cpu, struct pmc *pm, uintfptr_t pc, int usermode)
/* mark CPU as needing processing */
atomic_set_rel_int(&pmc_cpumask, (1 << cpu));
- return error;
+ return (error);
+}
+
+/*
+ * Capture a user call chain. This function will be called from ast()
+ * before control returns to userland and before the process gets
+ * rescheduled.
+ *
+ * All 'pmc_nsamples' slots of the sample buffer belonging to CPU
+ * 'cpu' are scanned.  Each slot whose 'ps_nsamples' field equals
+ * PMC_SAMPLE_INUSE is handed to pmc_save_user_callchain() together
+ * with 'pmc_callchaindepth' and trap frame 'tf', and the slot's
+ * 'ps_nsamples' is overwritten with that function's return value.
+ *
+ * NOTE(review): every pending slot is filled in from the same 'tf';
+ * nothing here checks that the slot was queued for the thread now
+ * running -- confirm that this is intended.
+ */
+
+static void
+pmc_capture_user_callchain(int cpu, struct trapframe *tf)
+{
+ int i;
+ struct pmc *pm;
+ struct pmc_sample *ps;
+ struct pmc_samplebuffer *psb;
+
+ psb = pmc_pcpu[cpu]->pc_sb;
+
+ /*
+ * Iterate through all deferred callchain requests.
+ */
+
+ for (i = 0; i < pmc_nsamples; i++) {
+
+ ps = &psb->ps_samples[i];
+ if (ps->ps_nsamples != PMC_SAMPLE_INUSE)
+ continue;
+
+ pm = ps->ps_pmc;
+
+ KASSERT(pm->pm_flags & PMC_F_CALLCHAIN,
+ ("[pmc,%d] Retrieving callchain for PMC that doesn't "
+ "want it", __LINE__));
+
+ /*
+ * Retrieve the callchain and mark the sample buffer
+ * as 'processable' by the timer tick sweep code.
+ */
+ ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc,
+ pmc_callchaindepth, tf);
+ }
+
+ return;
}
@@ -3722,8 +3921,13 @@ pmc_process_samples(int cpu)
for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
ps = psb->ps_read;
- if (ps->ps_pc == (uintfptr_t) 0) /* no data */
+ if (ps->ps_nsamples == PMC_SAMPLE_FREE)
+ break;
+ if (ps->ps_nsamples == PMC_SAMPLE_INUSE) {
+ /* Need a rescan at a later time. */
+ atomic_set_rel_int(&pmc_cpumask, (1 << cpu));
break;
+ }
pm = ps->ps_pmc;
po = pm->pm_owner;
@@ -3736,8 +3940,8 @@ pmc_process_samples(int cpu)
if (pm->pm_state != PMC_STATE_RUNNING)
goto entrydone;
- PMCDBG(SAM,OPS,1,"cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d", cpu,
- pm, (uint64_t) ps->ps_pc, ps->ps_usermode,
+ PMCDBG(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu,
+ pm, ps->ps_nsamples, ps->ps_flags,
(int) (psb->ps_write - psb->ps_samples),
(int) (psb->ps_read - psb->ps_samples));
@@ -3748,9 +3952,9 @@ pmc_process_samples(int cpu)
* would have done.
*/
if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) {
- if (ps->ps_usermode) {
+ if (ps->ps_flags & PMC_CC_F_USERSPACE) {
td = FIRST_THREAD_IN_PROC(po->po_owner);
- addupc_intr(td, ps->ps_pc, 1);
+ addupc_intr(td, ps->ps_pc[0], 1);
}
goto entrydone;
}
@@ -3762,10 +3966,10 @@ pmc_process_samples(int cpu)
* entry to the PMC's owner process.
*/
- pmclog_process_pcsample(pm, ps);
+ pmclog_process_callchain(pm, ps);
entrydone:
- ps->ps_pc = (uintfptr_t) 0; /* mark entry as free */
+ ps->ps_nsamples = 0; /* mark entry as free */
atomic_subtract_rel_32(&pm->pm_runcount, 1);
/* increment read pointer, modulo sample size */
@@ -4087,6 +4291,7 @@ pmc_initialize(void)
{
int cpu, error, n;
struct pmc_binding pb;
+ struct pmc_sample *ps;
struct pmc_samplebuffer *sb;
md = NULL;
@@ -4119,17 +4324,24 @@ pmc_initialize(void)
*/
if (pmc_hashsize <= 0) {
- (void) printf("hwpmc: tunable hashsize=%d must be greater "
- "than zero.\n", pmc_hashsize);
+ (void) printf("hwpmc: tunable \"hashsize\"=%d must be "
+ "greater than zero.\n", pmc_hashsize);
pmc_hashsize = PMC_HASH_SIZE;
}
if (pmc_nsamples <= 0 || pmc_nsamples > 65535) {
- (void) printf("hwpmc: tunable nsamples=%d out of range.\n",
- pmc_nsamples);
+ (void) printf("hwpmc: tunable \"nsamples\"=%d out of "
+ "range.\n", pmc_nsamples);
pmc_nsamples = PMC_NSAMPLES;
}
+ if (pmc_callchaindepth <= 0 ||
+ pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) {
+ (void) printf("hwpmc: tunable \"callchaindepth\"=%d out of "
+ "range.\n", pmc_callchaindepth);
+ pmc_callchaindepth = PMC_CALLCHAIN_DEPTH;
+ }
+
md = pmc_md_initialize();
if (md == NULL || md->pmd_init == NULL)
@@ -4171,6 +4383,14 @@ pmc_initialize(void)
KASSERT(pmc_pcpu[cpu] != NULL,
("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
+ MALLOC(sb->ps_callchains, uintptr_t *,
+ pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t),
+ M_PMC, M_WAITOK|M_ZERO);
+
+ for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++)
+ ps->ps_pc = sb->ps_callchains +
+ (n * pmc_callchaindepth);
+
pmc_pcpu[cpu]->pc_sb = sb;
}
@@ -4327,6 +4547,7 @@ pmc_cleanup(void)
KASSERT(pmc_pcpu[cpu]->pc_sb != NULL,
("[pmc,%d] Null cpu sample buffer cpu=%d", __LINE__,
cpu));
+ FREE(pmc_pcpu[cpu]->pc_sb->ps_callchains, M_PMC);
FREE(pmc_pcpu[cpu]->pc_sb, M_PMC);
pmc_pcpu[cpu]->pc_sb = NULL;
}
diff --git a/sys/dev/hwpmc/hwpmc_piv.c b/sys/dev/hwpmc/hwpmc_piv.c
index a6cd3fe..7994330 100644
--- a/sys/dev/hwpmc/hwpmc_piv.c
+++ b/sys/dev/hwpmc/hwpmc_piv.c
@@ -1,7 +1,11 @@
/*-
- * Copyright (c) 2003-2005 Joseph Koshy
+ * Copyright (c) 2003-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -35,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
+#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/specialreg.h>
@@ -1478,7 +1483,7 @@ p4_stop_pmc(int cpu, int ri)
*
* On HTT machines, this PMC may be in use by two threads
* running on two logical CPUS. Thus we look at the
- * 'pm_runcount' field and only turn off the appropriate TO/T1
+ * 'runcount' field and only turn off the appropriate T0/T1
* bits (and keep the PMC running) if two logical CPUs were
* using the PMC.
*
@@ -1562,16 +1567,17 @@ p4_stop_pmc(int cpu, int ri)
*/
static int
-p4_intr(int cpu, uintptr_t eip, int usermode)
+p4_intr(int cpu, struct trapframe *tf)
{
- int i, did_interrupt, error, ri;
uint32_t cccrval, ovf_mask, ovf_partner;
- struct p4_cpu *pc;
+ int i, did_interrupt, error, ri;
struct pmc_hw *phw;
+ struct p4_cpu *pc;
struct pmc *pm;
pmc_value_t v;
- PMCDBG(MDP,INT, 1, "cpu=%d eip=%p um=%d", cpu, (void *) eip, usermode);
+ PMCDBG(MDP,INT, 1, "cpu=%d tf=0x%p um=%d", cpu, (void *) tf,
+ TRAPF_USERMODE(tf));
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_HTT_PRIMARY(cpu)];
@@ -1579,8 +1585,8 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
ovf_mask |= P4_CCCR_OVF;
if (p4_system_has_htt)
- ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ? P4_CCCR_OVF_PMI_T0 :
- P4_CCCR_OVF_PMI_T1;
+ ovf_partner = P4_CPU_IS_HTT_SECONDARY(cpu) ?
+ P4_CCCR_OVF_PMI_T0 : P4_CCCR_OVF_PMI_T1;
else
ovf_partner = 0;
did_interrupt = 0;
@@ -1617,7 +1623,8 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
!PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
continue;
}
- (void) pmc_process_interrupt(cpu, pm, eip, usermode);
+ (void) pmc_process_interrupt(cpu, pm, tf,
+ TRAPF_USERMODE(tf));
continue;
}
@@ -1667,7 +1674,8 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
* Process the interrupt. Re-enable the PMC if
* processing was successful.
*/
- error = pmc_process_interrupt(cpu, pm, eip, usermode);
+ error = pmc_process_interrupt(cpu, pm, tf,
+ TRAPF_USERMODE(tf));
/*
* Only the first processor executing the NMI handler
@@ -1698,7 +1706,7 @@ p4_intr(int cpu, uintptr_t eip, int usermode)
atomic_add_int(did_interrupt ? &pmc_stats.pm_intr_processed :
&pmc_stats.pm_intr_ignored, 1);
- return did_interrupt;
+ return (did_interrupt);
}
/*
diff --git a/sys/dev/hwpmc/hwpmc_ppro.c b/sys/dev/hwpmc/hwpmc_ppro.c
index 993a2bf..979c04e 100644
--- a/sys/dev/hwpmc/hwpmc_ppro.c
+++ b/sys/dev/hwpmc/hwpmc_ppro.c
@@ -1,7 +1,11 @@
/*-
* Copyright (c) 2003-2005 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -35,6 +39,7 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
+#include <machine/cpu.h>
#include <machine/cpufunc.h>
#include <machine/md_var.h>
#include <machine/pmc_mdep.h>
@@ -710,7 +715,7 @@ p6_stop_pmc(int cpu, int ri)
}
static int
-p6_intr(int cpu, uintptr_t eip, int usermode)
+p6_intr(int cpu, struct trapframe *tf)
{
int i, error, retval, ri;
uint32_t perf0cfg;
@@ -745,7 +750,8 @@ p6_intr(int cpu, uintptr_t eip, int usermode)
retval = 1;
- error = pmc_process_interrupt(cpu, pm, eip, usermode);
+ error = pmc_process_interrupt(cpu, pm, tf,
+ TRAPF_USERMODE(tf));
if (error)
P6_MARK_STOPPED(pc,ri);
diff --git a/sys/dev/hwpmc/hwpmc_x86.c b/sys/dev/hwpmc/hwpmc_x86.c
index cb6db23..2fc7cd9 100644
--- a/sys/dev/hwpmc/hwpmc_x86.c
+++ b/sys/dev/hwpmc/hwpmc_x86.c
@@ -1,7 +1,11 @@
/*-
* Copyright (c) 2005, Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -30,12 +34,18 @@ __FBSDID("$FreeBSD$");
#include <sys/param.h>
#include <sys/bus.h>
#include <sys/pmc.h>
+#include <sys/proc.h>
#include <sys/systm.h>
+#include <machine/cpu.h>
#include <machine/apicreg.h>
#include <machine/pmc_mdep.h>
#include <machine/md_var.h>
+#include <vm/vm.h>
+#include <vm/vm_param.h>
+#include <vm/pmap.h>
+
extern volatile lapic_t *lapic;
void
@@ -48,6 +58,187 @@ pmc_x86_lapic_enable_pmc_interrupt(void)
lapic->lvt_pcint = value;
}
+/*
+ * Attempt to walk a user call stack using a too-simple algorithm.
+ * In the general case we need unwind information associated with
+ * the executable to be able to walk the user stack.
+ *
+ * We are handed a trap frame laid down at the time the PMC interrupt
+ * was taken.  If the application is using frame pointers, the saved
+ * PC value could be:
+ * a. at the beginning of a function before the stack frame is laid
+ *    down,
+ * b. just before a 'ret', after the stack frame has been taken off,
+ * c. somewhere else in the function with a valid stack frame being
+ *    present.
+ *
+ * If the application is not using frame pointers, this algorithm will
+ * fail to yield an interesting call chain.
+ *
+ * 'cc' receives the captured PC values, starting with the interrupted
+ * PC taken from 'tf'; 'nframes' bounds the frame-walking loop.  The
+ * return value is the number of entries stored in 'cc'.  The first
+ * entry is stored unconditionally, so 'cc' must have room for at
+ * least one element.
+ *
+ * TODO: figure out a way to use unwind information.
+ */
+
+int
+pmc_save_user_callchain(uintptr_t *cc, int nframes, struct trapframe *tf)
+{
+	int n;
+	uint32_t instr;
+	uintptr_t fp, oldfp, pc, r, sp;
+
+	KASSERT(TRAPF_USERMODE(tf), ("[x86,%d] Not a user trap frame tf=%p",
+	    __LINE__, (void *) tf));
+
+	pc = PMC_TRAPFRAME_TO_PC(tf);
+	oldfp = fp = PMC_TRAPFRAME_TO_FP(tf);
+	sp = PMC_TRAPFRAME_TO_SP(tf);
+
+	/* The interrupted PC is always the first entry of the chain. */
+	*cc++ = pc;
+	n = 1;
+
+	r = fp + sizeof(uintptr_t);	/* points to return address */
+
+	if (!PMC_IN_USERSPACE(pc))
+		return (n);
+
+	/* Fetch the instruction bytes at the interrupted PC. */
+	if (copyin((void *) pc, &instr, sizeof(instr)) != 0)
+		return (n);
+
+	if (PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(instr) ||
+	    PMC_AT_FUNCTION_EPILOGUE_RET(instr)) {
+		/* No frame of our own: the return address is at *sp. */
+		if (copyin((void *) sp, &pc, sizeof(pc)) != 0)
+			return (n);
+	} else if (PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(instr)) {
+		/* The old frame pointer was pushed; step over it. */
+		sp += sizeof(uintptr_t);
+		if (copyin((void *) sp, &pc, sizeof(pc)) != 0)
+			return (n);
+	} else if (copyin((void *) r, &pc, sizeof(pc)) != 0 ||
+	    copyin((void *) fp, &fp, sizeof(fp)) != 0) {
+		/*
+		 * A frame is present: the return address is read from
+		 * 'r' and the caller's frame pointer from 'fp'.  The
+		 * full sizeof(fp) bytes must be copied in, otherwise
+		 * only part of the saved frame pointer is updated.
+		 */
+		return (n);
+	}
+
+	for (; n < nframes;) {
+		if (pc == 0 || !PMC_IN_USERSPACE(pc))
+			break;
+
+		*cc++ = pc;
+		n++;
+
+		/* Stop if the frame pointer moves to a lower address. */
+		if (fp < oldfp)
+			break;
+
+		r = fp + sizeof(uintptr_t);	/* address of return address */
+		oldfp = fp;
+
+		if (copyin((void *) r, &pc, sizeof(pc)) != 0 ||
+		    copyin((void *) fp, &fp, sizeof(fp)) != 0)
+			break;
+	}
+
+	return (n);
+}
+
+/*
+ * Walking the kernel call stack.
+ *
+ * We are handed the trap frame laid down at the time the PMC
+ * interrupt was taken.  The saved PC could be:
+ * a. in the lowlevel trap handler, meaning that there isn't a C stack
+ *    to traverse,
+ * b. at the beginning of a function before the stack frame is laid
+ *    down,
+ * c. just before a 'ret', after the stack frame has been taken off,
+ * d. somewhere else in a function with a valid stack frame being
+ *    present.
+ *
+ * In case (d), the previous frame pointer is at [%ebp]/[%rbp] and
+ * the return address is at [%ebp+4]/[%rbp+8].
+ *
+ * For cases (b) and (c), the return address is at [%esp]/[%rsp] and
+ * the frame pointer doesn't need to be changed when going up one
+ * level in the stack.
+ *
+ * For case (a), we check if the PC lies in low-level trap handling
+ * code, and if so we terminate our trace.
+ *
+ * 'cc' receives the captured PC values, starting with the interrupted
+ * PC taken from 'tf', which is stored unconditionally; 'nframes'
+ * bounds the walk.  The function returns 1 when no walk is attempted,
+ * otherwise the loop counter at the point the walk stopped.
+ *
+ * NOTE(review): when the loop below exits through a 'break', the PC
+ * stored in that iteration is not included in the returned count --
+ * confirm that callers expect this.
+ */
+
+int
+pmc_save_kernel_callchain(uintptr_t *cc, int nframes, struct trapframe *tf)
+{
+	int n;
+	uint32_t instr;
+	uintptr_t fp, pc, r, sp, stackstart, stackend;
+	struct thread *td;
+
+	KASSERT(TRAPF_USERMODE(tf) == 0,("[x86,%d] not a kernel backtrace",
+	    __LINE__));
+
+	pc = PMC_TRAPFRAME_TO_PC(tf);
+	fp = PMC_TRAPFRAME_TO_FP(tf);
+	sp = PMC_TRAPFRAME_TO_SP(tf);
+
+	*cc++ = pc;
+	r = fp + sizeof(uintptr_t);	/* points to return address */
+
+	if ((td = curthread) == NULL)
+		return (1);
+
+	if (nframes <= 1)
+		return (1);
+
+	stackstart = (uintptr_t) td->td_kstack;
+	stackend = (uintptr_t) td->td_kstack + td->td_kstack_pages * PAGE_SIZE;
+
+	/*
+	 * Every address dereferenced below must lie inside the current
+	 * thread's kernel stack.  'r' is checked against the stack
+	 * bounds too: a frame pointer in the last word of the stack
+	 * would otherwise yield an 'r' just past the end of the stack.
+	 */
+	if (PMC_IN_TRAP_HANDLER(pc) ||
+	    !PMC_IN_KERNEL(pc) ||
+	    !PMC_IN_KERNEL_STACK(r, stackstart, stackend) ||
+	    !PMC_IN_KERNEL_STACK(sp, stackstart, stackend) ||
+	    !PMC_IN_KERNEL_STACK(fp, stackstart, stackend))
+		return (1);
+
+	instr = *(uint32_t *) pc;
+
+	/*
+	 * Determine whether the interrupted function was in the
+	 * processing of either laying down its stack frame or taking
+	 * it off.
+	 *
+	 * If we haven't started laying down a stack frame, or are
+	 * just about to return, then our caller's address is at
+	 * *sp, and we don't have a frame to unwind.
+	 */
+	if (PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(instr) ||
+	    PMC_AT_FUNCTION_EPILOGUE_RET(instr))
+		pc = *(uintptr_t *) sp;
+	else if (PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(instr)) {
+		/*
+		 * The code was midway through laying down a frame.
+		 * At this point sp[0] has a frame back pointer,
+		 * and the caller's address is therefore at sp[1].
+		 */
+		sp += sizeof(uintptr_t);
+		if (!PMC_IN_KERNEL_STACK(sp, stackstart, stackend))
+			return (1);
+		pc = *(uintptr_t *) sp;
+	} else {
+		/*
+		 * Not in the function prologue or epilogue.
+		 */
+		pc = *(uintptr_t *) r;
+		fp = *(uintptr_t *) fp;
+	}
+
+	for (n = 1; n < nframes; n++) {
+		*cc++ = pc;
+
+		if (PMC_IN_TRAP_HANDLER(pc))
+			break;
+
+		/* Both words of the next frame must be on the stack. */
+		r = fp + sizeof(uintptr_t);
+		if (!PMC_IN_KERNEL_STACK(fp, stackstart, stackend) ||
+		    !PMC_IN_KERNEL_STACK(r, stackstart, stackend))
+			break;
+		pc = *(uintptr_t *) r;
+		fp = *(uintptr_t *) fp;
+	}
+
+	return (n);
+}
static struct pmc_mdep *
pmc_intel_initialize(void)
diff --git a/sys/i386/i386/exception.s b/sys/i386/i386/exception.s
index 259327d..31d9a66 100644
--- a/sys/i386/i386/exception.s
+++ b/sys/i386/i386/exception.s
@@ -1,8 +1,12 @@
/*-
* Copyright (c) 1989, 1990 William F. Jolitz.
* Copyright (c) 1990 The Regents of the University of California.
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -31,6 +35,7 @@
*/
#include "opt_apic.h"
+#include "opt_hwpmc_hooks.h"
#include "opt_npx.h"
#include <machine/asmacros.h>
@@ -42,7 +47,9 @@
#define SEL_RPL_MASK 0x0003
.text
-
+#ifdef HWPMC_HOOKS
+ ENTRY(start_exceptions)
+#endif
/*****************************************************************************/
/* Trap handling */
/*****************************************************************************/
@@ -261,8 +268,18 @@ doreti:
FAKE_MCOUNT($bintr) /* init "from" bintr -> doreti */
doreti_next:
/*
- * Check if ASTs can be handled now. PSL_VM must be checked first
- * since segment registers only have an RPL in non-VM86 mode.
+ * Check if ASTs can be handled now. ASTs cannot be safely
+ * processed when returning from an NMI.
+ */
+ cmpb $T_NMI,TF_TRAPNO(%esp)
+#ifdef HWPMC_HOOKS
+ je doreti_nmi
+#else
+ je doreti_exit
+#endif
+ /*
+ * PSL_VM must be checked first since segment registers only
+ * have an RPL in non-VM86 mode.
*/
testl $PSL_VM,TF_EFLAGS(%esp) /* are we in vm86 mode? */
jz doreti_notvm86
@@ -340,3 +357,32 @@ doreti_popl_fs_fault:
movl $0,TF_ERR(%esp) /* XXX should be the error code */
movl $T_PROTFLT,TF_TRAPNO(%esp)
jmp alltraps_with_regs_pushed
+#ifdef HWPMC_HOOKS
+doreti_nmi:
+ /*
+ * Since we are returning from an NMI, check if the current trap
+ * was from user mode and if so whether the current thread
+ * needs a user call chain capture.
+ */
+ testb $SEL_RPL_MASK,TF_CS(%esp)
+ jz doreti_exit
+ movl PCPU(CURTHREAD),%eax /* curthread present? */
+ orl %eax,%eax
+ jz doreti_exit
+ testl $TDP_CALLCHAIN,TD_PFLAGS(%eax) /* flagged for capture? */
+ jz doreti_exit
+ /*
+ * Take the processor out of NMI mode by executing a fake "iret".
+ */
+ pushfl
+ pushl %cs
+ pushl $outofnmi
+ iret
+outofnmi:
+ /*
+ * Re-enable interrupts and jump to AST handling code.
+ */
+ sti
+ jmp doreti_ast
+ ENTRY(end_exceptions)
+#endif
diff --git a/sys/i386/i386/trap.c b/sys/i386/i386/trap.c
index ebec7ab..d6aa973 100644
--- a/sys/i386/i386/trap.c
+++ b/sys/i386/i386/trap.c
@@ -214,8 +214,7 @@ trap(struct trapframe *frame)
* return immediately.
*/
if (type == T_NMI && pmc_intr &&
- (*pmc_intr)(PCPU_GET(cpuid), (uintptr_t) frame->tf_eip,
- TRAPF_USERMODE(frame)))
+ (*pmc_intr)(PCPU_GET(cpuid), frame))
goto out;
#endif
diff --git a/sys/i386/include/pmc_mdep.h b/sys/i386/include/pmc_mdep.h
index 2d4c4a3..ffe94dd 100644
--- a/sys/i386/include/pmc_mdep.h
+++ b/sys/i386/include/pmc_mdep.h
@@ -1,7 +1,11 @@
/*-
* Copyright (c) 2003-2005 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -73,10 +77,33 @@ union pmc_md_pmc {
struct pmc;
+#define PMC_TRAPFRAME_TO_PC(TF) ((TF)->tf_eip)
+#define PMC_TRAPFRAME_TO_FP(TF) ((TF)->tf_ebp)
+#define PMC_TRAPFRAME_TO_SP(TF) ((TF)->tf_esp)
+
+#define PMC_IN_KERNEL_STACK(S,START,END) \
+ ((S) >= (START) && (S) < (END))
+#define PMC_IN_KERNEL(va) (((va) >= USRSTACK) && \
+ ((va) < VM_MAX_KERNEL_ADDRESS))
+
+#define PMC_IN_USERSPACE(va) ((va) <= VM_MAXUSER_ADDRESS)
+
+#define PMC_IN_TRAP_HANDLER(PC) \
+ ((PC) >= (uintptr_t) start_exceptions && \
+ (PC) < (uintptr_t) end_exceptions)
+
+/*
+ * Instruction matchers applied to the 32-bit little-endian word read
+ * at the sampled PC.  These are the i386 encodings (no REX prefix):
+ *   55       pushl %ebp
+ *   89 e5    movl  %esp,%ebp
+ *   c3       ret
+ */
+#define	PMC_AT_FUNCTION_PROLOGUE_PUSH_BP(I)		\
+	(((I) & 0x00ffffff) == 0x00e58955) /* pushl %ebp; movl %esp,%ebp */
+#define	PMC_AT_FUNCTION_PROLOGUE_MOV_SP_BP(I)		\
+	(((I) & 0x0000ffff) == 0x0000e589) /* movl %esp,%ebp */
+#define	PMC_AT_FUNCTION_EPILOGUE_RET(I)			\
+	(((I) & 0xFF) == 0xC3)		   /* ret */
+
/*
* Prototypes
*/
+void start_exceptions(void), end_exceptions(void);
void pmc_x86_lapic_enable_pmc_interrupt(void);
#endif /* _KERNEL */
diff --git a/sys/kern/kern_pmc.c b/sys/kern/kern_pmc.c
index 6d5d7af..da331ca 100644
--- a/sys/kern/kern_pmc.c
+++ b/sys/kern/kern_pmc.c
@@ -1,5 +1,10 @@
/*-
- * Copyright (c) 2003-2005, Joseph Koshy
+ * Copyright (c) 2003-2007 Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
+ * All rights reserved.
+ *
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
*
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
@@ -45,7 +50,7 @@ const int pmc_kernel_version = PMC_KERNEL_VERSION;
int (*pmc_hook)(struct thread *td, int function, void *arg) = NULL;
/* Interrupt handler */
-int (*pmc_intr)(int cpu, uintptr_t pc, int usermode) = NULL;
+int (*pmc_intr)(int cpu, struct trapframe *tf) = NULL;
/* Bitmask of CPUs requiring servicing at hardclock time */
volatile cpumask_t pmc_cpumask;
@@ -66,7 +71,14 @@ volatile int pmc_ss_count;
* somewhat more expensive than a simple 'if' check and indirect call.
*/
struct sx pmc_sx;
-SX_SYSINIT(pmc, &pmc_sx, "pmc shared lock");
+
+/*
+ * Initialize the 'pmc_sx' lock under the name "pmc-sx", passing
+ * SX_NOWITNESS to sx_init_flags().  Registered with SYSINIT below.
+ */
+static void
+pmc_init_sx(void)
+{
+ sx_init_flags(&pmc_sx, "pmc-sx", SX_NOWITNESS);
+}
+
+SYSINIT(pmcsx, SI_SUB_LOCK, SI_ORDER_MIDDLE, pmc_init_sx, NULL);
/*
* Helper functions
diff --git a/sys/kern/subr_trap.c b/sys/kern/subr_trap.c
index 355faf2..d8168af 100644
--- a/sys/kern/subr_trap.c
+++ b/sys/kern/subr_trap.c
@@ -2,10 +2,14 @@
* Copyright (C) 1994, David Greenman
* Copyright (c) 1990, 1993
* The Regents of the University of California. All rights reserved.
+ * Copyright (c) 2007 The FreeBSD Foundation
*
* This code is derived from software contributed to Berkeley by
* the University of Utah, and William Jolitz.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -40,6 +44,7 @@
#include <sys/cdefs.h>
__FBSDID("$FreeBSD$");
+#include "opt_hwpmc_hooks.h"
#include "opt_ktrace.h"
#include "opt_mac.h"
#ifdef __i386__
@@ -52,6 +57,7 @@ __FBSDID("$FreeBSD$");
#include <sys/kernel.h>
#include <sys/lock.h>
#include <sys/mutex.h>
+#include <sys/pmckern.h>
#include <sys/proc.h>
#include <sys/ktr.h>
#include <sys/resourcevar.h>
@@ -201,6 +207,13 @@ ast(struct trapframe *framep)
td->td_profil_ticks = 0;
td->td_pflags &= ~TDP_OWEUPC;
}
+#if defined(HWPMC_HOOKS)
+ if (td->td_pflags & TDP_CALLCHAIN) {
+ PMC_CALL_HOOK_UNLOCKED(td, PMC_FN_USER_CALLCHAIN,
+ (void *) framep);
+ td->td_pflags &= ~TDP_CALLCHAIN;
+ }
+#endif
if (flags & TDF_ALRMPEND) {
PROC_LOCK(p);
psignal(p, SIGVTALRM);
diff --git a/sys/sys/pmc.h b/sys/sys/pmc.h
index 1656a92..ff34586 100644
--- a/sys/sys/pmc.h
+++ b/sys/sys/pmc.h
@@ -541,11 +541,14 @@ struct pmc_op_getmsr {
#include <sys/malloc.h>
#include <sys/sysctl.h>
+#include <machine/frame.h>
+
#define PMC_HASH_SIZE 16
#define PMC_MTXPOOL_SIZE 32
#define PMC_LOG_BUFFER_SIZE 4
#define PMC_NLOGBUFFERS 16
#define PMC_NSAMPLES 32
+#define PMC_CALLCHAIN_DEPTH 8
#define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "."
@@ -652,7 +655,7 @@ struct pmc {
pmc_value_t pm_initial; /* counting PMC modes */
} pm_sc;
- uint32_t pm_stalled; /* true for stalled sampling PMCs */
+ uint32_t pm_stalled; /* marks stalled sampling PMCs */
uint32_t pm_caps; /* PMC capabilities */
enum pmc_event pm_event; /* event being measured */
uint32_t pm_flags; /* additional flags PMC_F_... */
@@ -680,6 +683,7 @@ struct pmc {
#define PMC_TO_ROWINDEX(P) PMC_ID_TO_ROWINDEX((P)->pm_id)
#define PMC_TO_CPU(P) PMC_ID_TO_CPU((P)->pm_id)
+
/*
* struct pmc_process
*
@@ -743,6 +747,7 @@ struct pmc_owner {
#define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */
#define PMC_PO_IN_FLUSH 0x00000010 /* in the middle of a flush */
+#define PMC_PO_INITIAL_MAPPINGS_DONE 0x00000020
/*
* struct pmc_hw -- describe the state of the PMC hardware
@@ -793,15 +798,21 @@ struct pmc_hw {
*/
struct pmc_sample {
- uintfptr_t ps_pc; /* PC value at interrupt */
- struct pmc *ps_pmc; /* interrupting PMC */
- int ps_usermode; /* true for user mode PCs */
+ uint16_t ps_nsamples; /* callchain depth */
+ uint8_t ps_cpu; /* cpu number */
+ uint8_t ps_flags; /* other flags */
pid_t ps_pid; /* process PID or -1 */
+ struct pmc *ps_pmc; /* interrupting PMC */
+ uintptr_t *ps_pc; /* (const) callchain start */
};
+#define PMC_SAMPLE_FREE ((uint16_t) 0)
+#define PMC_SAMPLE_INUSE ((uint16_t) 0xFFFF)
+
struct pmc_samplebuffer {
struct pmc_sample * volatile ps_read; /* read pointer */
struct pmc_sample * volatile ps_write; /* write pointer */
+ uintptr_t *ps_callchains; /* all saved call chains */
struct pmc_sample *ps_fence; /* one beyond ps_samples[] */
struct pmc_sample ps_samples[]; /* array of sample entries */
};
@@ -881,7 +892,7 @@ struct pmc_mdep {
int (*pmd_stop_pmc)(int _cpu, int _ri);
/* handle a PMC interrupt */
- int (*pmd_intr)(int _cpu, uintptr_t _pc, int _usermode);
+ int (*pmd_intr)(int _cpu, struct trapframe *_tf);
int (*pmd_describe)(int _cpu, int _ri, struct pmc_info *_pi,
struct pmc **_ppmc);
@@ -1002,8 +1013,11 @@ MALLOC_DECLARE(M_PMC);
struct pmc_mdep *pmc_md_initialize(void); /* MD init function */
int pmc_getrowdisp(int _ri);
-int pmc_process_interrupt(int _cpu, struct pmc *_pm, uintfptr_t _pc,
- int _usermode);
-
+int pmc_process_interrupt(int _cpu, struct pmc *_pm,
+ struct trapframe *_tf, int _inuserspace);
+int pmc_save_kernel_callchain(uintptr_t *_cc, int _maxsamples,
+ struct trapframe *_tf);
+int pmc_save_user_callchain(uintptr_t *_cc, int _maxsamples,
+ struct trapframe *_tf);
#endif /* _KERNEL */
#endif /* _SYS_PMC_H_ */
diff --git a/sys/sys/pmckern.h b/sys/sys/pmckern.h
index 5ae3101..50911b2 100644
--- a/sys/sys/pmckern.h
+++ b/sys/sys/pmckern.h
@@ -1,7 +1,11 @@
/*-
- * Copyright (c) 2003-2006, Joseph Koshy
+ * Copyright (c) 2003-2007, Joseph Koshy
+ * Copyright (c) 2007 The FreeBSD Foundation
* All rights reserved.
*
+ * Portions of this software were developed by A. Joseph Koshy under
+ * sponsorship from the FreeBSD Foundation and Google, Inc.
+ *
* Redistribution and use in source and binary forms, with or without
* modification, are permitted provided that the following conditions
* are met:
@@ -47,6 +51,7 @@
#define PMC_FN_KLD_UNLOAD 6
#define PMC_FN_MMAP 7
#define PMC_FN_MUNMAP 8
+#define PMC_FN_USER_CALLCHAIN 9
struct pmckern_procexec {
int pm_credentialschanged;
@@ -65,7 +70,7 @@ struct pmckern_map_out {
/* hook */
extern int (*pmc_hook)(struct thread *_td, int _function, void *_arg);
-extern int (*pmc_intr)(int _cpu, uintptr_t _pc, int _usermode);
+extern int (*pmc_intr)(int _cpu, struct trapframe *_frame);
/* SX lock protecting the hook */
extern struct sx pmc_sx;
diff --git a/sys/sys/pmclog.h b/sys/sys/pmclog.h
index 80ae567..6878a59 100644
--- a/sys/sys/pmclog.h
+++ b/sys/sys/pmclog.h
@@ -243,13 +243,13 @@ int pmclog_configure_log(struct pmc_owner *_po, int _logfd);
int pmclog_deconfigure_log(struct pmc_owner *_po);
int pmclog_flush(struct pmc_owner *_po);
void pmclog_initialize(void);
+void pmclog_process_callchain(struct pmc *_pm, struct pmc_sample *_ps);
void pmclog_process_closelog(struct pmc_owner *po);
void pmclog_process_dropnotify(struct pmc_owner *po);
void pmclog_process_map_in(struct pmc_owner *po, pid_t pid,
uintfptr_t start, const char *path);
void pmclog_process_map_out(struct pmc_owner *po, pid_t pid,
uintfptr_t start, uintfptr_t end);
-void pmclog_process_pcsample(struct pmc *_pm, struct pmc_sample *_ps);
void pmclog_process_pmcallocate(struct pmc *_pm);
void pmclog_process_pmcattach(struct pmc *_pm, pid_t _pid, char *_path);
void pmclog_process_pmcdetach(struct pmc *_pm, pid_t _pid);
OpenPOWER on IntegriCloud