diff options
author | jkoshy <jkoshy@FreeBSD.org> | 2007-12-07 08:20:17 +0000 |
---|---|---|
committer | jkoshy <jkoshy@FreeBSD.org> | 2007-12-07 08:20:17 +0000 |
commit | 72c27d71d82569aec187c30f6ff208631abc02f4 (patch) | |
tree | fa5327bc9f466a5a1b4e68e43a72d22b1b35f5e5 /sys/dev/hwpmc/hwpmc_mod.c | |
parent | 12b5f9c8c99a01b1d40e88aaa1a58ce757e68d5e (diff) | |
download | FreeBSD-src-72c27d71d82569aec187c30f6ff208631abc02f4.zip FreeBSD-src-72c27d71d82569aec187c30f6ff208631abc02f4.tar.gz |
Kernel and hwpmc(4) support for callchain capture.
Sponsored by: FreeBSD Foundation and Google Inc.
Diffstat (limited to 'sys/dev/hwpmc/hwpmc_mod.c')
-rw-r--r-- | sys/dev/hwpmc/hwpmc_mod.c | 347 |
1 file changed, 284 insertions, 63 deletions
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c index 6c0e0ea..0bba092 100644 --- a/sys/dev/hwpmc/hwpmc_mod.c +++ b/sys/dev/hwpmc/hwpmc_mod.c @@ -1,7 +1,11 @@ /*- - * Copyright (c) 2003-2006 Joseph Koshy + * Copyright (c) 2003-2007 Joseph Koshy + * Copyright (c) 2007 The FreeBSD Foundation * All rights reserved. * + * Portions of this software were developed by A. Joseph Koshy under + * sponsorship from the FreeBSD Foundation and Google, Inc. + * * Redistribution and use in source and binary forms, with or without * modification, are permitted provided that the following conditions * are met: @@ -166,6 +170,7 @@ static int pmc_attach_one_process(struct proc *p, struct pmc *pm); static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu); static int pmc_can_attach(struct pmc *pm, struct proc *p); +static void pmc_capture_user_callchain(int cpu, struct trapframe *tf); static void pmc_cleanup(void); static int pmc_detach_process(struct proc *p, struct pmc *pm); static int pmc_detach_one_process(struct proc *p, struct pmc *pm, @@ -180,6 +185,9 @@ static struct pmc_process *pmc_find_process_descriptor(struct proc *p, static void pmc_force_context_switch(void); static void pmc_link_target_process(struct pmc *pm, struct pmc_process *pp); +static void pmc_log_all_process_mappings(struct pmc_owner *po); +static void pmc_log_kernel_mappings(struct pmc *pm); +static void pmc_log_process_mappings(struct pmc_owner *po, struct proc *p); static void pmc_maybe_remove_owner(struct pmc_owner *po); static void pmc_process_csw_in(struct thread *td); static void pmc_process_csw_out(struct thread *td); @@ -205,6 +213,11 @@ static void pmc_unlink_target_process(struct pmc *pmc, SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters"); +static int pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; +TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "callchaindepth", &pmc_callchaindepth); +SYSCTL_INT(_kern_hwpmc, OID_AUTO, callchaindepth, CTLFLAG_TUN|CTLFLAG_RD, 
+ &pmc_callchaindepth, 0, "depth of call chain records"); + #ifdef DEBUG struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS; char pmc_debugstr[PMC_DEBUG_STRSIZE]; @@ -226,7 +239,7 @@ SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_TUN|CTLFLAG_RD, &pmc_hashsize, 0, "rows in hash tables"); /* - * kern.hwpmc.nsamples --- number of PC samples per CPU + * kern.hwpmc.nsamples --- number of PC samples/callchain stacks per CPU */ static int pmc_nsamples = PMC_NSAMPLES; @@ -234,6 +247,7 @@ TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nsamples", &pmc_nsamples); SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_TUN|CTLFLAG_RD, &pmc_nsamples, 0, "number of PC samples per CPU"); + /* * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool. */ @@ -957,6 +971,8 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm) pmclog_process_pmcattach(pm, p->p_pid, fullpath); if (freepath) FREE(freepath, M_TEMP); + if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) + pmc_log_process_mappings(pm->pm_owner, p); } /* mark process as using HWPMCs */ PROC_LOCK(p); @@ -1449,7 +1465,7 @@ pmc_process_kld_unload(struct pmckern_map_out *pkm) if (po->po_flags & PMC_PO_OWNS_LOGFILE) pmclog_process_map_out(po, (pid_t) -1, pkm->pm_address, pkm->pm_address + pkm->pm_size); - + /* * TODO: Notify owners of process-sampling PMCs. */ @@ -1528,6 +1544,88 @@ pmc_process_munmap(struct thread *td, struct pmckern_map_out *pkm) } /* + * Log mapping information about the kernel. + */ + +static void +pmc_log_kernel_mappings(struct pmc *pm) +{ + struct pmc_owner *po; + struct pmckern_map_in *km, *kmbase; + + sx_assert(&pmc_sx, SX_LOCKED); + KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)), + ("[pmc,%d] non-sampling PMC (%p) desires mapping information", + __LINE__, (void *) pm)); + + po = pm->pm_owner; + + if (po->po_flags & PMC_PO_INITIAL_MAPPINGS_DONE) + return; + + /* + * Log the current set of kernel modules. 
+ */ + kmbase = linker_hwpmc_list_objects(); + for (km = kmbase; km->pm_file != NULL; km++) { + PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file, + (void *) km->pm_address); + pmclog_process_map_in(po, (pid_t) -1, km->pm_address, + km->pm_file); + } + FREE(kmbase, M_LINKER); + + po->po_flags |= PMC_PO_INITIAL_MAPPINGS_DONE; +} + +/* + * Log the mappings for a single process. + */ + +static void +pmc_log_process_mappings(struct pmc_owner *po, struct proc *p) +{ +} + +/* + * Log mappings for all processes in the system. + */ + +static void +pmc_log_all_process_mappings(struct pmc_owner *po) +{ + struct proc *p, *top; + + sx_assert(&pmc_sx, SX_XLOCKED); + + if ((p = pfind(1)) == NULL) + panic("[pmc,%d] Cannot find init", __LINE__); + + PROC_UNLOCK(p); + + sx_slock(&proctree_lock); + + top = p; + + for (;;) { + pmc_log_process_mappings(po, p); + if (!LIST_EMPTY(&p->p_children)) + p = LIST_FIRST(&p->p_children); + else for (;;) { + if (p == top) + goto done; + if (LIST_NEXT(p, p_sibling)) { + p = LIST_NEXT(p, p_sibling); + break; + } + p = p->p_pptr; + } + } + done: + sx_sunlock(&proctree_lock); +} + +/* * The 'hook' invoked from the kernel proper */ @@ -1543,7 +1641,8 @@ const char *pmc_hooknames[] = { "KLDLOAD", "KLDUNLOAD", "MMAP", - "MUNMAP" + "MUNMAP", + "CALLCHAIN" }; #endif @@ -1726,6 +1825,14 @@ pmc_hook_handler(struct thread *td, int function, void *arg) pmc_process_munmap(td, (struct pmckern_map_out *) arg); break; + case PMC_FN_USER_CALLCHAIN: + /* + * Record a call chain. + */ + pmc_capture_user_callchain(PCPU_GET(cpuid), + (struct trapframe *) arg); + break; + default: #ifdef DEBUG KASSERT(0, ("[pmc,%d] unknown hook %d\n", __LINE__, function)); @@ -2321,6 +2428,21 @@ pmc_start(struct pmc *pm) po = pm->pm_owner; + /* + * Disallow PMCSTART if a logfile is required but has not been + * configured yet. 
+ */ + if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) && + (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) + return EDOOFUS; /* programming error */ + + /* + * If this is a sampling mode PMC, log mapping information for + * the kernel modules that are currently loaded. + */ + if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) + pmc_log_kernel_mappings(pm); + if (PMC_IS_VIRTUAL_MODE(mode)) { /* @@ -2333,15 +2455,6 @@ pmc_start(struct pmc *pm) pmc_attach_process(po->po_owner, pm); /* - * Disallow PMCSTART if a logfile is required but has not - * been configured yet. - */ - - if (error == 0 && (pm->pm_flags & PMC_F_NEEDS_LOGFILE) && - (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) - error = EDOOFUS; - - /* * If the PMC is attached to its owner, then force a context * switch to ensure that the MD state gets set correctly. */ @@ -2358,13 +2471,7 @@ pmc_start(struct pmc *pm) /* * A system-wide PMC. - */ - - if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) && - (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) - return EDOOFUS; /* programming error */ - - /* + * * Add the owner to the global list if this is a system-wide * sampling PMC. */ @@ -2378,7 +2485,8 @@ pmc_start(struct pmc *pm) po->po_sscount++; } - /* TODO: dump system wide process mappings to the log? */ + /* Log mapping information for all processes in the system. */ + pmc_log_all_process_mappings(po); /* * Move to the CPU associated with this @@ -2554,7 +2662,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_args) struct proc *p; struct pmc *pm; struct pmc_owner *po; - struct pmckern_map_in *km, *kmbase; struct pmc_op_configurelog cl; sx_assert(&pmc_sx, SX_XLOCKED); @@ -2593,18 +2700,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_args) if (error) break; - - /* - * Log the current set of kernel modules. 
- */ - kmbase = linker_hwpmc_list_objects(); - for (km = kmbase; km->pm_file != NULL; km++) { - PMCDBG(LOG,REG,1,"%s %p", (char *) km->pm_file, - (void *) km->pm_address); - pmclog_process_map_in(po, (pid_t) -1, km->pm_address, - km->pm_file); - } - FREE(kmbase, M_LINKER); } break; @@ -2945,7 +3040,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args) */ if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW | - PMC_F_LOG_PROCEXIT)) != 0) { + PMC_F_LOG_PROCEXIT | PMC_F_CALLCHAIN)) != 0) { error = EINVAL; break; } @@ -3633,56 +3728,117 @@ pmc_syscall_handler(struct thread *td, void *syscall_args) /* + * Mark the thread as needing callchain capture and post an AST. The + * actual callchain capture will be done in a context where it is safe + * to take page faults. + */ + +static void +pmc_post_callchain_ast(void) +{ + struct thread *td; + + td = curthread; + + /* + * Mark this thread as needing processing in ast(). + * td->td_pflags will be safe to touch as the process was in + * user space when it was interrupted. + */ + td->td_pflags |= TDP_CALLCHAIN; + + /* + * Again, since we've entered this function directly from + * userland, `td' is guaranteed to be not locked by this CPU, + * so its safe to try acquire the thread lock even though we + * are executing in an NMI context. We need to acquire this + * lock before touching `td_flags' because other CPUs may be + * in the process of touching this field. + */ + thread_lock(td); + td->td_flags |= TDF_ASTPENDING; + thread_unlock(td); + + return; +} + +/* * Interrupt processing. * - * Find a free slot in the per-cpu array of PC samples and write the - * current (PMC,PID,PC) triple to it. If an event was successfully - * added, a bit is set in mask 'pmc_cpumask' denoting that the - * DO_SAMPLES hook needs to be invoked from the clock handler. + * Find a free slot in the per-cpu array of samples and capture the + * current callchain there. 
If a sample was successfully added, a bit + * is set in mask 'pmc_cpumask' denoting that the DO_SAMPLES hook + * needs to be invoked from the clock handler. * * This function is meant to be called from an NMI handler. It cannot * use any of the locking primitives supplied by the OS. */ int -pmc_process_interrupt(int cpu, struct pmc *pm, uintfptr_t pc, int usermode) +pmc_process_interrupt(int cpu, struct pmc *pm, struct trapframe *tf, + int inuserspace) { - int error, ri; + int error, callchaindepth; struct thread *td; struct pmc_sample *ps; struct pmc_samplebuffer *psb; error = 0; - ri = PMC_TO_ROWINDEX(pm); + /* + * Allocate space for a sample buffer. + */ psb = pmc_pcpu[cpu]->pc_sb; ps = psb->ps_write; - if (ps->ps_pc) { /* in use, reader hasn't caught up */ + if (ps->ps_nsamples) { /* in use, reader hasn't caught up */ pm->pm_stalled = 1; atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1); - PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d", - cpu, pm, (uint64_t) pc, usermode, + PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", + cpu, pm, (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); error = ENOMEM; goto done; } - /* fill in entry */ - PMCDBG(SAM,INT,1,"cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d", cpu, pm, - (uint64_t) pc, usermode, + + /* Fill in entry. */ + PMCDBG(SAM,INT,1,"cpu=%d pm=%p tf=%p um=%d wr=%d rd=%d", cpu, pm, + (void *) tf, inuserspace, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); - atomic_add_rel_32(&pm->pm_runcount, 1); /* hold onto PMC */ + atomic_add_rel_32(&pm->pm_runcount, 1); /* hold onto PMC */ ps->ps_pmc = pm; if ((td = curthread) && td->td_proc) ps->ps_pid = td->td_proc->p_pid; else ps->ps_pid = -1; - ps->ps_usermode = usermode; - ps->ps_pc = pc; /* mark entry as in use */ + ps->ps_cpu = cpu; + ps->ps_flags = inuserspace ? PMC_CC_F_USERSPACE : 0; + + callchaindepth = (pm->pm_flags & PMC_F_CALLCHAIN) ? 
+ pmc_callchaindepth : 1; + + if (callchaindepth == 1) + ps->ps_pc[0] = PMC_TRAPFRAME_TO_PC(tf); + else { + /* + * Kernel stack traversals can be done immediately, + * while we defer to an AST for user space traversals. + */ + if (!inuserspace) + callchaindepth = + pmc_save_kernel_callchain(ps->ps_pc, + callchaindepth, tf); + else { + pmc_post_callchain_ast(); + callchaindepth = PMC_SAMPLE_INUSE; + } + } + + ps->ps_nsamples = callchaindepth; /* mark entry as in use */ /* increment write pointer, modulo ring buffer size */ ps++; @@ -3695,7 +3851,50 @@ pmc_process_interrupt(int cpu, struct pmc *pm, uintfptr_t pc, int usermode) /* mark CPU as needing processing */ atomic_set_rel_int(&pmc_cpumask, (1 << cpu)); - return error; + return (error); +} + +/* + * Capture a user call chain. This function will be called from ast() + * before control returns to userland and before the process gets + * rescheduled. + */ + +static void +pmc_capture_user_callchain(int cpu, struct trapframe *tf) +{ + int i; + struct pmc *pm; + struct pmc_sample *ps; + struct pmc_samplebuffer *psb; + + psb = pmc_pcpu[cpu]->pc_sb; + + /* + * Iterate through all deferred callchain requests. + */ + + for (i = 0; i < pmc_nsamples; i++) { + + ps = &psb->ps_samples[i]; + if (ps->ps_nsamples != PMC_SAMPLE_INUSE) + continue; + + pm = ps->ps_pmc; + + KASSERT(pm->pm_flags & PMC_F_CALLCHAIN, + ("[pmc,%d] Retrieving callchain for PMC that doesn't " + "want it", __LINE__)); + + /* + * Retrieve the callchain and mark the sample buffer + * as 'processable' by the timer tick sweep code. + */ + ps->ps_nsamples = pmc_save_user_callchain(ps->ps_pc, + pmc_callchaindepth, tf); + } + + return; } @@ -3722,8 +3921,13 @@ pmc_process_samples(int cpu) for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */ ps = psb->ps_read; - if (ps->ps_pc == (uintfptr_t) 0) /* no data */ + if (ps->ps_nsamples == PMC_SAMPLE_FREE) + break; + if (ps->ps_nsamples == PMC_SAMPLE_INUSE) { + /* Need a rescan at a later time. 
*/ + atomic_set_rel_int(&pmc_cpumask, (1 << cpu)); break; + } pm = ps->ps_pmc; po = pm->pm_owner; @@ -3736,8 +3940,8 @@ pmc_process_samples(int cpu) if (pm->pm_state != PMC_STATE_RUNNING) goto entrydone; - PMCDBG(SAM,OPS,1,"cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d", cpu, - pm, (uint64_t) ps->ps_pc, ps->ps_usermode, + PMCDBG(SAM,OPS,1,"cpu=%d pm=%p n=%d fl=%x wr=%d rd=%d", cpu, + pm, ps->ps_nsamples, ps->ps_flags, (int) (psb->ps_write - psb->ps_samples), (int) (psb->ps_read - psb->ps_samples)); @@ -3748,9 +3952,9 @@ pmc_process_samples(int cpu) * would have done. */ if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) { - if (ps->ps_usermode) { + if (ps->ps_flags & PMC_CC_F_USERSPACE) { td = FIRST_THREAD_IN_PROC(po->po_owner); - addupc_intr(td, ps->ps_pc, 1); + addupc_intr(td, ps->ps_pc[0], 1); } goto entrydone; } @@ -3762,10 +3966,10 @@ pmc_process_samples(int cpu) * entry to the PMC's owner process. */ - pmclog_process_pcsample(pm, ps); + pmclog_process_callchain(pm, ps); entrydone: - ps->ps_pc = (uintfptr_t) 0; /* mark entry as free */ + ps->ps_nsamples = 0; /* mark entry as free */ atomic_subtract_rel_32(&pm->pm_runcount, 1); /* increment read pointer, modulo sample size */ @@ -4087,6 +4291,7 @@ pmc_initialize(void) { int cpu, error, n; struct pmc_binding pb; + struct pmc_sample *ps; struct pmc_samplebuffer *sb; md = NULL; @@ -4119,17 +4324,24 @@ pmc_initialize(void) */ if (pmc_hashsize <= 0) { - (void) printf("hwpmc: tunable hashsize=%d must be greater " - "than zero.\n", pmc_hashsize); + (void) printf("hwpmc: tunable \"hashsize\"=%d must be " + "greater than zero.\n", pmc_hashsize); pmc_hashsize = PMC_HASH_SIZE; } if (pmc_nsamples <= 0 || pmc_nsamples > 65535) { - (void) printf("hwpmc: tunable nsamples=%d out of range.\n", - pmc_nsamples); + (void) printf("hwpmc: tunable \"nsamples\"=%d out of " + "range.\n", pmc_nsamples); pmc_nsamples = PMC_NSAMPLES; } + if (pmc_callchaindepth <= 0 || + pmc_callchaindepth > PMC_CALLCHAIN_DEPTH_MAX) { + (void) printf("hwpmc: tunable 
\"callchaindepth\"=%d out of " + "range.\n", pmc_callchaindepth); + pmc_callchaindepth = PMC_CALLCHAIN_DEPTH; + } + md = pmc_md_initialize(); if (md == NULL || md->pmd_init == NULL) @@ -4171,6 +4383,14 @@ pmc_initialize(void) KASSERT(pmc_pcpu[cpu] != NULL, ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu)); + MALLOC(sb->ps_callchains, uintptr_t *, + pmc_callchaindepth * pmc_nsamples * sizeof(uintptr_t), + M_PMC, M_WAITOK|M_ZERO); + + for (n = 0, ps = sb->ps_samples; n < pmc_nsamples; n++, ps++) + ps->ps_pc = sb->ps_callchains + + (n * pmc_callchaindepth); + pmc_pcpu[cpu]->pc_sb = sb; } @@ -4327,6 +4547,7 @@ pmc_cleanup(void) KASSERT(pmc_pcpu[cpu]->pc_sb != NULL, ("[pmc,%d] Null cpu sample buffer cpu=%d", __LINE__, cpu)); + FREE(pmc_pcpu[cpu]->pc_sb->ps_callchains, M_PMC); FREE(pmc_pcpu[cpu]->pc_sb, M_PMC); pmc_pcpu[cpu]->pc_sb = NULL; } |