Diffstat (limited to 'sys/dev/hwpmc')
 sys/dev/hwpmc/hwpmc_amd.c   |  42
 sys/dev/hwpmc/hwpmc_intel.c |   4
 sys/dev/hwpmc/hwpmc_mod.c   | 398
 sys/dev/hwpmc/hwpmc_piv.c   | 437
 sys/dev/hwpmc/hwpmc_ppro.c  |  40
 5 files changed, 625 insertions, 296 deletions
diff --git a/sys/dev/hwpmc/hwpmc_amd.c b/sys/dev/hwpmc/hwpmc_amd.c
index 158dc0b..cd3db04 100644
--- a/sys/dev/hwpmc/hwpmc_amd.c
+++ b/sys/dev/hwpmc/hwpmc_amd.c
@@ -360,7 +360,7 @@ amd_read_pmc(int cpu, int ri, pmc_value_t *v)
 	    ("[amd,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
 		cpu, ri));
 
-	mode = pm->pm_mode;
+	mode = PMC_TO_MODE(pm);
 
 	PMCDBG(MDP,REA,1,"amd-read id=%d class=%d", ri, pd->pm_descr.pd_class);
 
@@ -413,7 +413,7 @@ amd_write_pmc(int cpu, int ri, pmc_value_t v)
 	    ("[amd,%d] PMC not owned (cpu%d,pmc%d)", __LINE__,
 		cpu, ri));
 
-	mode = pm->pm_mode;
+	mode = PMC_TO_MODE(pm);
 
 	if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
 		return 0;
@@ -461,6 +461,18 @@ amd_config_pmc(int cpu, int ri, struct pmc *pm)
 }
 
 /*
+ * Retrieve a configured PMC pointer from hardware state.
+ */
+
+static int
+amd_get_config(int cpu, int ri, struct pmc **ppm)
+{
+	*ppm = pmc_pcpu[cpu]->pc_hwpmcs[ri]->phw_pmc;
+
+	return 0;
+}
+
+/*
  * Machine dependent actions taken during the context switch in of a
  * thread.
  */
@@ -471,10 +483,10 @@ amd_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
 	(void) pc;
 
 	PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp,
-	    (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS) != 0);
+	    (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0);
 
 	/* enable the RDPMC instruction if needed */
-	if (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS)
+	if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
 		load_cr4(rcr4() | CR4_PCE);
 
 	return 0;
@@ -492,7 +504,7 @@ amd_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
 	(void) pp;		/* can be NULL */
 
 	PMCDBG(MDP,SWO,1, "pc=%p pp=%p enable-msr=%d", pc, pp, pp ?
-	    (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS) == 1 : 0);
+	    (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) == 1 : 0);
 
 	/* always turn off the RDPMC instruction */
 	load_cr4(rcr4() & ~CR4_PCE);
@@ -523,7 +535,7 @@ amd_allocate_pmc(int cpu, int ri, struct pmc *pm,
 	pd = &amd_pmcdesc[ri].pm_descr;
 
 	/* check class match */
-	if (pd->pd_class != pm->pm_class)
+	if (pd->pd_class != a->pm_class)
 		return EINVAL;
 
 	caps = pm->pm_caps;
@@ -765,7 +777,7 @@ amd_intr(int cpu, uintptr_t eip)
 			continue;
 		}
 
-		mode = pm->pm_mode;
+		mode = PMC_TO_MODE(pm);
 		if (PMC_IS_SAMPLING_MODE(mode) &&
 		    AMD_PMC_HAS_OVERFLOWED(perfctr)) {
 			atomic_add_int(&pmc_stats.pm_intr_processed, 1);
@@ -803,8 +815,6 @@ amd_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
 		return error;
 
 	pi->pm_class = pd->pm_descr.pd_class;
-	pi->pm_caps  = pd->pm_descr.pd_caps;
-	pi->pm_width = pd->pm_descr.pd_width;
 
 	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
 		pi->pm_enabled = TRUE;
@@ -982,8 +992,17 @@ pmc_amd_initialize(void)
 
 	/* this processor has two classes of usable PMCs */
 	pmc_mdep->pmd_nclass = 2;
-	pmc_mdep->pmd_classes[0] = PMC_CLASS_TSC;
-	pmc_mdep->pmd_classes[1] = AMD_PMC_CLASS;
+
+	/* TSC */
+	pmc_mdep->pmd_classes[0].pm_class = PMC_CLASS_TSC;
+	pmc_mdep->pmd_classes[0].pm_caps  = PMC_CAP_READ;
+	pmc_mdep->pmd_classes[0].pm_width = 64;
+
+	/* AMD K7/K8 PMCs */
+	pmc_mdep->pmd_classes[1].pm_class = AMD_PMC_CLASS;
+	pmc_mdep->pmd_classes[1].pm_caps  = AMD_PMC_CAPS;
+	pmc_mdep->pmd_classes[1].pm_width = 48;
+
 	pmc_mdep->pmd_nclasspmcs[0] = 1;
 	pmc_mdep->pmd_nclasspmcs[1] = (AMD_NPMCS-1);
 
@@ -994,6 +1013,7 @@ pmc_amd_initialize(void)
 	pmc_mdep->pmd_read_pmc     = amd_read_pmc;
 	pmc_mdep->pmd_write_pmc    = amd_write_pmc;
 	pmc_mdep->pmd_config_pmc   = amd_config_pmc;
+	pmc_mdep->pmd_get_config   = amd_get_config;
 	pmc_mdep->pmd_allocate_pmc = amd_allocate_pmc;
 	pmc_mdep->pmd_release_pmc  = amd_release_pmc;
 	pmc_mdep->pmd_start_pmc    = amd_start_pmc;
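The new pmd_get_config() entry point gives machine-independent code one MD-approved way to ask "which PMC, if any, is configured on this row index of this CPU?".  A minimal caller sketch (not from the patch), assuming 'md', 'cpu' and 'ri' are in scope as they are inside pmc_hook_handler():

    struct pmc *pm = NULL;

    /* ask the MD layer instead of dereferencing pmc_pcpu[] directly */
    (void) (*md->pmd_get_config)(cpu, ri, &pm);
    if (pm != NULL && PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
            /* ... operate on the configured process-virtual PMC ... */
    }

On AMD and P6 the hook is a plain phw_pmc dereference, but on the P4 it must consult per-logical-CPU configuration state, which is why the indirection is needed.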
diff --git a/sys/dev/hwpmc/hwpmc_intel.c b/sys/dev/hwpmc/hwpmc_intel.c
index 3f6f330..ea2c6c8 100644
--- a/sys/dev/hwpmc/hwpmc_intel.c
+++ b/sys/dev/hwpmc/hwpmc_intel.c
@@ -92,7 +92,9 @@ pmc_intel_initialize(void)
 	pmc_mdep->pmd_cputype = cputype;
 	pmc_mdep->pmd_nclass  = 2;
-	pmc_mdep->pmd_classes[0] = PMC_CLASS_TSC;
+	pmc_mdep->pmd_classes[0].pm_class = PMC_CLASS_TSC;
+	pmc_mdep->pmd_classes[0].pm_caps  = PMC_CAP_READ;
+	pmc_mdep->pmd_classes[0].pm_width = 64;
 	pmc_mdep->pmd_nclasspmcs[0] = 1;
 
 	error = 0;
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index 00cf9fa..36e4761 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -151,12 +151,14 @@ static struct pmc *pmc_allocate_pmc_descriptor(void);
 static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po,
     pmc_id_t pmc);
 static void	pmc_release_pmc_descriptor(struct pmc *pmc);
-static int	pmc_can_allocate_rowindex(struct proc *p, unsigned int ri);
+static int	pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
+    int cpu);
 static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
     uint32_t mode);
 static void	pmc_remove_process_descriptor(struct pmc_process *pp);
 static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p);
 static int	pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm);
+static void	pmc_force_context_switch(void);
 static void	pmc_remove_owner(struct pmc_owner *po);
 static void	pmc_maybe_remove_owner(struct pmc_owner *po);
 static void	pmc_unlink_target_process(struct pmc *pmc,
@@ -364,6 +366,7 @@ pmc_debugflags_parse(char *newstr, char *fence)
 		CMP_SET_FLAG_MIN("cfg", CFG);
 		CMP_SET_FLAG_MIN("sta", STA);
 		CMP_SET_FLAG_MIN("sto", STO);
+		CMP_SET_FLAG_MIN("int", INT);
 		CMP_SET_FLAG_MIN("bnd", BND);
 		CMP_SET_FLAG_MIN("sel", SEL);
 		else	/* unrecognized keyword */
@@ -573,6 +576,27 @@ pmc_select_cpu(int cpu)
 }
 
 /*
+ * Force a context switch.
+ *
+ * We do this by tsleep'ing for 1 tick -- invoking mi_switch() is not
+ * guaranteed to force a context switch.
+ */
+
+static void
+pmc_force_context_switch(void)
+{
+	u_char	curpri;
+
+	mtx_lock_spin(&sched_lock);
+	curpri = curthread->td_priority;
+	mtx_unlock_spin(&sched_lock);
+
+	(void) tsleep((void *) pmc_force_context_switch, curpri,
+	    "pmcctx", 1);
+
+}
+
+/*
  * Update the per-pmc histogram
  */
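tsleep()ing for one tick guarantees the calling thread is switched out and later back in, so any per-process PMC state changed beforehand gets reloaded by the switch-in path.  A hypothetical caller pattern, for illustration only:

    /* change per-process PMC state ... */
    pp->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS;   /* example flag change */

    /* ... then force a reschedule so csw-out/csw-in reload MD state */
    pmc_force_context_switch();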
@@ -671,7 +695,7 @@ pmc_remove_owner(struct pmc_owner *po)
 	 * XXX rework needed.
 	 */
 
-	if (po->po_flags & PMC_FLAG_OWNS_LOGFILE)
+	if (po->po_flags & PMC_PO_OWNS_LOGFILE)
 		pmc_configure_log(po, -1);
 
 }
@@ -693,7 +717,7 @@ pmc_maybe_remove_owner(struct pmc_owner *po)
 	 */
 
 	if (LIST_EMPTY(&po->po_pmcs) &&
-	    ((po->po_flags & PMC_FLAG_OWNS_LOGFILE) == 0)) {
+	    ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) {
 		pmc_remove_owner(po);
 		FREE(po, M_PMC);
 	}
@@ -718,7 +742,7 @@ pmc_link_target_process(struct pmc *pm, struct pmc_process *pp)
 	    ("[pmc,%d] Illegal reference count %d for process record %p",
 		__LINE__, pp->pp_refcnt, (void *) pp));
 
-	ri = pm->pm_rowindex;
+	ri = PMC_TO_ROWINDEX(pm);
 
 	PMCDBG(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p",
 	    pm, ri, pp);
@@ -740,12 +764,10 @@ pmc_link_target_process(struct pmc *pm, struct pmc_process *pp)
 	atomic_store_rel_ptr(&pp->pp_pmcs[ri].pp_pmc, pm);
 
 	if (pm->pm_owner->po_owner == pp->pp_proc)
-		pp->pp_flags |= PMC_FLAG_ENABLE_MSR_ACCESS;
+		pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER;
 
 	pp->pp_refcnt++;
 
-	PMCDBG(PRC,TLK,2, "enable-msr %d",
-	    (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS) != 0);
 }
 
 /*
@@ -767,7 +789,7 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
 	    ("[pmc,%d] Illegal ref count %d on process record %p",
 		__LINE__, pp->pp_refcnt, (void *) pp));
 
-	ri = pm->pm_rowindex;
+	ri = PMC_TO_ROWINDEX(pm);
 
 	PMCDBG(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p",
 	    pm, ri, pp);
@@ -779,8 +801,11 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
 	pp->pp_pmcs[ri].pp_pmc = NULL;
 	pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0;
 
-	if (pm->pm_owner->po_owner == pp->pp_proc)
-		pp->pp_flags &= ~PMC_FLAG_ENABLE_MSR_ACCESS;
+	/* Remove owner-specific flags */
+	if (pm->pm_owner->po_owner == pp->pp_proc) {
+		pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS;
+		pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER;
+	}
 
 	pp->pp_refcnt--;
 
@@ -792,9 +817,6 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
 	KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found "
		    "in pmc %p", __LINE__, pp->pp_proc, pp, pm));
 
-	PMCDBG(PRC,TUL,4, "unlink ptgt=%p, enable-msr=%d", ptgt,
-	    (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS) != 0);
-
 	LIST_REMOVE(ptgt, pt_next);
 	FREE(ptgt, M_PMC);
 }
@@ -897,7 +919,7 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
 	sx_assert(&pmc_sx, SX_XLOCKED);
 
 	PMCDBG(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm,
-	    pm->pm_rowindex, p, p->p_pid, p->p_comm);
+	    PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
 
 	/*
 	 * Locate the process descriptor corresponding to process 'p',
@@ -910,7 +932,7 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
	 * process descriptor and PMC.
	 */
 
-	ri = pm->pm_rowindex;
+	ri = PMC_TO_ROWINDEX(pm);
 
 	if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL)
 		return ENOMEM;
@@ -944,7 +966,16 @@ pmc_attach_process(struct proc *p, struct pmc *pm)
 	sx_assert(&pmc_sx, SX_XLOCKED);
 
 	PMCDBG(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm,
-	    pm->pm_rowindex, p, p->p_pid, p->p_comm);
+	    PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
+
+
+	/*
+	 * If this PMC successfully allowed a GETMSR operation
+	 * in the past, disallow further ATTACHes.
+	 */
+
+	if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0)
+		return EPERM;
 
 	if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
 		return pmc_attach_one_process(p, pm);
@@ -999,10 +1030,10 @@ pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags)
 	KASSERT(pm != NULL,
 	    ("[pmc,%d] null pm pointer", __LINE__));
 
-	PMCDBG(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x",
-	    pm, pm->pm_rowindex, p, p->p_pid, p->p_comm, flags);
+	ri = PMC_TO_ROWINDEX(pm);
 
-	ri = pm->pm_rowindex;
+	PMCDBG(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x",
+	    pm, ri, p, p->p_pid, p->p_comm, flags);
 
 	if ((pp = pmc_find_process_descriptor(p, 0)) == NULL)
 		return ESRCH;
@@ -1049,7 +1080,7 @@ pmc_detach_process(struct proc *p, struct pmc *pm)
 	sx_assert(&pmc_sx, SX_XLOCKED);
 
 	PMCDBG(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm,
-	    pm->pm_rowindex, p, p->p_pid, p->p_comm);
+	    PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
 
 	if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
 		return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);
@@ -1131,7 +1162,6 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
 	int cpu;
 	unsigned int ri;
 	struct pmc *pm;
-	struct pmc_hw *phw;
 	struct pmc_process *pp;
 	struct pmc_owner *po;
 	struct proc *p;
@@ -1183,22 +1213,22 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
 			 * state similar to the CSW_OUT code.
 			 */
 
-			phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
-			pm  = phw->phw_pmc;
+			pm = NULL;
+			(void) (*md->pmd_get_config)(cpu, ri, &pm);
 
 			PMCDBG(PRC,EXT,2, "ri=%d pm=%p", ri, pm);
 
 			if (pm == NULL ||
-			    !PMC_IS_VIRTUAL_MODE(pm->pm_mode))
+			    !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
 				continue;
 
 			PMCDBG(PRC,EXT,2, "ppmcs[%d]=%p pm=%p "
 			    "state=%d", ri, pp->pp_pmcs[ri].pp_pmc,
 			    pm, pm->pm_state);
 
-			KASSERT(pm->pm_rowindex == ri,
+			KASSERT(PMC_TO_ROWINDEX(pm) == ri,
 			    ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
-				__LINE__, pm->pm_rowindex, ri));
+				__LINE__, PMC_TO_ROWINDEX(pm), ri));
 
 			KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
 			    ("[pmc,%d] pm %p != pp_pmcs[%d] %p",
@@ -1222,10 +1252,11 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
 				mtx_pool_unlock_spin(pmc_mtxpool, pm);
 			}
 
+			atomic_subtract_rel_32(&pm->pm_runcount,1);
+
 			KASSERT((int) pm->pm_runcount >= 0,
 			    ("[pmc,%d] runcount is %d", __LINE__, ri));
 
-			atomic_subtract_rel_32(&pm->pm_runcount,1);
 			(void) md->pmd_config_pmc(cpu, ri, NULL);
 		}
 
@@ -1254,6 +1285,7 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
 
 			FREE(pp, M_PMC);
 
+
 		} else
 			critical_exit(); /* pp == NULL */
 
@@ -1445,13 +1477,13 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
 			if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
 				continue;
 
-			KASSERT(PMC_IS_VIRTUAL_MODE(pm->pm_mode),
+			KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
 			    ("[pmc,%d] Target PMC in non-virtual mode (%d)",
-				__LINE__, pm->pm_mode));
+				__LINE__, PMC_TO_MODE(pm)));
 
-			KASSERT(pm->pm_rowindex == ri,
+			KASSERT(PMC_TO_ROWINDEX(pm) == ri,
 			    ("[pmc,%d] Row index mismatch pmc %d != ri %d",
-				__LINE__, pm->pm_rowindex, ri));
+				__LINE__, PMC_TO_ROWINDEX(pm), ri));
 
 			/*
 			 * Only PMCs that are marked as 'RUNNING' need
@@ -1510,7 +1542,6 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
 		struct pmc *pm;
 		struct proc *p;
 		struct pmc_cpu *pc;
-		struct pmc_hw *phw;
 		struct pmc_process *pp;
 		pmc_value_t newvalue, tmp;
@@ -1560,18 +1591,18 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
 
 		for (ri = 0; ri < md->pmd_npmc; ri++) {
 
-			phw = pc->pc_hwpmcs[ri];
-			pm  = phw->phw_pmc;
+			pm = NULL;
+			(void) (*md->pmd_get_config)(cpu, ri, &pm);
 
 			if (pm == NULL)	/* nothing at this row index */
 				continue;
-			if (!PMC_IS_VIRTUAL_MODE(pm->pm_mode))
+			if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
 				continue; /* not a process virtual PMC */
 
-			KASSERT(pm->pm_rowindex == ri,
+			KASSERT(PMC_TO_ROWINDEX(pm) == ri,
 			    ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
-				__LINE__, pm->pm_rowindex, ri));
+				__LINE__, PMC_TO_ROWINDEX(pm), ri));
 
 			/* Stop hardware */
 			md->pmd_stop_pmc(cpu, ri);
@@ -1838,7 +1869,7 @@ pmc_release_pmc_descriptor(struct pmc *pm)
 	volatile int maxloop;
 #endif
 	u_int ri, cpu;
-	u_char curpri;
+	enum pmc_mode mode;
 	struct pmc_hw *phw;
 	struct pmc_process *pp;
 	struct pmc_target *ptgt, *tmp;
@@ -1848,16 +1879,17 @@ pmc_release_pmc_descriptor(struct pmc *pm)
 
 	KASSERT(pm, ("[pmc,%d] null pmc", __LINE__));
 
-	ri = pm->pm_rowindex;
+	ri   = PMC_TO_ROWINDEX(pm);
+	mode = PMC_TO_MODE(pm);
 
 	PMCDBG(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri,
-	    pm->pm_mode);
+	    mode);
 
 	/*
 	 * First, we take the PMC off hardware.
 	 */
 	cpu = 0;
-	if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+	if (PMC_IS_SYSTEM_MODE(mode)) {
 
 		/*
 		 * A system mode PMC runs on a specific CPU.  Switch
@@ -1866,7 +1898,7 @@ pmc_release_pmc_descriptor(struct pmc *pm)
 
 		pmc_save_cpu_binding(&pb);
 
-		cpu = pm->pm_gv.pm_cpu;
+		cpu = PMC_TO_CPU(pm);
 
 		if (pm->pm_state == PMC_STATE_RUNNING) {
 
@@ -1895,7 +1927,7 @@ pmc_release_pmc_descriptor(struct pmc *pm)
 
 		pmc_restore_cpu_binding(&pb);
 
-	} else if (PMC_IS_VIRTUAL_MODE(pm->pm_mode)) {
+	} else if (PMC_IS_VIRTUAL_MODE(mode)) {
 
 		/*
 		 * A virtual PMC could be running on multiple CPUs at
@@ -1924,17 +1956,11 @@ pmc_release_pmc_descriptor(struct pmc *pm)
 			maxloop--;
 			KASSERT(maxloop > 0,
 			    ("[pmc,%d] (ri%d, rc%d) waiting too long for "
-				"pmc to be free", __LINE__, pm->pm_rowindex,
-				pm->pm_runcount));
+				"pmc to be free", __LINE__,
+				PMC_TO_ROWINDEX(pm), pm->pm_runcount));
 #endif
-			mtx_lock_spin(&sched_lock);
-			curpri = curthread->td_priority;
-			mtx_unlock_spin(&sched_lock);
-
-			(void) tsleep((void *) pmc_release_pmc_descriptor,
-			    curpri, "pmcrel", 1);
-
+			pmc_force_context_switch();
 		}
 
 	/*
@@ -1977,7 +2003,7 @@ pmc_release_pmc_descriptor(struct pmc *pm)
 	 * Update row disposition
 	 */
 
-	if (PMC_IS_SYSTEM_MODE(pm->pm_mode))
+	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm)))
 		PMC_UNMARK_ROW_STANDALONE(ri);
 	else
 		PMC_UNMARK_ROW_THREAD(ri);
@@ -2007,21 +2033,20 @@ pmc_register_owner(struct proc *p, struct pmc *pmc)
 	if (pl == NULL)
 		return ENOMEM;
 
-	if ((po = pmc_find_owner_descriptor(p)) == NULL) {
+	if ((po = pmc_find_owner_descriptor(p)) == NULL)
 		if ((po = pmc_allocate_owner_descriptor(p)) == NULL) {
 			FREE(pl, M_PMC);
 			return ENOMEM;
 		}
-		po->po_flags |= PMC_FLAG_IS_OWNER; /* real owner */
-	}
 
-	if (pmc->pm_mode == PMC_MODE_TS) {
+	/* XXX is this too restrictive */
+	if (PMC_ID_TO_MODE(pmc->pm_id) == PMC_MODE_TS) {
 		/* can have only one TS mode PMC per process */
-		if (po->po_flags & PMC_FLAG_HAS_TS_PMC) {
+		if (po->po_flags & PMC_PO_HAS_TS_PMC) {
 			FREE(pl, M_PMC);
 			return EINVAL;
 		}
-		po->po_flags |= PMC_FLAG_HAS_TS_PMC;
+		po->po_flags |= PMC_PO_HAS_TS_PMC;
 	}
 
 	KASSERT(pmc->pm_owner == NULL,
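pmc_register_owner() can now recover the PMC's mode straight from its user-visible ID: a pmc_id_t packs the CPU, mode, class and row index into one value.  The real encoding lives in the pmc headers; the sketch below uses an assumed byte-per-field layout, for illustration only:

    /* Hypothetical encoding -- field positions and widths are assumptions. */
    #define MY_PMC_ID_MAKE_ID(CPU, MODE, CLASS, RI)             \
            ((((CPU) & 0xFF) << 24) | (((MODE) & 0xFF) << 16) | \
             (((CLASS) & 0xFF) << 8) | ((RI) & 0xFF))

    #define MY_PMC_ID_TO_MODE(ID)     (((ID) >> 16) & 0xFF)
    #define MY_PMC_ID_TO_ROWINDEX(ID) ((ID) & 0xFF)

Packing this state into the ID lets checks like the TS-mode test above run without dereferencing the PMC descriptor at all.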
@@ -2067,22 +2092,41 @@ pmc_getrowdisp(int ri)
  */
 
 static int
-pmc_can_allocate_rowindex(struct proc *p, unsigned int ri)
+pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu)
 {
+	enum pmc_mode mode;
+	struct pmc *pm;
 	struct pmc_list *pl;
 	struct pmc_owner *po;
 	struct pmc_process *pp;
 
-	PMCDBG(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d",
-	    p, p->p_pid, p->p_comm, ri);
+	PMCDBG(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d "
+	    "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu);
 
-	/* we shouldn't have allocated a PMC at row index 'ri' */
+	/*
+	 * We shouldn't have already allocated a process-mode PMC at
+	 * row index 'ri'.
+	 *
+	 * We shouldn't have allocated a system-wide PMC on the same
+	 * CPU and same RI.
+	 */
 	if ((po = pmc_find_owner_descriptor(p)) != NULL)
-		LIST_FOREACH(pl, &po->po_pmcs, pl_next)
-			if (pl->pl_pmc->pm_rowindex == ri)
-				return EEXIST;
+		LIST_FOREACH(pl, &po->po_pmcs, pl_next) {
+			pm = pl->pl_pmc;
+			if (PMC_TO_ROWINDEX(pm) == ri) {
+				mode = PMC_TO_MODE(pm);
+				if (PMC_IS_VIRTUAL_MODE(mode))
+					return EEXIST;
+				if (PMC_IS_SYSTEM_MODE(mode) &&
+				    (int) PMC_TO_CPU(pm) == cpu)
+					return EEXIST;
+			}
+		}
 
-	/* we shouldn't be the target of any PMC ourselves at this index */
+	/*
+	 * We also shouldn't be the target of any PMC at this index
+	 * since otherwise a PMC_ATTACH to ourselves will fail.
+	 */
 	if ((pp = pmc_find_process_descriptor(p, 0)) != NULL)
 		if (pp->pp_pmcs[ri].pp_pmc)
 			return EEXIST;
@@ -2139,7 +2183,7 @@ pmc_can_allocate_row(int ri, enum pmc_mode mode)
 }
 
 /*
- * Find a PMC descriptor with user handle 'pmc' for thread 'td'.
+ * Find a PMC descriptor with user handle 'pmcid' for thread 'td'.
 */
 
 static struct pmc *
@@ -2147,12 +2191,12 @@ pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid)
 {
 	struct pmc_list *pl;
 
-	KASSERT(pmcid < md->pmd_npmc,
-	    ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, pmcid,
-		md->pmd_npmc));
+	KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc,
+	    ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__,
+		PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc));
 
 	LIST_FOREACH(pl, &po->po_pmcs, pl_next)
-		if (pl->pl_pmc->pm_rowindex == pmcid)
+		if (pl->pl_pmc->pm_id == pmcid)
 			return pl->pl_pmc;
 
 	return NULL;
@@ -2187,17 +2231,21 @@ static int
 pmc_start(struct pmc *pm)
 {
 	int error, cpu, ri;
+	enum pmc_mode mode;
 	struct pmc_binding pb;
 
 	KASSERT(pm != NULL,
 	    ("[pmc,%d] null pm", __LINE__));
 
-	PMCDBG(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, pm->pm_mode,
-	    pm->pm_rowindex);
+	mode = PMC_TO_MODE(pm);
+	ri   = PMC_TO_ROWINDEX(pm);
+	error = 0;
+
+	PMCDBG(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri);
 
 	pm->pm_state = PMC_STATE_RUNNING;
 
-	if (PMC_IS_VIRTUAL_MODE(pm->pm_mode)) {
+	if (PMC_IS_VIRTUAL_MODE(mode)) {
 
 		/*
		 * If a PMCATTACH hadn't been done on this
@@ -2205,32 +2253,36 @@ pmc_start(struct pmc *pm)
		 */
 
 		if (LIST_EMPTY(&pm->pm_targets))
-			return pmc_attach_process(pm->pm_owner->po_owner, pm);
+			error = pmc_attach_process(pm->pm_owner->po_owner, pm);
+
+		/*
+		 * If the PMC is attached to its owner, then force a context
+		 * switch to ensure that the MD state gets set correctly.
+		 */
+		if (error == 0 && (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER))
+			pmc_force_context_switch();
 
 		/*
 		 * Nothing further to be done; thread context switch code
-		 * will start/stop the PMC as appropriate.
+		 * will start/stop the hardware as appropriate.
		 */
 
-		return 0;
+		return error;
 
 	}
 
 	/*
-	 * A system-mode PMC.  Move to the CPU associated with this
+	 * A system-wide PMC.  Move to the CPU associated with this
	 * PMC, and start the hardware.
	 */
 
 	pmc_save_cpu_binding(&pb);
 
-	cpu = pm->pm_gv.pm_cpu;
+	cpu = PMC_TO_CPU(pm);
 
 	if (pmc_cpu_is_disabled(cpu))
 		return ENXIO;
 
-	ri  = pm->pm_rowindex;
-
 	pmc_select_cpu(cpu);
 
 	/*
@@ -2238,11 +2290,13 @@ pmc_start(struct pmc *pm)
	 * so write out the initial value and start the PMC.
	 */
 
+	critical_enter();
 	if ((error = md->pmd_write_pmc(cpu, ri,
-		 PMC_IS_SAMPLING_MODE(pm->pm_mode) ?
+		 PMC_IS_SAMPLING_MODE(mode) ?
		 pm->pm_sc.pm_reloadcount : pm->pm_sc.pm_initial)) == 0)
		error = md->pmd_start_pmc(cpu, ri);
+	critical_exit();
 
 	pmc_restore_cpu_binding(&pb);
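pmc_start() now brackets the write/start pair with critical_enter()/critical_exit(): after binding to the target CPU, the thread must not be preempted (and possibly migrated) between programming the initial count and enabling the counter.  The pattern, condensed from the code above ('initial' stands for the mode-dependent initial value selected there):

    pmc_save_cpu_binding(&pb);      /* remember the current CPU binding */
    pmc_select_cpu(cpu);            /* move to the PMC's CPU */
    critical_enter();               /* no preemption while MSRs are touched */
    if ((error = md->pmd_write_pmc(cpu, ri, initial)) == 0)
            error = md->pmd_start_pmc(cpu, ri);
    critical_exit();
    pmc_restore_cpu_binding(&pb);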
@@ -2256,13 +2310,13 @@ pmc_start(struct pmc *pm)
 static int
 pmc_stop(struct pmc *pm)
 {
-	int error, cpu;
+	int cpu, error, ri;
 	struct pmc_binding pb;
 
 	KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__));
 
-	PMCDBG(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm, pm->pm_mode,
-	    pm->pm_rowindex);
+	PMCDBG(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm,
+	    PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm));
 
 	pm->pm_state = PMC_STATE_STOPPED;
 
@@ -2276,7 +2330,7 @@ pmc_stop(struct pmc *pm)
	 * switched out.
	 */
 
-	if (PMC_IS_VIRTUAL_MODE(pm->pm_mode))
+	if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
 		return 0;
 
 	/*
@@ -2288,16 +2342,22 @@ pmc_stop(struct pmc *pm)
 
 	pmc_save_cpu_binding(&pb);
 
-	cpu = pm->pm_gv.pm_cpu;
+	cpu = PMC_TO_CPU(pm);
+
+	KASSERT(cpu >= 0 && cpu < mp_ncpus,
+	    ("[pmc,%d] illegal cpu=%d", __LINE__, cpu));
 
 	if (pmc_cpu_is_disabled(cpu))
 		return ENXIO;
 
 	pmc_select_cpu(cpu);
 
-	if ((error = md->pmd_stop_pmc(cpu, pm->pm_rowindex)) == 0)
-		error = md->pmd_read_pmc(cpu, pm->pm_rowindex,
-		    &pm->pm_sc.pm_initial);
+	ri = PMC_TO_ROWINDEX(pm);
+
+	critical_enter();
+	if ((error = md->pmd_stop_pmc(cpu, ri)) == 0)
+		error = md->pmd_read_pmc(cpu, ri, &pm->pm_sc.pm_initial);
+	critical_exit();
 
 	pmc_restore_cpu_binding(&pb);
 
@@ -2396,11 +2456,11 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 		struct pmc_op_getcpuinfo gci;
 
 		gci.pm_cputype = md->pmd_cputype;
+		gci.pm_ncpu    = mp_ncpus;
 		gci.pm_npmc    = md->pmd_npmc;
 		gci.pm_nclass  = md->pmd_nclass;
 		bcopy(md->pmd_classes, &gci.pm_classes,
 		    sizeof(gci.pm_classes));
-		gci.pm_ncpu    = mp_ncpus;
 		error = copyout(&gci, arg, sizeof(gci));
 	}
 	break;
@@ -2499,11 +2559,11 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 				__LINE__));
 
 			p->pm_ownerpid   = po->po_owner->p_pid;
-			p->pm_mode       = pm->pm_mode;
+			p->pm_mode       = PMC_TO_MODE(pm);
 			p->pm_event      = pm->pm_event;
 			p->pm_flags      = pm->pm_flags;
 
-			if (PMC_IS_SAMPLING_MODE(pm->pm_mode))
+			if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 				p->pm_reloadcount =
 				    pm->pm_sc.pm_reloadcount;
 		}
@@ -2628,6 +2688,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 		int n;
 		enum pmc_mode mode;
 		struct pmc *pmc;
+		struct pmc_hw *phw;
 		struct pmc_op_pmcallocate pa;
 		struct pmc_binding pb;
 
@@ -2708,10 +2769,10 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 		    pa.pm_ev, caps, mode, cpu);
 
 		pmc = pmc_allocate_pmc_descriptor();
+		pmc->pm_id    = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class,
+		    PMC_ID_INVALID);
 		pmc->pm_event = pa.pm_ev;
-		pmc->pm_class = pa.pm_class;
 		pmc->pm_state = PMC_STATE_FREE;
-		pmc->pm_mode  = mode;
 		pmc->pm_caps  = caps;
 		pmc->pm_flags = pa.pm_flags;
 
@@ -2729,7 +2790,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 			for (n = 0; n < (int) md->pmd_npmc; n++)
 				if (pmc_can_allocate_row(n, mode) == 0 &&
 				    pmc_can_allocate_rowindex(
-					    curthread->td_proc, n) == 0 &&
+					    curthread->td_proc, n, cpu) == 0 &&
 				    (PMC_IS_UNALLOCATED(cpu, n) ||
 				     PMC_IS_SHAREABLE_PMC(cpu, n)) &&
 				    md->pmd_allocate_pmc(cpu, n, pmc,
@@ -2740,7 +2801,8 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 			for (n = 0; n < (int) md->pmd_npmc; n++) {
 				if (pmc_can_allocate_row(n, mode) == 0 &&
 				    pmc_can_allocate_rowindex(
-					    curthread->td_proc, n) == 0 &&
+					    curthread->td_proc, n,
+					    PMC_CPU_ANY) == 0 &&
 				    md->pmd_allocate_pmc(curthread->td_oncpu,
 					n, pmc, &pa) == 0)
 					break;
@@ -2760,27 +2822,37 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 			break;
 		}
 
-		PMCDBG(PMC,ALL,2, "ev=%d class=%d mode=%d -> n=%d",
-		    pmc->pm_event,
-		    pmc->pm_class, pmc->pm_mode, n);
+
+		/* Fill in the correct value in the ID field */
+		pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n);
+
+		PMCDBG(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x",
+		    pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id);
 
 		/*
 		 * Configure global pmc's immediately
 		 */
 
-		if (PMC_IS_SYSTEM_MODE(pmc->pm_mode))
-			if ((error = md->pmd_config_pmc(cpu, n, pmc)) != 0) {
+		if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) {
+
+			pmc_save_cpu_binding(&pb);
+			pmc_select_cpu(cpu);
+
+			phw = pmc_pcpu[cpu]->pc_hwpmcs[n];
+
+			if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 ||
+			    (error = md->pmd_config_pmc(cpu, n, pmc)) != 0) {
 				(void) md->pmd_release_pmc(cpu, n, pmc);
 				pmc_destroy_pmc_descriptor(pmc);
 				FREE(pmc, M_PMC);
 				pmc = NULL;
+				pmc_restore_cpu_binding(&pb);
+				error = EPERM;
 				break;
 			}
 
-		/*
-		 * Mark the row index allocated.
-		 */
+			pmc_restore_cpu_binding(&pb);
+		}
 
-		pmc->pm_rowindex = n;
 		pmc->pm_state    = PMC_STATE_ALLOCATED;
 
 		/*
@@ -2793,14 +2865,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 			PMC_MARK_ROW_THREAD(n);
 
 		/*
-		 * If this is a system-wide CPU, mark the CPU it
-		 * was allocated on.
-		 */
-
-		if (PMC_IS_SYSTEM_MODE(mode))
-			pmc->pm_gv.pm_cpu = cpu;
-
-		/*
 		 * Register this PMC with the current thread as its owner.
		 */
 
@@ -2816,7 +2880,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 		 * Return the allocated index.
		 */
 
-		pa.pm_pmcid = n;
+		pa.pm_pmcid = pmc->pm_id;
 
 		error = copyout(&pa, arg, sizeof(pa));
 	}
@@ -2847,7 +2911,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 		if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0)
 			break;
 
-		if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+		if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
 			error = EINVAL;
 			break;
 		}
@@ -3022,19 +3086,43 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 			break;
 		}
 
-		if (PMC_IS_VIRTUAL_MODE(pm->pm_mode)) {
+		if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
+
+			/*
+			 * If this PMC is attached to its owner (i.e.,
+			 * the process requesting this operation) and
+			 * is running, then attempt to get an
+			 * up-to-date reading from hardware for a READ.
+			 * Writes are only allowed when the PMC is
+			 * stopped, so only update the saved value
+			 * field.
+			 *
+			 * If the PMC is not running, or is not
+			 * attached to its owner, read/write to the
+			 * savedvalue field.
+			 */
+
+			ri = PMC_TO_ROWINDEX(pm);
 
-			/* read/write the saved value in the PMC record */
 			mtx_pool_lock_spin(pmc_mtxpool, pm);
-			if (prw.pm_flags & PMC_F_OLDVALUE)
-				oldvalue = pm->pm_gv.pm_savedvalue;
+
+			cpu = curthread->td_oncpu;
+
+			if (prw.pm_flags & PMC_F_OLDVALUE) {
+				if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) &&
+				    (pm->pm_state == PMC_STATE_RUNNING))
+					error = (*md->pmd_read_pmc)(cpu, ri,
+					    &oldvalue);
+				else
+					oldvalue = pm->pm_gv.pm_savedvalue;
+			}
 			if (prw.pm_flags & PMC_F_NEWVALUE)
 				pm->pm_gv.pm_savedvalue = prw.pm_value;
+
 			mtx_pool_unlock_spin(pmc_mtxpool, pm);
 
 		} else { /* System mode PMCs */
-			cpu = pm->pm_gv.pm_cpu;
-			ri  = pm->pm_rowindex;
+			cpu = PMC_TO_CPU(pm);
+			ri  = PMC_TO_ROWINDEX(pm);
 
 			if (pmc_cpu_is_disabled(cpu)) {
 				error = ENXIO;
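For a virtual-mode PMC the READ path now has two sources of truth.  In pseudo-C (names as in the patch, control flow condensed):

    if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) &&
        pm->pm_state == PMC_STATE_RUNNING)
            /* live: the counter is loaded on the current CPU */
            error = (*md->pmd_read_pmc)(curthread->td_oncpu,
                PMC_TO_ROWINDEX(pm), &oldvalue);
    else
            /* idle: fall back to the software copy */
            oldvalue = pm->pm_gv.pm_savedvalue;

The attached-to-owner restriction matters because only then is the PMC guaranteed to be loaded on the CPU the requesting thread is currently running on.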
@@ -3045,6 +3133,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 			pmc_save_cpu_binding(&pb);
 			pmc_select_cpu(cpu);
 
+			critical_enter();
 			/* save old value */
 			if (prw.pm_flags & PMC_F_OLDVALUE)
 				if ((error = (*md->pmd_read_pmc)(cpu, ri,
@@ -3055,6 +3144,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 				error = (*md->pmd_write_pmc)(cpu, ri,
 				    prw.pm_value);
 	error:
+			critical_exit();
 			pmc_restore_cpu_binding(&pb);
 			if (error)
 				break;
@@ -3114,7 +3204,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 			break;
 		}
 
-		if (PMC_IS_SAMPLING_MODE(pm->pm_mode))
+		if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 			pm->pm_sc.pm_reloadcount = sc.pm_count;
 		else
 			pm->pm_sc.pm_initial = sc.pm_count;
@@ -3142,9 +3232,9 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 		if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
 			break;
 
-		KASSERT(pmcid == pm->pm_rowindex,
-		    ("[pmc,%d] row index %d != id %d", __LINE__,
-			pm->pm_rowindex, pmcid));
+		KASSERT(pmcid == pm->pm_id,
+		    ("[pmc,%d] pmcid %x != id %x", __LINE__,
+			pm->pm_id, pmcid));
 
 		if (pm->pm_state == PMC_STATE_RUNNING) /* already running */
 			break;
@@ -3184,9 +3274,9 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 		if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
 			break;
 
-		KASSERT(pmcid == pm->pm_rowindex,
-		    ("[pmc,%d] row index %d != pmcid %d", __LINE__,
-			pm->pm_rowindex, pmcid));
+		KASSERT(pmcid == pm->pm_id,
+		    ("[pmc,%d] pmc id %x != pmcid %x", __LINE__,
+			pm->pm_id, pmcid));
 
 		if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */
 			break;
@@ -3234,6 +3324,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 	{
 		int ri;
 		struct pmc *pm;
+		struct pmc_target *pt;
 		struct pmc_op_x86_getmsr gm;
 
 		PMC_DOWNGRADE_SX();
@@ -3251,26 +3342,53 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
 			break;
 
 		/*
-		 * The allocated PMC needs to be a process virtual PMC,
-		 * i.e., of type T[CS].
+		 * The allocated PMC has to be a process virtual PMC,
+		 * i.e., of type MODE_T[CS].  Global PMCs can only be
+		 * read using the PMCREAD operation since they may be
+		 * allocated on a different CPU than the one we could
+		 * be running on at the time of the RDPMC instruction.
 		 *
-		 * Global PMCs can only be read using the PMCREAD
-		 * operation since they may be allocated on a
-		 * different CPU than the one we could be running on
-		 * at the time of the read.
+		 * The GETMSR operation is not allowed for PMCs that
+		 * are inherited across processes.
		 */
 
-		if (!PMC_IS_VIRTUAL_MODE(pm->pm_mode)) {
+		if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) ||
+		    (pm->pm_flags & PMC_F_DESCENDANTS)) {
 			error = EINVAL;
 			break;
 		}
 
-		ri = pm->pm_rowindex;
+		/*
+		 * It only makes sense to use a RDPMC (or its
+		 * equivalent instruction on non-x86 architectures) on
+		 * a process that has allocated and attached a PMC to
+		 * itself.  Conversely the PMC is only allowed to have
+		 * one process attached to it -- its owner.
+		 */
+
+		if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL ||
+		    LIST_NEXT(pt, pt_next) != NULL ||
+		    pt->pt_process->pp_proc != pm->pm_owner->po_owner) {
+			error = EINVAL;
+			break;
+		}
+
+		ri = PMC_TO_ROWINDEX(pm);
 
 		if ((error = (*md->pmd_get_msr)(ri, &gm.pm_msr)) < 0)
 			break;
+
 		if ((error = copyout(&gm, arg, sizeof(gm))) < 0)
 			break;
+
+		/*
+		 * Mark our process as using MSRs.  Update machine
+		 * state using a forced context switch.
		 */
+
+		pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS;
+		pmc_force_context_switch();
+
 	}
 	break;
 #endif
@@ -3314,13 +3432,13 @@ pmc_configure_log(struct pmc_owner *po, int logfd)
 	if (po->po_logfd >= 0 && logfd < 0) {
 		/* deconfigure log */
 		/* XXX */
-		po->po_flags &= ~PMC_FLAG_OWNS_LOGFILE;
+		po->po_flags &= ~PMC_PO_OWNS_LOGFILE;
 		pmc_maybe_remove_owner(po);
 
 	} else if (po->po_logfd < 0 && logfd >= 0) {
 
 		/* configure log file */
 		/* XXX */
-		po->po_flags |= PMC_FLAG_OWNS_LOGFILE;
+		po->po_flags |= PMC_PO_OWNS_LOGFILE;
 
 		/* mark process as using HWPMCs */
 		PROC_LOCK(p);
@@ -3530,7 +3648,7 @@ pmc_initialize(void)
 		printf(PMC_MODULE_NAME ":");
 		for (n = 0; n < (int) md->pmd_nclass; n++)
 			printf(" %s(%d)",
-			    pmc_name_of_pmcclass[md->pmd_classes[n]],
+			    pmc_name_of_pmcclass[md->pmd_classes[n].pm_class],
 			    md->pmd_nclasspmcs[n]);
 		printf("\n");
 	}
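The intended userland flow for PMC_OP_X86_GETMSR is: a self-attached process asks for the counter number once, then samples with RDPMC and no further system calls.  A hypothetical and deliberately minimal sketch -- the hwpmc(4) syscall plumbing and error handling are omitted:

    #include <stdint.h>

    /* read performance counter 'c' from user mode (CR4.PCE must be set) */
    static inline uint64_t
    rdpmc(uint32_t c)
    {
            uint32_t lo, hi;

            __asm __volatile("rdpmc" : "=a" (lo), "=d" (hi) : "c" (c));
            return (((uint64_t) hi << 32) | lo);
    }

This is also why the handler forces a context switch after setting PMC_PP_ENABLE_MSR_ACCESS: the switch-in code sets CR4.PCE, without which RDPMC faults.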
diff --git a/sys/dev/hwpmc/hwpmc_piv.c b/sys/dev/hwpmc/hwpmc_piv.c
index 67be026..e81e4e4 100644
--- a/sys/dev/hwpmc/hwpmc_piv.c
+++ b/sys/dev/hwpmc/hwpmc_piv.c
@@ -35,7 +35,7 @@ __FBSDID("$FreeBSD$");
 #include <sys/smp.h>
 #include <sys/systm.h>
 
-#include <machine/cputypes.h>
+#include <machine/apicreg.h>
 #include <machine/md_var.h>
 
 /*
@@ -96,12 +96,52 @@ __FBSDID("$FreeBSD$");
 * - Threads of multi-threaded processes that get scheduled on the same
 *   physical CPU are handled correctly.
 *
+ * HTT Detection
+ *
 * Not all HTT capable systems will have HTT enabled since users may
 * have turned HTT support off using the appropriate sysctls
- * (machdep.hlt_logical_cpus and machdep.logical_cpus_mask).  We
- * detect the presence of HTT by remembering if an initialization was
- * done for a logical CPU.
+ * (machdep.hlt_logical_cpus or machdep.logical_cpus_mask).  We detect
+ * the presence of HTT by remembering if 'p4_init()' was called for a
+ * logical CPU.  Note that hwpmc(4) cannot deal with a change in HTT
+ * status once it is loaded.
+ *
+ * Handling HTT READ / WRITE / START / STOP
+ *
+ * PMC resources are shared across multiple logical CPUs.  In each
+ * physical CPU's state we keep track of a 'runcount' which reflects
+ * the number of PMC-using processes that have been scheduled on the
+ * logical CPUs of this physical CPU.  Process-mode PMC operations
+ * will actually 'start' or 'stop' hardware only if these are the
+ * first or last processes respectively to use the hardware.  PMC
+ * values written by a 'write' operation are saved and are transferred
+ * to hardware at PMC 'start' time if the runcount is 0.  If the
+ * runcount is greater than 0 at the time of a 'start' operation, we
+ * keep track of the actual hardware value at the time of the 'start'
+ * operation and use this to adjust the final readings at PMC 'stop'
+ * or 'read' time.
+ *
+ * Execution sequences:
+ *
+ *     Case 1:   CPUx   +...-               (no overlap)
+ *               CPUy         +...-
+ *               RC   0 1   0 1    0
+ *
+ *     Case 2:   CPUx   +........-          (partial overlap)
+ *               CPUy       +........-
+ *               RC   0 1   2    1    0
+ *
+ *     Case 3:   CPUx   +..............-    (fully overlapped)
+ *               CPUy       +.....-
+ *               RC   0 1   2     1    0
+ *
+ * Here CPUx and CPUy are one of the two logical processors on a HTT CPU.
 *
+ * Handling HTT CONFIG
+ *
+ * Different processes attached to the same PMC may get scheduled on
+ * the two logical processors in the package.  We keep track of config
+ * and de-config operations using the CFGFLAGS fields of the per-physical
+ * cpu state.
 */
 
 #define	P4_PMCS()	\
@@ -386,9 +426,11 @@ static int p4_system_has_htt;
 * [19 struct pmc_hw structures]
 * [45 ESCRs status bytes]
 * [per-cpu spin mutex]
- * [19 flags for holding the config count and runcount]
- * [19*2 saved value fields]	(Thread mode PMC support)
- * [19*2 pmc value fields]	(-do-)
+ * [19 flag fields for holding config flags and a runcount]
+ * [19*2 hw value fields]	(Thread mode PMC support)
+ *    or
+ * [19*2 EIP values]	(Sampling mode PMCs)
+ * [19*2 pmc value fields]	(Thread mode PMC support))
 */
 
 struct p4_cpu {
@@ -398,12 +440,16 @@ struct p4_cpu {
 	char		pc_escrs[P4_NESCR];
 	struct mtx	pc_mtx;	/* spin lock */
 	unsigned char	pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
-	pmc_value_t	pc_saved[P4_NPMCS * P4_NHTT];
+	union {
+		pmc_value_t pc_hw[P4_NPMCS * P4_NHTT];
+		uintptr_t   pc_ip[P4_NPMCS * P4_NHTT];
+	}		pc_si;
 	pmc_value_t	pc_pmc_values[P4_NPMCS * P4_NHTT];
 };
 
-#define P4_PCPU_SAVED_VALUE(PC,RI,CPU)	(PC)->pc_saved[(RI)*((CPU) & 1)]
-#define P4_PCPU_PMC_VALUE(P,R,C)	(P)->pc_pmc_values[(R)*((C) & 1)]
+#define	P4_PCPU_PMC_VALUE(PC,RI,CPU)	(PC)->pc_pmc_values[(RI)*((CPU) & 1)]
+#define	P4_PCPU_HW_VALUE(PC,RI,CPU)	(PC)->pc_si.pc_hw[(RI)*((CPU) & 1)]
+#define	P4_PCPU_SAVED_IP(PC,RI,CPU)	(PC)->pc_si.pc_ip[(RI)*((CPU) & 1)]
 
 #define P4_PCPU_GET_FLAGS(PC,RI,MASK)	((PC)->pc_flags[(RI)] & (MASK))
 #define P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL)	do {	\
@@ -417,8 +463,10 @@ struct p4_cpu {
 #define	P4_PCPU_GET_RUNCOUNT(PC,RI)	P4_PCPU_GET_FLAGS(PC,RI,0x0F)
 #define	P4_PCPU_SET_RUNCOUNT(PC,RI,V)	P4_PCPU_SET_FLAGS(PC,RI,0x0F,V)
 
-#define	P4_PCPU_GET_CFGCOUNT(PC,RI)	(P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
-#define	P4_PCPU_SET_CFGCOUNT(PC,RI,C)	P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4))
+#define	P4_PCPU_GET_CFGFLAGS(PC,RI)	(P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
+#define	P4_PCPU_SET_CFGFLAGS(PC,RI,C)	P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4))
+
+#define	P4_CPU_TO_FLAG(C)		(pmc_cpu_is_logical(cpu) ? 0x2 : 0x1)
 
 /* ESCR row disposition */
 static int p4_escrdisp[P4_NESCR];
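Each pc_flags[] byte therefore carries two independent 4-bit fields, as implied by the accessor macros above:

    /*
     *   bits 7..4: CFGFLAGS -- 0x1 = configured for logical CPU 0 (T0),
     *                          0x2 = configured for logical CPU 1 (T1)
     *   bits 3..0: RUNCOUNT -- number of logical CPUs currently running
     *                          a thread that uses this PMC
     */
    cfgflags = (pc->pc_flags[ri] >> 4) & 0x0F;
    runcount =  pc->pc_flags[ri]       & 0x0F;

In the execution sequences shown earlier, RUNCOUNT is the quantity that moves 0 -> 1 -> 2 -> 1 -> 0 as threads on the two logical CPUs start and stop.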
@@ -583,10 +631,10 @@ p4_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
 	(void) pc;
 
 	PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp,
-	    (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS) != 0);
+	    (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0);
 
 	/* enable the RDPMC instruction */
-	if (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS)
+	if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
 		load_cr4(rcr4() | CR4_PCE);
 
 	PMCDBG(MDP,SWI,2, "cr4=0x%x", rcr4());
@@ -642,11 +690,15 @@ p4_read_pmc(int cpu, int ri, pmc_value_t *v)
 	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
 		cpu, ri));
 
-	mode = pm->pm_mode;
+	KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm),
+	    ("[p4,%d] class mismatch pd %d != id class %d", __LINE__,
+		pd->pm_descr.pd_class, PMC_TO_CLASS(pm)));
+
+	mode = PMC_TO_MODE(pm);
 
 	PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);
 
-	if (pd->pm_descr.pd_class == PMC_CLASS_TSC) {
+	if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC) {
 		KASSERT(PMC_IS_COUNTING_MODE(mode),
 		    ("[p4,%d] TSC counter in non-counting mode", __LINE__));
 		*v = rdtsc();
@@ -657,13 +709,19 @@ p4_read_pmc(int cpu, int ri, pmc_value_t *v)
 	KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
 	    ("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));
 
-	if (PMC_IS_SYSTEM_MODE(pm->pm_mode))
-		tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);
-	else
-		tmp = P4_PCPU_PMC_VALUE(pc,ri,cpu);
+	tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);
 
-	if (PMC_IS_SAMPLING_MODE(mode))
-		*v = -(tmp + 1); /* undo transformation */
+	if (PMC_IS_VIRTUAL_MODE(mode)) {
+		if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */
+			tmp += (P4_PERFCTR_MASK + 1) -
+			    P4_PCPU_HW_VALUE(pc,ri,cpu);
+		else
+			tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
+		tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu);
+	}
+
+	if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */
+		*v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
 	else
 		*v = tmp;
 
@@ -678,6 +736,7 @@ p4_read_pmc(int cpu, int ri, pmc_value_t *v)
 static int
 p4_write_pmc(int cpu, int ri, pmc_value_t v)
 {
+	enum pmc_mode mode;
 	struct pmc *pm;
 	struct p4_cpu *pc;
 	const struct pmc_hw *phw;
@@ -697,15 +756,17 @@ p4_write_pmc(int cpu, int ri, pmc_value_t v)
 	    ("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
 		cpu, ri));
 
+	mode = PMC_TO_MODE(pm);
+
 	PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
-	    pm->pm_mode, v);
+	    mode, v);
 
 	/*
	 * The P4's TSC register is writeable, but we don't allow a
	 * write as changing the TSC's value could interfere with
-	 * other parts of the system.
+	 * timekeeping and other system functions.
	 */
-	if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
+	if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC)
 		return 0;
 
 	/*
@@ -713,10 +774,10 @@ p4_write_pmc(int cpu, int ri, pmc_value_t v)
	 * sampling mode PMCs, the value to be programmed into the PMC
	 * counter is -(C+1) where 'C' is the requested sample rate.
	 */
-	if (PMC_IS_SAMPLING_MODE(pm->pm_mode))
-		v = -(v + 1);
+	if (PMC_IS_SAMPLING_MODE(mode))
+		v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
 
-	if (PMC_IS_SYSTEM_MODE(pm->pm_mode))
+	if (PMC_IS_SYSTEM_MODE(mode))
 		wrmsr(pd->pm_pmc_msr, v);
 	else
 		P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;
@@ -730,7 +791,9 @@ p4_write_pmc(int cpu, int ri, pmc_value_t v)
 * 'pm' may be NULL to indicate de-configuration.
 *
 * On HTT systems, a PMC may get configured twice, once for each
- * "logical" CPU.
+ * "logical" CPU.  We track this using the CFGFLAGS field of the
+ * per-cpu state; this field is a bit mask with one bit each for
+ * logical CPUs 0 & 1.
 */
 
 static int
@@ -738,7 +801,7 @@ p4_config_pmc(int cpu, int ri, struct pmc *pm)
 {
 	struct pmc_hw *phw;
 	struct p4_cpu *pc;
-	int cfgcount;
+	int cfgflags, cpuflag;
 
 	KASSERT(cpu >= 0 && cpu < mp_ncpus,
 	    ("[p4,%d] illegal CPU %d", __LINE__, cpu));
@@ -753,42 +816,71 @@ p4_config_pmc(int cpu, int ri, struct pmc *pm)
 	    ("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));
 
 	mtx_lock_spin(&pc->pc_mtx);
-	cfgcount = P4_PCPU_GET_CFGCOUNT(pc,ri);
+	cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
 
-	KASSERT(cfgcount >= 0 || cfgcount <= 2,
-	    ("[p4,%d] illegal cfgcount cfg=%d on cpu=%d ri=%d", __LINE__,
-		cfgcount, cpu, ri));
+	KASSERT(cfgflags >= 0 || cfgflags <= 3,
+	    ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
+		cfgflags, cpu, ri));
 
-	KASSERT(cfgcount == 0 || phw->phw_pmc,
+	KASSERT(cfgflags == 0 || phw->phw_pmc,
 	    ("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
		__LINE__, cpu, ri));
 
-	PMCDBG(MDP,CFG,1, "cpu=%d ri=%d cfg=%d pm=%p", cpu, ri, cfgcount,
+	PMCDBG(MDP,CFG,1, "cpu=%d ri=%d cfg=%d pm=%p", cpu, ri, cfgflags,
	    pm);
 
+	cpuflag = P4_CPU_TO_FLAG(cpu);
+
 	if (pm) {		/* config */
-		if (cfgcount == 0)
+		if (cfgflags == 0)
 			phw->phw_pmc = pm;
 
 		KASSERT(phw->phw_pmc == pm,
 		    ("[p4,%d] cpu=%d ri=%d config %p != hw %p",
			__LINE__, cpu, ri, pm, phw->phw_pmc));
 
-		cfgcount++;
+		cfgflags |= cpuflag;
 	} else {		/* unconfig */
-		--cfgcount;
-		if (cfgcount == 0)
+		cfgflags &= ~cpuflag;
+
+		if (cfgflags == 0)
 			phw->phw_pmc = NULL;
 	}
 
-	KASSERT(cfgcount >= 0 || cfgcount <= 2,
+	KASSERT(cfgflags >= 0 || cfgflags <= 3,
 	    ("[p4,%d] illegal runcount cfg=%d on cpu=%d ri=%d", __LINE__,
-		cfgcount, cpu, ri));
+		cfgflags, cpu, ri));
+
+	P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags);
+
+	mtx_unlock_spin(&pc->pc_mtx);
+
+	return 0;
+}
+
+/*
+ * Retrieve a configured PMC pointer from hardware state.
+ */
 
-	P4_PCPU_SET_CFGCOUNT(pc,ri,cfgcount);
+static int
+p4_get_config(int cpu, int ri, struct pmc **ppm)
+{
+	struct p4_cpu *pc;
+	struct pmc_hw *phw;
+	int cfgflags;
 
+	pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
+	phw = pc->pc_hwpmcs[ri];
+
+	mtx_lock_spin(&pc->pc_mtx);
+	cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
 	mtx_unlock_spin(&pc->pc_mtx);
 
+	if (cfgflags & P4_CPU_TO_FLAG(cpu))
+		*ppm = phw->phw_pmc; /* PMC config'ed on this CPU */
+	else
+		*ppm = NULL;
+
 	return 0;
 }
 
@@ -845,11 +937,11 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
 	pd = &p4_pmcdesc[ri];
 
 	PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
-	    "reqcaps=0x%x\n", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
+	    "reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
	    pm->pm_caps);
 
 	/* check class */
-	if (pd->pm_descr.pd_class != pm->pm_class)
+	if (pd->pm_descr.pd_class != a->pm_class)
 		return EINVAL;
 
 	/* check requested capabilities */
@@ -872,7 +964,7 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
	 */
 
 	if (p4_system_has_htt &&
-	    PMC_IS_VIRTUAL_MODE(pm->pm_mode) &&
+	    PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
 	    pmc_getrowdisp(ri) != 0)
 		return EBUSY;
 
@@ -898,7 +990,8 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
	 */
 
 	if (P4_EVENT_IS_TI(pevent) &&
-	    PMC_IS_VIRTUAL_MODE(pm->pm_mode) && p4_system_has_htt)
+	    PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
+	    p4_system_has_htt)
 		return EINVAL;
 
 	pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
@@ -917,7 +1010,7 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
	 * should also be free on the current CPU.
	 */
 
-	if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
		    pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
			continue;
@@ -935,7 +1028,7 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
	 * ESCRs from rows marked as 'FREE'.
	 */
 
-	if (PMC_IS_VIRTUAL_MODE(pm->pm_mode)) {
+	if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
		if (p4_system_has_htt) {
			if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
				continue;
@@ -963,7 +1056,7 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
	    ("[p4,%d] illegal ESCR value %d", __LINE__, escr));
 
	/* mark ESCR row mode */
-	if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
		P4_ESCR_MARK_ROW_STANDALONE(escr);
	} else {
@@ -1024,7 +1117,7 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
	pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;
 
	PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
-	    "escr=%d escrmsr=0x%x escrval=0x%x\n", pevent->pm_cccr_select,
+	    "escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select,
	    cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);
 
	return 0;
@@ -1048,7 +1141,7 @@ p4_release_pmc(int cpu, int ri, struct pmc *pm)
 
	PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);
 
-	if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		pc  = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
		phw = pc->pc_hwpmcs[ri];
@@ -1120,7 +1213,7 @@ p4_start_pmc(int cpu, int ri)
	}
 
	/* start system mode PMCs directly */
-	if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		wrmsr(escrmsr, escrvalue | escrtbits);
		wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
		return 0;
@@ -1144,11 +1237,6 @@ p4_start_pmc(int cpu, int ri)
		rc));
 
	if (rc == 0) {		/* 1st CPU and the non-HTT case */
-		/*
-		 * Enable the correct bits for this CPU.
-		 */
-		escrvalue |= escrtbits;
-		cccrvalue |= cccrtbits | P4_CCCR_ENABLE;
 
		KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
		    ("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
@@ -1157,36 +1245,24 @@ p4_start_pmc(int cpu, int ri)
		/* write out the low 40 bits of the saved value to hardware */
		wrmsr(pd->pm_pmc_msr,
		    P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);
-		P4_PCPU_SAVED_VALUE(pc,ri,cpu) = P4_PCPU_PMC_VALUE(pc,ri,cpu) &
-		    P4_PERFCTR_MASK;
-
-		/* Program the ESCR and CCCR and start the PMC */
-		wrmsr(escrmsr, escrvalue);
-		wrmsr(pd->pm_cccr_msr, cccrvalue);
-
-		PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
-		    "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x\n", cpu, rc,
-		    ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
-		    cccrvalue);
 
	} else if (rc == 1) {		/* 2nd CPU */
 
		/*
-		 * Retrieve the CCCR and ESCR values from their MSRs,
-		 * and turn on the addition T[0/1] bits for the 2nd
-		 * CPU.  Remember the difference between the saved
-		 * value from the previous 'write()' operation to this
-		 * (PMC,CPU) pair and the current PMC reading; this is
-		 * used at PMCSTOP time to derive the correct
-		 * increment.
+		 * Stop the PMC and retrieve the CCCR and ESCR values
+		 * from their MSRs, and turn on the additional T[0/1]
+		 * bits for the 2nd CPU.
		 */
 
		cccrvalue = rdmsr(pd->pm_cccr_msr);
+		wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
+
+		/* check that the configuration bits read back match the PMC */
		KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
		    (pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
-		    ("[p4,%d] cpu=%d rc=%d ri=%d CCCR bits 0x%x PMC 0x%x",
-			__LINE__, cpu, rc, ri, cccrvalue & P4_CCCR_Tx_MASK,
+		    ("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d "
+			"cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri,
+			cccrvalue & P4_CCCR_Tx_MASK,
			pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
 
		KASSERT(cccrvalue & P4_CCCR_ENABLE,
		    ("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
@@ -1196,9 +1272,6 @@ p4_start_pmc(int cpu, int ri)
		    "cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
		    cccrvalue, cccrtbits));
 
-		/* stop PMC */
-		wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
-
		escrvalue = rdmsr(escrmsr);
 
		KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
@@ -1207,40 +1280,33 @@ p4_start_pmc(int cpu, int ri)
		    "escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
		    escrvalue & P4_ESCR_Tx_MASK,
		    pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
-
		KASSERT((escrvalue & escrtbits) == 0,
		    ("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
		    "escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
		    rc, cpu, ri, escrmsr, escrvalue, escrtbits));
+	}
 
-		/* read current value and save it */
-		P4_PCPU_SAVED_VALUE(pc,ri,cpu) =
-		    rdmsr(pd->pm_pmc_msr) & P4_PERFCTR_MASK;
-
-		/*
-		 * program the new bits into the ESCR and CCCR,
-		 * starting the PMC in the process.
-		 */
-
-		escrvalue |= escrtbits;
-		cccrvalue |= cccrvalue;
+	/* Enable the correct bits for this CPU. */
+	escrvalue |= escrtbits;
+	cccrvalue |= cccrtbits | P4_CCCR_ENABLE;
 
-		wrmsr(escrmsr, escrvalue);
-		wrmsr(pd->pm_cccr_msr, cccrvalue);
+	/* Save HW value at the time of starting hardware */
+	P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr);
 
-		PMCDBG(MDP,STA,2,"p4-start/2 cpu=%d rc=%d ri=%d escr=%d"
-		    "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x",
-		    cpu, rc, ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr,
-		    escrvalue, cccrvalue);
-
-	} else
-		panic("invalid runcount %d\n", rc);
+	/* Program the ESCR and CCCR and start the PMC */
+	wrmsr(escrmsr, escrvalue);
+	wrmsr(pd->pm_cccr_msr, cccrvalue);
 
	++rc;
	P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
 
	mtx_unlock_spin(&pc->pc_mtx);
 
+	PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
+	    "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x v=%jx", cpu, rc,
+	    ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
+	    cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu));
+
	return 0;
 }
 
@@ -1282,7 +1348,7 @@ p4_stop_pmc(int cpu, int ri)
 
	PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);
 
-	if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+	if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
		wrmsr(pd->pm_cccr_msr,
		    pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
		return 0;
@@ -1294,12 +1360,9 @@ p4_stop_pmc(int cpu, int ri)
	 * On HTT machines, this PMC may be in use by two threads
	 * running on two logical CPUS.  Thus we look at the
	 * 'pm_runcount' field and only turn off the appropriate TO/T1
-	 * bits (and keep the PMC running).
+	 * bits (and keep the PMC running) if two logical CPUs were
+	 * using the PMC.
	 *
-	 * The 'pc_saved' field has the 'diff' between the value in
-	 * the hardware register at PMCSTART time and the nominal
-	 * start value for the PMC.  This diff is added to the current
-	 * PMC reading to derived the correct (absolute) return value.
	 */
 
	/* bits to mask */
@@ -1329,54 +1392,157 @@ p4_stop_pmc(int cpu, int ri)
 	escrmsr   = pm->pm_md.pm_p4.pm_p4_escrmsr;
 	escrvalue = rdmsr(escrmsr);
 
-	/* get the current PMC reading */
-	tmp = rdmsr(pd->pm_pmc_msr) & P4_PERFCTR_MASK;
-
-	if (rc == 1) {		/* need to keep the PMC running */
-
-		KASSERT(escrvalue & escrtbits,
-		    ("[p4,%d] ESCR T0/T1 mismatch cpu=%d ri=%d escrmsr=0x%x "
-			"escrvalue=0x%x tbits=0x%x", __LINE__, cpu, ri, escrmsr,
-			escrvalue, escrtbits));
+	/* The current CPU should be running on this PMC */
+	KASSERT(escrvalue & escrtbits,
+	    ("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x "
+		"escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr,
+		escrvalue, escrtbits));
+	KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) ||
+	    (cccrvalue & cccrtbits),
+	    ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
+		"tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));
 
-		KASSERT(PMC_IS_COUNTING_MODE(pm->pm_mode) ||
-		    (cccrvalue & cccrtbits),
-		    ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
-			"tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));
+	/* get the current hardware reading */
+	tmp = rdmsr(pd->pm_pmc_msr);
 
+	if (rc == 1) {		/* need to keep the PMC running */
		escrvalue &= ~escrtbits;
		cccrvalue &= ~cccrtbits;
-
		wrmsr(escrmsr, escrvalue);
		wrmsr(pd->pm_cccr_msr, cccrvalue);
-
	}
 
-	PMCDBG(MDP,STO,2, "p4-stop/2 cpu=%d rc=%d ri=%d escrmsr=0x%x escrval=0x%x "
-	    "cccrval=0x%x", cpu, rc, ri, escrmsr, escrvalue, cccrvalue);
+	mtx_unlock_spin(&pc->pc_mtx);
+
+	PMCDBG(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x "
+	    "escrval=0x%x cccrval=0x%x v=%jx", cpu, rc, ri, escrmsr,
+	    escrvalue, cccrvalue, tmp);
 
-	/* get the incremental count from this context switch */
-	tmp -= P4_PCPU_SAVED_VALUE(pc,ri,cpu);
-	if ((int64_t) tmp < 0) /* counter wrap-around */
-		tmp = -tmp + 1;
+	if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */
+		tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu);
+	else
+		tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
 
 	P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;
 
-	mtx_unlock_spin(&pc->pc_mtx);
	return 0;
 }
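Both p4_read_pmc() and p4_stop_pmc() now compute the increment since 'start' modulo 2^40, since the P4 perfctrs are 40 bits wide.  The arithmetic, isolated (P4_PERFCTR_MASK is 2^40 - 1):

    pmc_value_t delta, now, start;

    now   = rdmsr(pd->pm_pmc_msr) & P4_PERFCTR_MASK;
    start = P4_PCPU_HW_VALUE(pc,ri,cpu);

    if (now < start)        /* the 40-bit counter wrapped around */
            delta = (P4_PERFCTR_MASK + 1) - start + now;
    else
            delta = now - start;

    P4_PCPU_PMC_VALUE(pc,ri,cpu) += delta;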
 /*
  * Handle an interrupt.
+ *
+ * The hardware sets the CCCR_OVF flag whenever a counter overflow
+ * occurs, so the handler examines all 18 CCCR registers, processing
+ * the counters that have overflowed.
+ *
+ * On HTT machines, multiple logical CPUs may try to enter the NMI
+ * service routine at the same time.
  */
 
+extern volatile lapic_t *lapic;
+
+static void
+p4_lapic_enable_pmc_interrupt(void)
+{
+	uint32_t value;
+
+	value = lapic->lvt_pcint;
+	value &= ~APIC_LVT_M;
+	lapic->lvt_pcint = value;
+}
+
+
 static int
 p4_intr(int cpu, uintptr_t eip)
 {
-	(void) cpu;
+	int i, pmc_interrupted;
+	uint32_t cccrval, pmi_ovf_mask;
+	struct p4_cpu *pc;
+	struct pmc_hw *phw;
+	struct pmc *pm;
+	pmc_value_t v;
+
 	(void) eip;
 
+	PMCDBG(MDP,INT, 1, "cpu=%d eip=%x pcint=0x%x", cpu, eip,
+	    lapic->lvt_pcint);
 
-	return 0;
+	pmc_interrupted = 0;
+	pc = (struct p4_cpu *) pmc_pcpu[cpu];
+
+	pmi_ovf_mask = pmc_cpu_is_logical(cpu) ?
+	    P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
+	pmi_ovf_mask |= P4_CCCR_OVF;
+
+	/*
+	 * Loop through all CCCRs, looking for ones that have the
+	 * OVF_PMI bit set for our logical CPU.
+	 */
+
+	for (i = 1; i < P4_NPMCS; i++) {
+		cccrval = rdmsr(P4_CCCR_MSR_FIRST + i - 1);
+
+		if ((cccrval & pmi_ovf_mask) != pmi_ovf_mask)
+			continue;
+
+		v = rdmsr(P4_PERFCTR_MSR_FIRST + i - 1);
+
+		pmc_interrupted = 1;
+
+		PMCDBG(MDP,INT, 2, "ri=%d v=%jx", i, v);
+
+		/* Stop the counter, and turn off the overflow bit */
+		cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
+		wrmsr(P4_CCCR_MSR_FIRST + i - 1, cccrval);
+
+		phw = pc->pc_hwpmcs[i];
+		pm  = phw->phw_pmc;
+
+		/*
+		 * Ignore de-configured or stopped PMCs.
+		 * Also ignore counting mode PMCs that may
+		 * have overflowed their counters.
+		 */
+		if (pm == NULL ||
+		    pm->pm_state != PMC_STATE_RUNNING ||
+		    !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+			continue;
+
+		/*
+		 * If the previous sample hasn't been read yet, the
+		 * sampling interrupt is coming in too fast for the
+		 * rest of the system to cope.  Do not re-enable the
+		 * counter.
+		 */
+
+		if (P4_PCPU_SAVED_IP(pc,i,cpu)) {
+			atomic_add_int(&pmc_stats.pm_intr_ignored, 1);
+			continue;
+		}
+
+		/*
+		 * Write the reload count and restart the
+		 * hardware.
+		 */
+
+		v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
+			pm->pm_sc.pm_reloadcount);
+		wrmsr(P4_PERFCTR_MSR_FIRST + i - 1, v);
+		wrmsr(P4_CCCR_MSR_FIRST + i - 1,
+		    cccrval | P4_CCCR_ENABLE);
+	}
+
+	if (pmc_interrupted) {
+
+		/*
+		 * On Intel CPUs, the PMC 'pcint' entry in the LAPIC
+		 * gets masked when a PMC interrupts the CPU.  We need
+		 * to unmask this.
+		 */
+		p4_lapic_enable_pmc_interrupt();
+
+		/* XXX: Invoke helper (non-NMI) interrupt here */
+	}
+
+	return pmc_interrupted;
 }
 
 /*
@@ -1410,8 +1576,6 @@ p4_describe(int cpu, int ri, struct pmc_info *pi,
     struct pmc **ppmc)
 		return error;
 
 	pi->pm_class = pd->pm_descr.pd_class;
-	pi->pm_caps  = pd->pm_descr.pd_caps;
-	pi->pm_width = pd->pm_descr.pd_width;
 
 	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
 		pi->pm_enabled = TRUE;
@@ -1456,7 +1620,9 @@ pmc_initialize_p4(struct pmc_mdep *pmc_mdep)
 	case PMC_CPU_INTEL_PIV:
 
 		pmc_mdep->pmd_npmc = P4_NPMCS;
-		pmc_mdep->pmd_classes[1] = PMC_CLASS_P4;
+		pmc_mdep->pmd_classes[1].pm_class = PMC_CLASS_P4;
+		pmc_mdep->pmd_classes[1].pm_caps  = P4_PMC_CAPS;
+		pmc_mdep->pmd_classes[1].pm_width = 40;
 		pmc_mdep->pmd_nclasspmcs[1] = 18;
 
 		pmc_mdep->pmd_init         = p4_init;
@@ -1466,6 +1632,7 @@ pmc_initialize_p4(struct pmc_mdep *pmc_mdep)
 		pmc_mdep->pmd_read_pmc     = p4_read_pmc;
 		pmc_mdep->pmd_write_pmc    = p4_write_pmc;
 		pmc_mdep->pmd_config_pmc   = p4_config_pmc;
+		pmc_mdep->pmd_get_config   = p4_get_config;
 		pmc_mdep->pmd_allocate_pmc = p4_allocate_pmc;
 		pmc_mdep->pmd_release_pmc  = p4_release_pmc;
 		pmc_mdep->pmd_start_pmc    = p4_start_pmc;
diff --git a/sys/dev/hwpmc/hwpmc_ppro.c b/sys/dev/hwpmc/hwpmc_ppro.c
index 1bd19be6..13f9195 100644
--- a/sys/dev/hwpmc/hwpmc_ppro.c
+++ b/sys/dev/hwpmc/hwpmc_ppro.c
@@ -336,9 +336,15 @@ p6_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
 {
 	(void) pc;
 
+	PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp,
+	    pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS);
+
 	/* allow the RDPMC instruction if needed */
-	if (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS)
+	if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
 		load_cr4(rcr4() | CR4_PCE);
+
+	PMCDBG(MDP,SWI,1, "cr4=0x%x", rcr4());
+
 	return 0;
 }
 
@@ -348,8 +354,10 @@ p6_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
 	(void) pc;
 	(void) pp;		/* can be NULL */
 
+	PMCDBG(MDP,SWO,1, "pc=%p pp=%p cr4=0x%x", pc, pp, rcr4());
+
 	/* always turn off the RDPMC instruction */
-	load_cr4(rcr4() & ~CR4_PCE);
+	load_cr4(rcr4() & ~CR4_PCE);
 
 	return 0;
 }
@@ -373,7 +381,7 @@ p6_read_pmc(int cpu, int ri, pmc_value_t *v)
 		return 0;
 
 	tmp = rdmsr(pd->pm_pmc_msr) & P6_PERFCTR_MASK;
 
-	if (PMC_IS_SAMPLING_MODE(pm->pm_mode))
+	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 		*v = -tmp;
 	else
 		*v = tmp;
@@ -404,7 +412,7 @@ p6_write_pmc(int cpu, int ri, pmc_value_t v)
 	PMCDBG(MDP,WRI,1, "p6-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri,
 	    pd->pm_pmc_msr, v);
 
-	if (PMC_IS_SAMPLING_MODE(pm->pm_mode))
+	if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
 		v = -v;
 
 	wrmsr(pd->pm_pmc_msr, v & P6_PERFCTR_MASK);
@@ -426,6 +434,19 @@ p6_config_pmc(int cpu, int ri, struct pmc *pm)
 }
 
 /*
+ * Retrieve a configured PMC pointer from hardware state.
+ */
+
+static int
+p6_get_config(int cpu, int ri, struct pmc **ppm)
+{
+	*ppm = pmc_pcpu[cpu]->pc_hwpmcs[ri]->phw_pmc;
+
+	return 0;
+}
+
+
+/*
 * A pmc may be allocated to a given row index if:
 * - the event is valid for this CPU
 * - the event is valid for this counter index
@@ -454,7 +475,7 @@ p6_allocate_pmc(int cpu, int ri, struct pmc *pm,
 	    pm->pm_caps);
 
 	/* check class */
-	if (pd->pm_descr.pd_class != pm->pm_class)
+	if (pd->pm_descr.pd_class != a->pm_class)
 		return EINVAL;
 
 	/* check requested capabilities */
@@ -675,8 +696,6 @@ p6_describe(int cpu, int ri, struct pmc_info *pi,
 		return error;
 
 	pi->pm_class = pd->pm_descr.pd_class;
-	pi->pm_caps  = pd->pm_descr.pd_caps;
-	pi->pm_width = pd->pm_descr.pd_width;
 
 	if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
 		pi->pm_enabled = TRUE;
@@ -695,7 +714,7 @@ p6_get_msr(int ri, uint32_t *msr)
 	KASSERT(ri >= 0 && ri < P6_NPMCS,
 	    ("[p6,%d ri %d out of range", __LINE__, ri));
 
-	*msr = p6_pmcdesc[ri].pm_pmc_msr;
+	*msr = p6_pmcdesc[ri].pm_pmc_msr - P6_MSR_PERFCTR0;
 
 	return 0;
 }
@@ -722,7 +741,9 @@ pmc_initialize_p6(struct pmc_mdep *pmc_mdep)
 		p6_cputype = pmc_mdep->pmd_cputype;
 
 		pmc_mdep->pmd_npmc = P6_NPMCS;
-		pmc_mdep->pmd_classes[1] = PMC_CLASS_P6;
+		pmc_mdep->pmd_classes[1].pm_class = PMC_CLASS_P6;
+		pmc_mdep->pmd_classes[1].pm_caps  = P6_PMC_CAPS;
+		pmc_mdep->pmd_classes[1].pm_width = 40;
 		pmc_mdep->pmd_nclasspmcs[1] = 2;
 
 		pmc_mdep->pmd_init         = p6_init;
@@ -732,6 +753,7 @@ pmc_initialize_p6(struct pmc_mdep *pmc_mdep)
 		pmc_mdep->pmd_read_pmc     = p6_read_pmc;
 		pmc_mdep->pmd_write_pmc    = p6_write_pmc;
 		pmc_mdep->pmd_config_pmc   = p6_config_pmc;
+		pmc_mdep->pmd_get_config   = p6_get_config;
 		pmc_mdep->pmd_allocate_pmc = p6_allocate_pmc;
 		pmc_mdep->pmd_release_pmc  = p6_release_pmc;
 		pmc_mdep->pmd_start_pmc    = p6_start_pmc;
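Note the change in p6_get_msr(): it now returns the counter's offset from P6_MSR_PERFCTR0, i.e. the index that the RDPMC instruction expects, rather than a raw MSR address.  A hypothetical consumer, reusing the rdpmc() helper sketched earlier:

    uint32_t index;     /* obtained via PMC_OP_X86_GETMSR */
    uint64_t count;

    count = rdpmc(index);   /* index is 0 or 1 on a two-counter P6 */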