Diffstat (limited to 'sys/dev/hwpmc')
-rw-r--r--  sys/dev/hwpmc/hwpmc_amd.c   |  42
-rw-r--r--  sys/dev/hwpmc/hwpmc_intel.c |   4
-rw-r--r--  sys/dev/hwpmc/hwpmc_mod.c   | 398
-rw-r--r--  sys/dev/hwpmc/hwpmc_piv.c   | 437
-rw-r--r--  sys/dev/hwpmc/hwpmc_ppro.c  |  40
5 files changed, 625 insertions(+), 296 deletions(-)
diff --git a/sys/dev/hwpmc/hwpmc_amd.c b/sys/dev/hwpmc/hwpmc_amd.c
index 158dc0b..cd3db04 100644
--- a/sys/dev/hwpmc/hwpmc_amd.c
+++ b/sys/dev/hwpmc/hwpmc_amd.c
@@ -360,7 +360,7 @@ amd_read_pmc(int cpu, int ri, pmc_value_t *v)
("[amd,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
cpu, ri));
- mode = pm->pm_mode;
+ mode = PMC_TO_MODE(pm);
PMCDBG(MDP,REA,1,"amd-read id=%d class=%d", ri, pd->pm_descr.pd_class);
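
A note on the conversion above: pm->pm_mode and pm->pm_rowindex go away in this change, and a PMC's mode, class, CPU and row index are instead recovered from its pmc_id_t. A minimal sketch of the assumed encoding follows; the field layout here is illustrative only, the authoritative macros live in <sys/pmc.h>.

    /*
     * Sketch of the assumed pmc_id_t encoding: CPU, mode, class and
     * row index packed into one 32-bit identifier.  Field widths are
     * illustrative, not authoritative.
     */
    #define PMC_ID_TO_ROWINDEX(ID)  ((ID) & 0xFF)
    #define PMC_ID_TO_CLASS(ID)     (((ID) >> 8) & 0xFF)
    #define PMC_ID_TO_MODE(ID)      (((ID) >> 16) & 0xFF)
    #define PMC_ID_TO_CPU(ID)       (((ID) >> 24) & 0xFF)

    #define PMC_ID_MAKE_ID(CPU,MODE,CLASS,RI)                       \
            ((((CPU) & 0xFF) << 24) | (((MODE) & 0xFF) << 16) |     \
             (((CLASS) & 0xFF) << 8) | ((RI) & 0xFF))

    /* Accessors used throughout this patch operate on pm->pm_id: */
    #define PMC_TO_MODE(P)          PMC_ID_TO_MODE((P)->pm_id)
    #define PMC_TO_CLASS(P)         PMC_ID_TO_CLASS((P)->pm_id)
    #define PMC_TO_ROWINDEX(P)      PMC_ID_TO_ROWINDEX((P)->pm_id)
    #define PMC_TO_CPU(P)           PMC_ID_TO_CPU((P)->pm_id)
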
@@ -413,7 +413,7 @@ amd_write_pmc(int cpu, int ri, pmc_value_t v)
("[amd,%d] PMC not owned (cpu%d,pmc%d)", __LINE__,
cpu, ri));
- mode = pm->pm_mode;
+ mode = PMC_TO_MODE(pm);
if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
return 0;
@@ -461,6 +461,18 @@ amd_config_pmc(int cpu, int ri, struct pmc *pm)
}
/*
+ * Retrieve a configured PMC pointer from hardware state.
+ */
+
+static int
+amd_get_config(int cpu, int ri, struct pmc **ppm)
+{
+ *ppm = pmc_pcpu[cpu]->pc_hwpmcs[ri]->phw_pmc;
+
+ return 0;
+}
+
+/*
* Machine dependent actions taken during the context switch in of a
* thread.
*/
@@ -471,10 +483,10 @@ amd_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
(void) pc;
PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp,
- (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS) != 0);
+ (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0);
/* enable the RDPMC instruction if needed */
- if (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS)
+ if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
load_cr4(rcr4() | CR4_PCE);
return 0;
@@ -492,7 +504,7 @@ amd_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
(void) pp; /* can be NULL */
PMCDBG(MDP,SWO,1, "pc=%p pp=%p enable-msr=%d", pc, pp, pp ?
- (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS) == 1 : 0);
+ (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) == 1 : 0);
/* always turn off the RDPMC instruction */
load_cr4(rcr4() & ~CR4_PCE);
@@ -523,7 +535,7 @@ amd_allocate_pmc(int cpu, int ri, struct pmc *pm,
pd = &amd_pmcdesc[ri].pm_descr;
/* check class match */
- if (pd->pd_class != pm->pm_class)
+ if (pd->pd_class != a->pm_class)
return EINVAL;
caps = pm->pm_caps;
@@ -765,7 +777,7 @@ amd_intr(int cpu, uintptr_t eip)
continue;
}
- mode = pm->pm_mode;
+ mode = PMC_TO_MODE(pm);
if (PMC_IS_SAMPLING_MODE(mode) &&
AMD_PMC_HAS_OVERFLOWED(perfctr)) {
atomic_add_int(&pmc_stats.pm_intr_processed, 1);
@@ -803,8 +815,6 @@ amd_describe(int cpu, int ri, struct pmc_info *pi, struct pmc **ppmc)
return error;
pi->pm_class = pd->pm_descr.pd_class;
- pi->pm_caps = pd->pm_descr.pd_caps;
- pi->pm_width = pd->pm_descr.pd_width;
if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
pi->pm_enabled = TRUE;
@@ -982,8 +992,17 @@ pmc_amd_initialize(void)
/* this processor has two classes of usable PMCs */
pmc_mdep->pmd_nclass = 2;
- pmc_mdep->pmd_classes[0] = PMC_CLASS_TSC;
- pmc_mdep->pmd_classes[1] = AMD_PMC_CLASS;
+
+ /* TSC */
+ pmc_mdep->pmd_classes[0].pm_class = PMC_CLASS_TSC;
+ pmc_mdep->pmd_classes[0].pm_caps = PMC_CAP_READ;
+ pmc_mdep->pmd_classes[0].pm_width = 64;
+
+ /* AMD K7/K8 PMCs */
+ pmc_mdep->pmd_classes[1].pm_class = AMD_PMC_CLASS;
+ pmc_mdep->pmd_classes[1].pm_caps = AMD_PMC_CAPS;
+ pmc_mdep->pmd_classes[1].pm_width = 48;
+
pmc_mdep->pmd_nclasspmcs[0] = 1;
pmc_mdep->pmd_nclasspmcs[1] = (AMD_NPMCS-1);
@@ -994,6 +1013,7 @@ pmc_amd_initialize(void)
pmc_mdep->pmd_read_pmc = amd_read_pmc;
pmc_mdep->pmd_write_pmc = amd_write_pmc;
pmc_mdep->pmd_config_pmc = amd_config_pmc;
+ pmc_mdep->pmd_get_config = amd_get_config;
pmc_mdep->pmd_allocate_pmc = amd_allocate_pmc;
pmc_mdep->pmd_release_pmc = amd_release_pmc;
pmc_mdep->pmd_start_pmc = amd_start_pmc;
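
The new pmd_get_config entry point lets the machine-independent layer ask which PMC, if any, is configured at a given row index, instead of dereferencing pmc_pcpu[cpu]->pc_hwpmcs[ri]->phw_pmc directly. For AMD the lookup is trivial (amd_get_config above), but on HTT Pentium 4 systems the answer depends on which logical CPU is asking, as p4_get_config below shows. The calling convention, as used by the hwpmc_mod.c hunks later in this patch:

    struct pmc *pm;

    for (ri = 0; ri < md->pmd_npmc; ri++) {
            pm = NULL;
            (void) (*md->pmd_get_config)(cpu, ri, &pm);
            if (pm == NULL)
                    continue;   /* row unused on this (logical) CPU */
            /* ... operate on 'pm' ... */
    }
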
diff --git a/sys/dev/hwpmc/hwpmc_intel.c b/sys/dev/hwpmc/hwpmc_intel.c
index 3f6f330..ea2c6c8 100644
--- a/sys/dev/hwpmc/hwpmc_intel.c
+++ b/sys/dev/hwpmc/hwpmc_intel.c
@@ -92,7 +92,9 @@ pmc_intel_initialize(void)
pmc_mdep->pmd_cputype = cputype;
pmc_mdep->pmd_nclass = 2;
- pmc_mdep->pmd_classes[0] = PMC_CLASS_TSC;
+ pmc_mdep->pmd_classes[0].pm_class = PMC_CLASS_TSC;
+ pmc_mdep->pmd_classes[0].pm_caps = PMC_CAP_READ;
+ pmc_mdep->pmd_classes[0].pm_width = 64;
pmc_mdep->pmd_nclasspmcs[0] = 1;
error = 0;
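
The pmd_classes[] array changes from an array of enum pmc_class values to an array of structures carrying the capability mask and counter width alongside the class; this is also why pm_caps and pm_width disappear from the pmd_describe output in the per-chip files. A plausible shape for the element type, inferred from the initializations in this patch (the authoritative definition is in <sys/pmc.h>):

    struct pmc_classinfo {
            enum pmc_class  pm_class;  /* class of PMCs in this group */
            uint32_t        pm_caps;   /* PMC_CAP_* bits supported */
            uint32_t        pm_width;  /* counter width in bits */
    };
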
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index 00cf9fa..36e4761 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -151,12 +151,14 @@ static struct pmc *pmc_allocate_pmc_descriptor(void);
static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po,
pmc_id_t pmc);
static void pmc_release_pmc_descriptor(struct pmc *pmc);
-static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri);
+static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
+ int cpu);
static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
uint32_t mode);
static void pmc_remove_process_descriptor(struct pmc_process *pp);
static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p);
static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm);
+static void pmc_force_context_switch(void);
static void pmc_remove_owner(struct pmc_owner *po);
static void pmc_maybe_remove_owner(struct pmc_owner *po);
static void pmc_unlink_target_process(struct pmc *pmc,
@@ -364,6 +366,7 @@ pmc_debugflags_parse(char *newstr, char *fence)
CMP_SET_FLAG_MIN("cfg", CFG);
CMP_SET_FLAG_MIN("sta", STA);
CMP_SET_FLAG_MIN("sto", STO);
+ CMP_SET_FLAG_MIN("int", INT);
CMP_SET_FLAG_MIN("bnd", BND);
CMP_SET_FLAG_MIN("sel", SEL);
else /* unrecognized keyword */
@@ -573,6 +576,27 @@ pmc_select_cpu(int cpu)
}
/*
+ * Force a context switch.
+ *
+ * We do this by tsleep'ing for 1 tick -- invoking mi_switch() is not
+ * guaranteed to force a context switch.
+ */
+
+static void
+pmc_force_context_switch(void)
+{
+ u_char curpri;
+
+ mtx_lock_spin(&sched_lock);
+ curpri = curthread->td_priority;
+ mtx_unlock_spin(&sched_lock);
+
+ (void) tsleep((void *) pmc_force_context_switch, curpri,
+ "pmcctx", 1);
+
+}
+
+/*
* Update the per-pmc histogram
*/
@@ -671,7 +695,7 @@ pmc_remove_owner(struct pmc_owner *po)
* XXX rework needed.
*/
- if (po->po_flags & PMC_FLAG_OWNS_LOGFILE)
+ if (po->po_flags & PMC_PO_OWNS_LOGFILE)
pmc_configure_log(po, -1);
}
@@ -693,7 +717,7 @@ pmc_maybe_remove_owner(struct pmc_owner *po)
*/
if (LIST_EMPTY(&po->po_pmcs) &&
- ((po->po_flags & PMC_FLAG_OWNS_LOGFILE) == 0)) {
+ ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) {
pmc_remove_owner(po);
FREE(po, M_PMC);
}
@@ -718,7 +742,7 @@ pmc_link_target_process(struct pmc *pm, struct pmc_process *pp)
("[pmc,%d] Illegal reference count %d for process record %p",
__LINE__, pp->pp_refcnt, (void *) pp));
- ri = pm->pm_rowindex;
+ ri = PMC_TO_ROWINDEX(pm);
PMCDBG(PRC,TLK,1, "link-target pmc=%p ri=%d pmc-process=%p",
pm, ri, pp);
@@ -740,12 +764,10 @@ pmc_link_target_process(struct pmc *pm, struct pmc_process *pp)
atomic_store_rel_ptr(&pp->pp_pmcs[ri].pp_pmc, pm);
if (pm->pm_owner->po_owner == pp->pp_proc)
- pp->pp_flags |= PMC_FLAG_ENABLE_MSR_ACCESS;
+ pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER;
pp->pp_refcnt++;
- PMCDBG(PRC,TLK,2, "enable-msr %d",
- (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS) != 0);
}
/*
@@ -767,7 +789,7 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
("[pmc,%d] Illegal ref count %d on process record %p",
__LINE__, pp->pp_refcnt, (void *) pp));
- ri = pm->pm_rowindex;
+ ri = PMC_TO_ROWINDEX(pm);
PMCDBG(PRC,TUL,1, "unlink-target pmc=%p ri=%d pmc-process=%p",
pm, ri, pp);
@@ -779,8 +801,11 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
pp->pp_pmcs[ri].pp_pmc = NULL;
pp->pp_pmcs[ri].pp_pmcval = (pmc_value_t) 0;
- if (pm->pm_owner->po_owner == pp->pp_proc)
- pp->pp_flags &= ~PMC_FLAG_ENABLE_MSR_ACCESS;
+ /* Remove owner-specific flags */
+ if (pm->pm_owner->po_owner == pp->pp_proc) {
+ pp->pp_flags &= ~PMC_PP_ENABLE_MSR_ACCESS;
+ pm->pm_flags &= ~PMC_F_ATTACHED_TO_OWNER;
+ }
pp->pp_refcnt--;
@@ -792,9 +817,6 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
KASSERT(ptgt != NULL, ("[pmc,%d] process %p (pp: %p) not found "
"in pmc %p", __LINE__, pp->pp_proc, pp, pm));
- PMCDBG(PRC,TUL,4, "unlink ptgt=%p, enable-msr=%d", ptgt,
- (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS) != 0);
-
LIST_REMOVE(ptgt, pt_next);
FREE(ptgt, M_PMC);
}
@@ -897,7 +919,7 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
sx_assert(&pmc_sx, SX_XLOCKED);
PMCDBG(PRC,ATT,2, "attach-one pm=%p ri=%d proc=%p (%d, %s)", pm,
- pm->pm_rowindex, p, p->p_pid, p->p_comm);
+ PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
/*
* Locate the process descriptor corresponding to process 'p',
@@ -910,7 +932,7 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
* process descriptor and PMC.
*/
- ri = pm->pm_rowindex;
+ ri = PMC_TO_ROWINDEX(pm);
if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL)
return ENOMEM;
@@ -944,7 +966,16 @@ pmc_attach_process(struct proc *p, struct pmc *pm)
sx_assert(&pmc_sx, SX_XLOCKED);
PMCDBG(PRC,ATT,1, "attach pm=%p ri=%d proc=%p (%d, %s)", pm,
- pm->pm_rowindex, p, p->p_pid, p->p_comm);
+ PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
+
+
+ /*
+ * If this PMC successfully allowed a GETMSR operation
+ * in the past, disallow further ATTACHes.
+ */
+
+ if ((pm->pm_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0)
+ return EPERM;
if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
return pmc_attach_one_process(p, pm);
@@ -999,10 +1030,10 @@ pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags)
KASSERT(pm != NULL,
("[pmc,%d] null pm pointer", __LINE__));
- PMCDBG(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x",
- pm, pm->pm_rowindex, p, p->p_pid, p->p_comm, flags);
+ ri = PMC_TO_ROWINDEX(pm);
- ri = pm->pm_rowindex;
+ PMCDBG(PRC,ATT,2, "detach-one pm=%p ri=%d proc=%p (%d, %s) flags=0x%x",
+ pm, ri, p, p->p_pid, p->p_comm, flags);
if ((pp = pmc_find_process_descriptor(p, 0)) == NULL)
return ESRCH;
@@ -1049,7 +1080,7 @@ pmc_detach_process(struct proc *p, struct pmc *pm)
sx_assert(&pmc_sx, SX_XLOCKED);
PMCDBG(PRC,ATT,1, "detach pm=%p ri=%d proc=%p (%d, %s)", pm,
- pm->pm_rowindex, p, p->p_pid, p->p_comm);
+ PMC_TO_ROWINDEX(pm), p, p->p_pid, p->p_comm);
if ((pm->pm_flags & PMC_F_DESCENDANTS) == 0)
return pmc_detach_one_process(p, pm, PMC_FLAG_REMOVE);
@@ -1131,7 +1162,6 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
int cpu;
unsigned int ri;
struct pmc *pm;
- struct pmc_hw *phw;
struct pmc_process *pp;
struct pmc_owner *po;
struct proc *p;
@@ -1183,22 +1213,22 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
* state similar to the CSW_OUT code.
*/
- phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
- pm = phw->phw_pmc;
+ pm = NULL;
+ (void) (*md->pmd_get_config)(cpu, ri, &pm);
PMCDBG(PRC,EXT,2, "ri=%d pm=%p", ri, pm);
if (pm == NULL ||
- !PMC_IS_VIRTUAL_MODE(pm->pm_mode))
+ !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
continue;
PMCDBG(PRC,EXT,2, "ppmcs[%d]=%p pm=%p "
"state=%d", ri, pp->pp_pmcs[ri].pp_pmc,
pm, pm->pm_state);
- KASSERT(pm->pm_rowindex == ri,
+ KASSERT(PMC_TO_ROWINDEX(pm) == ri,
("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
- __LINE__, pm->pm_rowindex, ri));
+ __LINE__, PMC_TO_ROWINDEX(pm), ri));
KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
("[pmc,%d] pm %p != pp_pmcs[%d] %p",
@@ -1222,10 +1252,11 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
mtx_pool_unlock_spin(pmc_mtxpool, pm);
}
+ atomic_subtract_rel_32(&pm->pm_runcount,1);
+
KASSERT((int) pm->pm_runcount >= 0,
("[pmc,%d] runcount is %d", __LINE__, ri));
- atomic_subtract_rel_32(&pm->pm_runcount,1);
(void) md->pmd_config_pmc(cpu, ri, NULL);
}
@@ -1254,6 +1285,7 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
FREE(pp, M_PMC);
+
} else
critical_exit(); /* pp == NULL */
@@ -1445,13 +1477,13 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
continue;
- KASSERT(PMC_IS_VIRTUAL_MODE(pm->pm_mode),
+ KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
("[pmc,%d] Target PMC in non-virtual mode (%d)",
- __LINE__, pm->pm_mode));
+ __LINE__, PMC_TO_MODE(pm)));
- KASSERT(pm->pm_rowindex == ri,
+ KASSERT(PMC_TO_ROWINDEX(pm) == ri,
("[pmc,%d] Row index mismatch pmc %d != ri %d",
- __LINE__, pm->pm_rowindex, ri));
+ __LINE__, PMC_TO_ROWINDEX(pm), ri));
/*
* Only PMCs that are marked as 'RUNNING' need
@@ -1510,7 +1542,6 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
struct pmc *pm;
struct proc *p;
struct pmc_cpu *pc;
- struct pmc_hw *phw;
struct pmc_process *pp;
pmc_value_t newvalue, tmp;
@@ -1560,18 +1591,18 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
for (ri = 0; ri < md->pmd_npmc; ri++) {
- phw = pc->pc_hwpmcs[ri];
- pm = phw->phw_pmc;
+ pm = NULL;
+ (void) (*md->pmd_get_config)(cpu, ri, &pm);
if (pm == NULL) /* nothing at this row index */
continue;
- if (!PMC_IS_VIRTUAL_MODE(pm->pm_mode))
+ if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
continue; /* not a process virtual PMC */
- KASSERT(pm->pm_rowindex == ri,
+ KASSERT(PMC_TO_ROWINDEX(pm) == ri,
("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
- __LINE__, pm->pm_rowindex, ri));
+ __LINE__, PMC_TO_ROWINDEX(pm), ri));
/* Stop hardware */
md->pmd_stop_pmc(cpu, ri);
@@ -1838,7 +1869,7 @@ pmc_release_pmc_descriptor(struct pmc *pm)
volatile int maxloop;
#endif
u_int ri, cpu;
- u_char curpri;
+ enum pmc_mode mode;
struct pmc_hw *phw;
struct pmc_process *pp;
struct pmc_target *ptgt, *tmp;
@@ -1848,16 +1879,17 @@ pmc_release_pmc_descriptor(struct pmc *pm)
KASSERT(pm, ("[pmc,%d] null pmc", __LINE__));
- ri = pm->pm_rowindex;
+ ri = PMC_TO_ROWINDEX(pm);
+ mode = PMC_TO_MODE(pm);
PMCDBG(PMC,REL,1, "release-pmc pmc=%p ri=%d mode=%d", pm, ri,
- pm->pm_mode);
+ mode);
/*
* First, we take the PMC off hardware.
*/
cpu = 0;
- if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+ if (PMC_IS_SYSTEM_MODE(mode)) {
/*
* A system mode PMC runs on a specific CPU. Switch
@@ -1866,7 +1898,7 @@ pmc_release_pmc_descriptor(struct pmc *pm)
pmc_save_cpu_binding(&pb);
- cpu = pm->pm_gv.pm_cpu;
+ cpu = PMC_TO_CPU(pm);
if (pm->pm_state == PMC_STATE_RUNNING) {
@@ -1895,7 +1927,7 @@ pmc_release_pmc_descriptor(struct pmc *pm)
pmc_restore_cpu_binding(&pb);
- } else if (PMC_IS_VIRTUAL_MODE(pm->pm_mode)) {
+ } else if (PMC_IS_VIRTUAL_MODE(mode)) {
/*
* A virtual PMC could be running on multiple CPUs at
@@ -1924,17 +1956,11 @@ pmc_release_pmc_descriptor(struct pmc *pm)
maxloop--;
KASSERT(maxloop > 0,
("[pmc,%d] (ri%d, rc%d) waiting too long for "
- "pmc to be free", __LINE__, pm->pm_rowindex,
- pm->pm_runcount));
+ "pmc to be free", __LINE__,
+ PMC_TO_ROWINDEX(pm), pm->pm_runcount));
#endif
- mtx_lock_spin(&sched_lock);
- curpri = curthread->td_priority;
- mtx_unlock_spin(&sched_lock);
-
- (void) tsleep((void *) pmc_release_pmc_descriptor,
- curpri, "pmcrel", 1);
-
+ pmc_force_context_switch();
}
/*
@@ -1977,7 +2003,7 @@ pmc_release_pmc_descriptor(struct pmc *pm)
* Update row disposition
*/
- if (PMC_IS_SYSTEM_MODE(pm->pm_mode))
+ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm)))
PMC_UNMARK_ROW_STANDALONE(ri);
else
PMC_UNMARK_ROW_THREAD(ri);
@@ -2007,21 +2033,20 @@ pmc_register_owner(struct proc *p, struct pmc *pmc)
if (pl == NULL)
return ENOMEM;
- if ((po = pmc_find_owner_descriptor(p)) == NULL) {
+ if ((po = pmc_find_owner_descriptor(p)) == NULL)
if ((po = pmc_allocate_owner_descriptor(p)) == NULL) {
FREE(pl, M_PMC);
return ENOMEM;
}
- po->po_flags |= PMC_FLAG_IS_OWNER; /* real owner */
- }
- if (pmc->pm_mode == PMC_MODE_TS) {
+ /* XXX is this too restrictive? */
+ if (PMC_ID_TO_MODE(pmc->pm_id) == PMC_MODE_TS) {
/* can have only one TS mode PMC per process */
- if (po->po_flags & PMC_FLAG_HAS_TS_PMC) {
+ if (po->po_flags & PMC_PO_HAS_TS_PMC) {
FREE(pl, M_PMC);
return EINVAL;
}
- po->po_flags |= PMC_FLAG_HAS_TS_PMC;
+ po->po_flags |= PMC_PO_HAS_TS_PMC;
}
KASSERT(pmc->pm_owner == NULL,
@@ -2067,22 +2092,41 @@ pmc_getrowdisp(int ri)
*/
static int
-pmc_can_allocate_rowindex(struct proc *p, unsigned int ri)
+pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu)
{
+ enum pmc_mode mode;
+ struct pmc *pm;
struct pmc_list *pl;
struct pmc_owner *po;
struct pmc_process *pp;
- PMCDBG(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d",
- p, p->p_pid, p->p_comm, ri);
+ PMCDBG(PMC,ALR,1, "can-allocate-rowindex proc=%p (%d, %s) ri=%d "
+ "cpu=%d", p, p->p_pid, p->p_comm, ri, cpu);
- /* we shouldn't have allocated a PMC at row index 'ri' */
+ /*
+ * We shouldn't have already allocated a process-mode PMC at
+ * row index 'ri'.
+ *
+ * We shouldn't have allocated a system-wide PMC on the same
+ * CPU and same RI.
+ */
if ((po = pmc_find_owner_descriptor(p)) != NULL)
- LIST_FOREACH(pl, &po->po_pmcs, pl_next)
- if (pl->pl_pmc->pm_rowindex == ri)
- return EEXIST;
+ LIST_FOREACH(pl, &po->po_pmcs, pl_next) {
+ pm = pl->pl_pmc;
+ if (PMC_TO_ROWINDEX(pm) == ri) {
+ mode = PMC_TO_MODE(pm);
+ if (PMC_IS_VIRTUAL_MODE(mode))
+ return EEXIST;
+ if (PMC_IS_SYSTEM_MODE(mode) &&
+ (int) PMC_TO_CPU(pm) == cpu)
+ return EEXIST;
+ }
+ }
- /* we shouldn't be the target of any PMC ourselves at this index */
+ /*
+ * We also shouldn't be the target of any PMC at this index
+ * since otherwise a PMC_ATTACH to ourselves will fail.
+ */
if ((pp = pmc_find_process_descriptor(p, 0)) != NULL)
if (pp->pp_pmcs[ri].pp_pmc)
return EEXIST;
@@ -2139,7 +2183,7 @@ pmc_can_allocate_row(int ri, enum pmc_mode mode)
}
/*
- * Find a PMC descriptor with user handle 'pmc' for thread 'td'.
+ * Find a PMC descriptor with user handle 'pmcid' for thread 'td'.
*/
static struct pmc *
@@ -2147,12 +2191,12 @@ pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid)
{
struct pmc_list *pl;
- KASSERT(pmcid < md->pmd_npmc,
- ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__, pmcid,
- md->pmd_npmc));
+ KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc,
+ ("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__,
+ PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc));
LIST_FOREACH(pl, &po->po_pmcs, pl_next)
- if (pl->pl_pmc->pm_rowindex == pmcid)
+ if (pl->pl_pmc->pm_id == pmcid)
return pl->pl_pmc;
return NULL;
@@ -2187,17 +2231,21 @@ static int
pmc_start(struct pmc *pm)
{
int error, cpu, ri;
+ enum pmc_mode mode;
struct pmc_binding pb;
KASSERT(pm != NULL,
("[pmc,%d] null pm", __LINE__));
- PMCDBG(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, pm->pm_mode,
- pm->pm_rowindex);
+ mode = PMC_TO_MODE(pm);
+ ri = PMC_TO_ROWINDEX(pm);
+ error = 0;
+
+ PMCDBG(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri);
pm->pm_state = PMC_STATE_RUNNING;
- if (PMC_IS_VIRTUAL_MODE(pm->pm_mode)) {
+ if (PMC_IS_VIRTUAL_MODE(mode)) {
/*
* If a PMCATTACH hadn't been done on this
@@ -2205,32 +2253,36 @@ pmc_start(struct pmc *pm)
*/
if (LIST_EMPTY(&pm->pm_targets))
- return pmc_attach_process(pm->pm_owner->po_owner, pm);
+ error = pmc_attach_process(pm->pm_owner->po_owner, pm);
+ /*
+ * If the PMC is attached to its owner, then force a context
+ * switch to ensure that the MD state gets set correctly.
+ */
+ if (error == 0 && (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER))
+ pmc_force_context_switch();
/*
* Nothing further to be done; thread context switch code
- * will start/stop the PMC as appropriate.
+ * will start/stop the hardware as appropriate.
*/
- return 0;
+ return error;
}
/*
- * A system-mode PMC. Move to the CPU associated with this
+ * A system-wide PMC. Move to the CPU associated with this
* PMC, and start the hardware.
*/
pmc_save_cpu_binding(&pb);
- cpu = pm->pm_gv.pm_cpu;
+ cpu = PMC_TO_CPU(pm);
if (pmc_cpu_is_disabled(cpu))
return ENXIO;
- ri = pm->pm_rowindex;
-
pmc_select_cpu(cpu);
/*
@@ -2238,11 +2290,13 @@ pmc_start(struct pmc *pm)
* so write out the initial value and start the PMC.
*/
+ critical_enter();
if ((error = md->pmd_write_pmc(cpu, ri,
- PMC_IS_SAMPLING_MODE(pm->pm_mode) ?
+ PMC_IS_SAMPLING_MODE(mode) ?
pm->pm_sc.pm_reloadcount :
pm->pm_sc.pm_initial)) == 0)
error = md->pmd_start_pmc(cpu, ri);
+ critical_exit();
pmc_restore_cpu_binding(&pb);
@@ -2256,13 +2310,13 @@ pmc_start(struct pmc *pm)
static int
pmc_stop(struct pmc *pm)
{
- int error, cpu;
+ int cpu, error, ri;
struct pmc_binding pb;
KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__));
- PMCDBG(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm, pm->pm_mode,
- pm->pm_rowindex);
+ PMCDBG(PMC,OPS,1, "stop pmc=%p mode=%d ri=%d", pm,
+ PMC_TO_MODE(pm), PMC_TO_ROWINDEX(pm));
pm->pm_state = PMC_STATE_STOPPED;
@@ -2276,7 +2330,7 @@ pmc_stop(struct pmc *pm)
* switched out.
*/
- if (PMC_IS_VIRTUAL_MODE(pm->pm_mode))
+ if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
return 0;
/*
@@ -2288,16 +2342,22 @@ pmc_stop(struct pmc *pm)
pmc_save_cpu_binding(&pb);
- cpu = pm->pm_gv.pm_cpu;
+ cpu = PMC_TO_CPU(pm);
+
+ KASSERT(cpu >= 0 && cpu < mp_ncpus,
+ ("[pmc,%d] illegal cpu=%d", __LINE__, cpu));
if (pmc_cpu_is_disabled(cpu))
return ENXIO;
pmc_select_cpu(cpu);
- if ((error = md->pmd_stop_pmc(cpu, pm->pm_rowindex)) == 0)
- error = md->pmd_read_pmc(cpu, pm->pm_rowindex,
- &pm->pm_sc.pm_initial);
+ ri = PMC_TO_ROWINDEX(pm);
+
+ critical_enter();
+ if ((error = md->pmd_stop_pmc(cpu, ri)) == 0)
+ error = md->pmd_read_pmc(cpu, ri, &pm->pm_sc.pm_initial);
+ critical_exit();
pmc_restore_cpu_binding(&pb);
@@ -2396,11 +2456,11 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
struct pmc_op_getcpuinfo gci;
gci.pm_cputype = md->pmd_cputype;
+ gci.pm_ncpu = mp_ncpus;
gci.pm_npmc = md->pmd_npmc;
gci.pm_nclass = md->pmd_nclass;
bcopy(md->pmd_classes, &gci.pm_classes,
sizeof(gci.pm_classes));
- gci.pm_ncpu = mp_ncpus;
error = copyout(&gci, arg, sizeof(gci));
}
break;
@@ -2499,11 +2559,11 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
__LINE__));
p->pm_ownerpid = po->po_owner->p_pid;
- p->pm_mode = pm->pm_mode;
+ p->pm_mode = PMC_TO_MODE(pm);
p->pm_event = pm->pm_event;
p->pm_flags = pm->pm_flags;
- if (PMC_IS_SAMPLING_MODE(pm->pm_mode))
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
p->pm_reloadcount =
pm->pm_sc.pm_reloadcount;
}
@@ -2628,6 +2688,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
int n;
enum pmc_mode mode;
struct pmc *pmc;
+ struct pmc_hw *phw;
struct pmc_op_pmcallocate pa;
struct pmc_binding pb;
@@ -2708,10 +2769,10 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
pa.pm_ev, caps, mode, cpu);
pmc = pmc_allocate_pmc_descriptor();
+ pmc->pm_id = PMC_ID_MAKE_ID(cpu,pa.pm_mode,pa.pm_class,
+ PMC_ID_INVALID);
pmc->pm_event = pa.pm_ev;
- pmc->pm_class = pa.pm_class;
pmc->pm_state = PMC_STATE_FREE;
- pmc->pm_mode = mode;
pmc->pm_caps = caps;
pmc->pm_flags = pa.pm_flags;
@@ -2729,7 +2790,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
for (n = 0; n < (int) md->pmd_npmc; n++)
if (pmc_can_allocate_row(n, mode) == 0 &&
pmc_can_allocate_rowindex(
- curthread->td_proc, n) == 0 &&
+ curthread->td_proc, n, cpu) == 0 &&
(PMC_IS_UNALLOCATED(cpu, n) ||
PMC_IS_SHAREABLE_PMC(cpu, n)) &&
md->pmd_allocate_pmc(cpu, n, pmc,
@@ -2740,7 +2801,8 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
for (n = 0; n < (int) md->pmd_npmc; n++) {
if (pmc_can_allocate_row(n, mode) == 0 &&
pmc_can_allocate_rowindex(
- curthread->td_proc, n) == 0 &&
+ curthread->td_proc, n,
+ PMC_CPU_ANY) == 0 &&
md->pmd_allocate_pmc(curthread->td_oncpu,
n, pmc, &pa) == 0)
break;
@@ -2760,27 +2822,37 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
break;
}
- PMCDBG(PMC,ALL,2, "ev=%d class=%d mode=%d -> n=%d",
- pmc->pm_event, pmc->pm_class, pmc->pm_mode, n);
+ /* Fill in the correct value in the ID field */
+ pmc->pm_id = PMC_ID_MAKE_ID(cpu,mode,pa.pm_class,n);
+
+ PMCDBG(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x",
+ pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id);
/*
* Configure global pmc's immediately
*/
- if (PMC_IS_SYSTEM_MODE(pmc->pm_mode))
- if ((error = md->pmd_config_pmc(cpu, n, pmc)) != 0) {
+ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pmc))) {
+
+ pmc_save_cpu_binding(&pb);
+ pmc_select_cpu(cpu);
+
+ phw = pmc_pcpu[cpu]->pc_hwpmcs[n];
+
+ if ((phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) == 0 ||
+ (error = md->pmd_config_pmc(cpu, n, pmc)) != 0) {
(void) md->pmd_release_pmc(cpu, n, pmc);
pmc_destroy_pmc_descriptor(pmc);
FREE(pmc, M_PMC);
pmc = NULL;
+ pmc_restore_cpu_binding(&pb);
+ error = EPERM;
break;
}
- /*
- * Mark the row index allocated.
- */
+ pmc_restore_cpu_binding(&pb);
+ }
- pmc->pm_rowindex = n;
pmc->pm_state = PMC_STATE_ALLOCATED;
/*
@@ -2793,14 +2865,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
PMC_MARK_ROW_THREAD(n);
/*
- * If this is a system-wide CPU, mark the CPU it
- * was allocated on.
- */
-
- if (PMC_IS_SYSTEM_MODE(mode))
- pmc->pm_gv.pm_cpu = cpu;
-
- /*
* Register this PMC with the current thread as its owner.
*/
@@ -2816,7 +2880,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
* Return the allocated index.
*/
- pa.pm_pmcid = n;
+ pa.pm_pmcid = pmc->pm_id;
error = copyout(&pa, arg, sizeof(pa));
}
@@ -2847,7 +2911,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
if ((error = pmc_find_pmc(a.pm_pmc, &pm)) != 0)
break;
- if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
error = EINVAL;
break;
}
@@ -3022,19 +3086,43 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
break;
}
- if (PMC_IS_VIRTUAL_MODE(pm->pm_mode)) {
+ if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
+
+ /*
+ * If this PMC is attached to its owner (i.e.,
+ * the process requesting this operation) and
+ * is running, then attempt to get an
+ * up-to-date reading from hardware for a READ.
+ * Writes are only allowed when the PMC is
+ * stopped, so only update the saved value
+ * field.
+ *
+ * If the PMC is not running, or is not
+ * attached to its owner, read/write to the
+ * savedvalue field.
+ */
+
+ ri = PMC_TO_ROWINDEX(pm);
- /* read/write the saved value in the PMC record */
mtx_pool_lock_spin(pmc_mtxpool, pm);
- if (prw.pm_flags & PMC_F_OLDVALUE)
- oldvalue = pm->pm_gv.pm_savedvalue;
+ cpu = curthread->td_oncpu;
+
+ if (prw.pm_flags & PMC_F_OLDVALUE) {
+ if ((pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) &&
+ (pm->pm_state == PMC_STATE_RUNNING))
+ error = (*md->pmd_read_pmc)(cpu, ri,
+ &oldvalue);
+ else
+ oldvalue = pm->pm_gv.pm_savedvalue;
+ }
if (prw.pm_flags & PMC_F_NEWVALUE)
pm->pm_gv.pm_savedvalue = prw.pm_value;
+
mtx_pool_unlock_spin(pmc_mtxpool, pm);
} else { /* System mode PMCs */
- cpu = pm->pm_gv.pm_cpu;
- ri = pm->pm_rowindex;
+ cpu = PMC_TO_CPU(pm);
+ ri = PMC_TO_ROWINDEX(pm);
if (pmc_cpu_is_disabled(cpu)) {
error = ENXIO;
@@ -3045,6 +3133,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
pmc_save_cpu_binding(&pb);
pmc_select_cpu(cpu);
+ critical_enter();
/* save old value */
if (prw.pm_flags & PMC_F_OLDVALUE)
if ((error = (*md->pmd_read_pmc)(cpu, ri,
@@ -3055,6 +3144,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
error = (*md->pmd_write_pmc)(cpu, ri,
prw.pm_value);
error:
+ critical_exit();
pmc_restore_cpu_binding(&pb);
if (error)
break;
@@ -3114,7 +3204,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
break;
}
- if (PMC_IS_SAMPLING_MODE(pm->pm_mode))
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
pm->pm_sc.pm_reloadcount = sc.pm_count;
else
pm->pm_sc.pm_initial = sc.pm_count;
@@ -3142,9 +3232,9 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
break;
- KASSERT(pmcid == pm->pm_rowindex,
- ("[pmc,%d] row index %d != id %d", __LINE__,
- pm->pm_rowindex, pmcid));
+ KASSERT(pmcid == pm->pm_id,
+ ("[pmc,%d] pmcid %x != id %x", __LINE__,
+ pm->pm_id, pmcid));
if (pm->pm_state == PMC_STATE_RUNNING) /* already running */
break;
@@ -3184,9 +3274,9 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
if ((error = pmc_find_pmc(pmcid, &pm)) != 0)
break;
- KASSERT(pmcid == pm->pm_rowindex,
- ("[pmc,%d] row index %d != pmcid %d", __LINE__,
- pm->pm_rowindex, pmcid));
+ KASSERT(pmcid == pm->pm_id,
+ ("[pmc,%d] pmc id %x != pmcid %x", __LINE__,
+ pm->pm_id, pmcid));
if (pm->pm_state == PMC_STATE_STOPPED) /* already stopped */
break;
@@ -3234,6 +3324,7 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
{
int ri;
struct pmc *pm;
+ struct pmc_target *pt;
struct pmc_op_x86_getmsr gm;
PMC_DOWNGRADE_SX();
@@ -3251,26 +3342,53 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
break;
/*
- * The allocated PMC needs to be a process virtual PMC,
- * i.e., of type T[CS].
+ * The allocated PMC has to be a process virtual PMC,
+ * i.e., of type MODE_T[CS]. Global PMCs can only be
+ * read using the PMCREAD operation since they may be
+ * allocated on a different CPU than the one we could
+ * be running on at the time of the RDPMC instruction.
*
- * Global PMCs can only be read using the PMCREAD
- * operation since they may be allocated on a
- * different CPU than the one we could be running on
- * at the time of the read.
+ * The GETMSR operation is not allowed for PMCs that
+ * are inherited across processes.
*/
- if (!PMC_IS_VIRTUAL_MODE(pm->pm_mode)) {
+ if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) ||
+ (pm->pm_flags & PMC_F_DESCENDANTS)) {
error = EINVAL;
break;
}
- ri = pm->pm_rowindex;
+ /*
+ * It only makes sense to use a RDPMC (or its
+ * equivalent instruction on non-x86 architectures) on
+ * a process that has allocated and attached a PMC to
+ * itself. Conversely the PMC is only allowed to have
+ * one process attached to it -- its owner.
+ */
+
+ if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL ||
+ LIST_NEXT(pt, pt_next) != NULL ||
+ pt->pt_process->pp_proc != pm->pm_owner->po_owner) {
+ error = EINVAL;
+ break;
+ }
+
+ ri = PMC_TO_ROWINDEX(pm);
if ((error = (*md->pmd_get_msr)(ri, &gm.pm_msr)) < 0)
break;
+
if ((error = copyout(&gm, arg, sizeof(gm))) < 0)
break;
+
+ /*
+ * Mark our process as using MSRs. Update machine
+ * state using a forced context switch.
+ */
+
+ pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS;
+ pmc_force_context_switch();
+
}
break;
#endif
@@ -3314,13 +3432,13 @@ pmc_configure_log(struct pmc_owner *po, int logfd)
if (po->po_logfd >= 0 && logfd < 0) {
/* deconfigure log */
/* XXX */
- po->po_flags &= ~PMC_FLAG_OWNS_LOGFILE;
+ po->po_flags &= ~PMC_PO_OWNS_LOGFILE;
pmc_maybe_remove_owner(po);
} else if (po->po_logfd < 0 && logfd >= 0) {
/* configure log file */
/* XXX */
- po->po_flags |= PMC_FLAG_OWNS_LOGFILE;
+ po->po_flags |= PMC_PO_OWNS_LOGFILE;
/* mark process as using HWPMCs */
PROC_LOCK(p);
@@ -3530,7 +3648,7 @@ pmc_initialize(void)
printf(PMC_MODULE_NAME ":");
for (n = 0; n < (int) md->pmd_nclass; n++)
printf(" %s(%d)",
- pmc_name_of_pmcclass[md->pmd_classes[n]],
+ pmc_name_of_pmcclass[md->pmd_classes[n].pm_class],
md->pmd_nclasspmcs[n]);
printf("\n");
}
diff --git a/sys/dev/hwpmc/hwpmc_piv.c b/sys/dev/hwpmc/hwpmc_piv.c
index 67be026..e81e4e4 100644
--- a/sys/dev/hwpmc/hwpmc_piv.c
+++ b/sys/dev/hwpmc/hwpmc_piv.c
@@ -35,7 +35,7 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
-#include <machine/cputypes.h>
+#include <machine/apicreg.h>
#include <machine/md_var.h>
/*
@@ -96,12 +96,52 @@ __FBSDID("$FreeBSD$");
* - Threads of multi-threaded processes that get scheduled on the same
* physical CPU are handled correctly.
*
+ * HTT Detection
+ *
* Not all HTT capable systems will have HTT enabled since users may
* have turned HTT support off using the appropriate sysctls
- * (machdep.hlt_logical_cpus and machdep.logical_cpus_mask). We
- * detect the presence of HTT by remembering if an initialization was
- * done for a logical CPU.
+ * (machdep.hlt_logical_cpus or machdep.logical_cpus_mask). We detect
+ * the presence of HTT by remembering if 'p4_init()' was called for a
+ * logical CPU. Note that hwpmc(4) cannot deal with a change in HTT
+ * status once it is loaded.
+ *
+ * Handling HTT READ / WRITE / START / STOP
+ *
+ * PMC resources are shared across multiple logical CPUs. In each
+ * physical CPU's state we keep track of a 'runcount' which reflects
+ * the number of PMC-using processes that have been scheduled on the
+ * logical CPUs of this physical CPU. Process-mode PMC operations
+ * will actually 'start' or 'stop' hardware only if these are the
+ * first or last processes respectively to use the hardware. PMC
+ * values written by a 'write' operation are saved and are transferred
+ * to hardware at PMC 'start' time if the runcount is 0. If the
+ * runcount is greater than 0 at the time of a 'start' operation, we
+ * keep track of the actual hardware value at the time of the 'start'
+ * operation and use this to adjust the final readings at PMC 'stop'
+ * or 'read' time.
+ *
+ * Execution sequences:
+ *
+ * Case 1: CPUx +...- (no overlap)
+ * CPUy +...-
+ * RC 0 1 0 1 0
+ *
+ * Case 2: CPUx +........- (partial overlap)
+ * CPUy +........-
+ * RC 0 1 2 1 0
+ *
+ * Case 3: CPUx +..............- (fully overlapped)
+ * CPUy +.....-
+ * RC 0 1 2 1 0
+ *
+ * Here CPUx and CPUy are one of the two logical processors on a HTT CPU.
*
+ * Handling HTT CONFIG
+ *
+ * Different processes attached to the same PMC may get scheduled on
+ * the two logical processors in the package. We keep track of config
+ * and de-config operations using the CFGFLAGS fields of the per-physical
+ * cpu state.
*/
#define P4_PMCS() \
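
A compact model of the runcount rules described above: the hardware is programmed only on the 0 to 1 transition and halted only on the 1 to 0 transition, while every start snapshots the hardware so each context can be charged for exactly its own interval. This is a simplified sketch, not driver code; read_counter(), write_counter() and stop_counter() are hypothetical stand-ins for the rdmsr()/wrmsr() operations on the counter and CCCR MSRs.

    static int      rc;             /* shared runcount: 0, 1 or 2 */
    static uint64_t hw_snapshot[2]; /* per-logical-CPU baseline */

    static void
    htt_start(int lcpu, uint64_t initial)
    {
            if (rc == 0)            /* first user programs the hardware */
                    write_counter(initial);
            hw_snapshot[lcpu] = read_counter();
            rc++;
    }

    static uint64_t
    htt_stop(int lcpu, uint64_t accumulated)
    {
            uint64_t now = read_counter();

            if (--rc == 0)          /* last user halts the hardware */
                    stop_counter();
            /* charge this interval to the stopping context; 40-bit
               wrap handling is omitted here, see p4_stop_pmc below */
            return accumulated + (now - hw_snapshot[lcpu]);
    }
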
@@ -386,9 +426,11 @@ static int p4_system_has_htt;
* [19 struct pmc_hw structures]
* [45 ESCRs status bytes]
* [per-cpu spin mutex]
- * [19 flags for holding the config count and runcount]
- * [19*2 saved value fields] (Thread mode PMC support)
- * [19*2 pmc value fields] (-do-)
+ * [19 flag fields for holding config flags and a runcount]
+ * [19*2 hw value fields] (Thread mode PMC support)
+ * or
+ * [19*2 EIP values] (Sampling mode PMCs)
+ * [19*2 pmc value fields] (Thread mode PMC support)
*/
struct p4_cpu {
@@ -398,12 +440,16 @@ struct p4_cpu {
char pc_escrs[P4_NESCR];
struct mtx pc_mtx; /* spin lock */
unsigned char pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
- pmc_value_t pc_saved[P4_NPMCS * P4_NHTT];
+ union {
+ pmc_value_t pc_hw[P4_NPMCS * P4_NHTT];
+ uintptr_t pc_ip[P4_NPMCS * P4_NHTT];
+ } pc_si;
pmc_value_t pc_pmc_values[P4_NPMCS * P4_NHTT];
};
-#define P4_PCPU_SAVED_VALUE(PC,RI,CPU) (PC)->pc_saved[(RI)*((CPU) & 1)]
-#define P4_PCPU_PMC_VALUE(P,R,C) (P)->pc_pmc_values[(R)*((C) & 1)]
+#define P4_PCPU_PMC_VALUE(PC,RI,CPU) (PC)->pc_pmc_values[(RI)*((CPU) & 1)]
+#define P4_PCPU_HW_VALUE(PC,RI,CPU) (PC)->pc_si.pc_hw[(RI)*((CPU) & 1)]
+#define P4_PCPU_SAVED_IP(PC,RI,CPU) (PC)->pc_si.pc_ip[(RI)*((CPU) & 1)]
#define P4_PCPU_GET_FLAGS(PC,RI,MASK) ((PC)->pc_flags[(RI)] & (MASK))
#define P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL) do { \
@@ -417,8 +463,10 @@ struct p4_cpu {
#define P4_PCPU_GET_RUNCOUNT(PC,RI) P4_PCPU_GET_FLAGS(PC,RI,0x0F)
#define P4_PCPU_SET_RUNCOUNT(PC,RI,V) P4_PCPU_SET_FLAGS(PC,RI,0x0F,V)
-#define P4_PCPU_GET_CFGCOUNT(PC,RI) (P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
-#define P4_PCPU_SET_CFGCOUNT(PC,RI,C) P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4))
+#define P4_PCPU_GET_CFGFLAGS(PC,RI) (P4_PCPU_GET_FLAGS(PC,RI,0xF0) >> 4)
+#define P4_PCPU_SET_CFGFLAGS(PC,RI,C) P4_PCPU_SET_FLAGS(PC,RI,0xF0,((C) <<4))
+
+#define P4_CPU_TO_FLAG(C) (pmc_cpu_is_logical(cpu) ? 0x2 : 0x1)
/* ESCR row disposition */
static int p4_escrdisp[P4_NESCR];
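
Each pc_flags[] byte holds two 4-bit fields: the low nibble is the runcount, the high nibble the per-logical-CPU config flags. The diff elides the body of P4_PCPU_SET_FLAGS(); a plausible completion, with a worked example:

    #define P4_PCPU_SET_FLAGS(PC,RI,MASK,VAL) do {          \
            char _tmp;                                      \
            _tmp = (PC)->pc_flags[(RI)];                    \
            _tmp &= ~(MASK);                                \
            _tmp |= (VAL) & (MASK);                         \
            (PC)->pc_flags[(RI)] = _tmp;                    \
    } while (0)

    /*
     * Example: a PMC configured on both logical CPUs (CFGFLAGS 0x3)
     * with runcount 2 is stored as the byte 0x32:
     *   CFGFLAGS = (0x32 & 0xF0) >> 4 = 0x3,
     *   RUNCOUNT =  0x32 & 0x0F       = 0x2.
     */
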
@@ -583,10 +631,10 @@ p4_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
(void) pc;
PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp,
- (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS) != 0);
+ (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS) != 0);
/* enable the RDPMC instruction */
- if (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS)
+ if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
load_cr4(rcr4() | CR4_PCE);
PMCDBG(MDP,SWI,2, "cr4=0x%x", rcr4());
@@ -642,11 +690,15 @@ p4_read_pmc(int cpu, int ri, pmc_value_t *v)
("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
cpu, ri));
- mode = pm->pm_mode;
+ KASSERT(pd->pm_descr.pd_class == PMC_TO_CLASS(pm),
+ ("[p4,%d] class mismatch pd %d != id class %d", __LINE__,
+ pd->pm_descr.pd_class, PMC_TO_CLASS(pm)));
+
+ mode = PMC_TO_MODE(pm);
PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);
- if (pd->pm_descr.pd_class == PMC_CLASS_TSC) {
+ if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC) {
KASSERT(PMC_IS_COUNTING_MODE(mode),
("[p4,%d] TSC counter in non-counting mode", __LINE__));
*v = rdtsc();
@@ -657,13 +709,19 @@ p4_read_pmc(int cpu, int ri, pmc_value_t *v)
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));
- if (PMC_IS_SYSTEM_MODE(pm->pm_mode))
- tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);
- else
- tmp = P4_PCPU_PMC_VALUE(pc,ri,cpu);
+ tmp = rdmsr(p4_pmcdesc[ri].pm_pmc_msr);
- if (PMC_IS_SAMPLING_MODE(mode))
- *v = -(tmp + 1); /* undo transformation */
+ if (PMC_IS_VIRTUAL_MODE(mode)) {
+ if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit overflow */
+ tmp += (P4_PERFCTR_MASK + 1) -
+ P4_PCPU_HW_VALUE(pc,ri,cpu);
+ else
+ tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
+ tmp += P4_PCPU_PMC_VALUE(pc,ri,cpu);
+ }
+
+ if (PMC_IS_SAMPLING_MODE(mode)) /* undo transformation */
+ *v = P4_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
else
*v = tmp;
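
The unsigned comparison above implements a modulo-2^40 subtraction of the baseline recorded at start time. An equivalent helper, assuming P4_PERFCTR_MASK is 0xFFFFFFFFFF (2^40 - 1):

    static uint64_t
    p4_delta(uint64_t tmp, uint64_t baseline)
    {
            if (tmp < baseline)     /* the 40-bit counter wrapped */
                    return tmp + (P4_PERFCTR_MASK + 1) - baseline;
            return tmp - baseline;
    }

    /*
     * Example: p4_delta(0x100, 0xFFFFFFFF00) == 0x200, i.e. 0x100
     * counts before the wrap at 2^40 plus 0x100 counts after it.
     */
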
@@ -678,6 +736,7 @@ p4_read_pmc(int cpu, int ri, pmc_value_t *v)
static int
p4_write_pmc(int cpu, int ri, pmc_value_t v)
{
+ enum pmc_mode mode;
struct pmc *pm;
struct p4_cpu *pc;
const struct pmc_hw *phw;
@@ -697,15 +756,17 @@ p4_write_pmc(int cpu, int ri, pmc_value_t v)
("[p4,%d] No owner for HWPMC [cpu%d,pmc%d]", __LINE__,
cpu, ri));
+ mode = PMC_TO_MODE(pm);
+
PMCDBG(MDP,WRI,1, "p4-write cpu=%d ri=%d mode=%d v=%jx", cpu, ri,
- pm->pm_mode, v);
+ mode, v);
/*
* The P4's TSC register is writeable, but we don't allow a
* write as changing the TSC's value could interfere with
- * other parts of the system.
+ * timekeeping and other system functions.
*/
- if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
+ if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC)
return 0;
/*
@@ -713,10 +774,10 @@ p4_write_pmc(int cpu, int ri, pmc_value_t v)
* sampling mode PMCs, the value to be programmed into the PMC
* counter is -(C+1) where 'C' is the requested sample rate.
*/
- if (PMC_IS_SAMPLING_MODE(pm->pm_mode))
- v = -(v + 1);
+ if (PMC_IS_SAMPLING_MODE(mode))
+ v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
- if (PMC_IS_SYSTEM_MODE(pm->pm_mode))
+ if (PMC_IS_SYSTEM_MODE(mode))
wrmsr(pd->pm_pmc_msr, v);
else
P4_PCPU_PMC_VALUE(pc,ri,cpu) = v;
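
The open-coded -(v + 1) transform is replaced by named macros. Plausible definitions, assuming the convention that a sampling counter is programmed to overflow after exactly the requested number of increments of the 40-bit counter; the authoritative forms are in the part of this file the diff does not show.

    #define P4_RELOAD_COUNT_TO_PERFCTR_VALUE(R)                 \
            ((P4_PERFCTR_MASK + 1 - (R)) & P4_PERFCTR_MASK)
    #define P4_PERFCTR_VALUE_TO_RELOAD_COUNT(P)                 \
            ((P4_PERFCTR_MASK + 1 - ((P) & P4_PERFCTR_MASK)) &  \
                P4_PERFCTR_MASK)
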
@@ -730,7 +791,9 @@ p4_write_pmc(int cpu, int ri, pmc_value_t v)
* 'pm' may be NULL to indicate de-configuration.
*
* On HTT systems, a PMC may get configured twice, once for each
- * "logical" CPU.
+ * "logical" CPU. We track this using the CFGFLAGS field of the
+ * per-cpu state; this field is a bit mask with one bit each for
+ * logical CPUs 0 & 1.
*/
static int
@@ -738,7 +801,7 @@ p4_config_pmc(int cpu, int ri, struct pmc *pm)
{
struct pmc_hw *phw;
struct p4_cpu *pc;
- int cfgcount;
+ int cfgflags, cpuflag;
KASSERT(cpu >= 0 && cpu < mp_ncpus,
("[p4,%d] illegal CPU %d", __LINE__, cpu));
@@ -753,42 +816,71 @@ p4_config_pmc(int cpu, int ri, struct pmc *pm)
("[p4,%d] hwpmc not unconfigured before re-config", __LINE__));
mtx_lock_spin(&pc->pc_mtx);
- cfgcount = P4_PCPU_GET_CFGCOUNT(pc,ri);
+ cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
- KASSERT(cfgcount >= 0 || cfgcount <= 2,
- ("[p4,%d] illegal cfgcount cfg=%d on cpu=%d ri=%d", __LINE__,
- cfgcount, cpu, ri));
+ KASSERT(cfgflags >= 0 || cfgflags <= 3,
+ ("[p4,%d] illegal cfgflags cfg=%d on cpu=%d ri=%d", __LINE__,
+ cfgflags, cpu, ri));
- KASSERT(cfgcount == 0 || phw->phw_pmc,
+ KASSERT(cfgflags == 0 || phw->phw_pmc,
("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
__LINE__, cpu, ri));
- PMCDBG(MDP,CFG,1, "cpu=%d ri=%d cfg=%d pm=%p", cpu, ri, cfgcount,
+ PMCDBG(MDP,CFG,1, "cpu=%d ri=%d cfg=%d pm=%p", cpu, ri, cfgflags,
pm);
+ cpuflag = P4_CPU_TO_FLAG(cpu);
+
if (pm) { /* config */
- if (cfgcount == 0)
+ if (cfgflags == 0)
phw->phw_pmc = pm;
KASSERT(phw->phw_pmc == pm,
("[p4,%d] cpu=%d ri=%d config %p != hw %p",
__LINE__, cpu, ri, pm, phw->phw_pmc));
- cfgcount++;
+ cfgflags |= cpuflag;
} else { /* unconfig */
- --cfgcount;
- if (cfgcount == 0)
+ cfgflags &= ~cpuflag;
+
+ if (cfgflags == 0)
phw->phw_pmc = NULL;
}
- KASSERT(cfgcount >= 0 || cfgcount <= 2,
+ KASSERT(cfgflags >= 0 || cfgflags <= 3,
("[p4,%d] illegal runcount cfg=%d on cpu=%d ri=%d", __LINE__,
- cfgcount, cpu, ri));
+ cfgflags, cpu, ri));
+
+ P4_PCPU_SET_CFGFLAGS(pc,ri,cfgflags);
+
+ mtx_unlock_spin(&pc->pc_mtx);
+
+ return 0;
+}
+
+/*
+ * Retrieve a configured PMC pointer from hardware state.
+ */
- P4_PCPU_SET_CFGCOUNT(pc,ri,cfgcount);
+static int
+p4_get_config(int cpu, int ri, struct pmc **ppm)
+{
+ struct p4_cpu *pc;
+ struct pmc_hw *phw;
+ int cfgflags;
+ pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
+ phw = pc->pc_hwpmcs[ri];
+
+ mtx_lock_spin(&pc->pc_mtx);
+ cfgflags = P4_PCPU_GET_CFGFLAGS(pc,ri);
mtx_unlock_spin(&pc->pc_mtx);
+ if (cfgflags & P4_CPU_TO_FLAG(cpu))
+ *ppm = phw->phw_pmc; /* PMC config'ed on this CPU */
+ else
+ *ppm = NULL;
+
return 0;
}
@@ -845,11 +937,11 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
pd = &p4_pmcdesc[ri];
PMCDBG(MDP,ALL,1, "p4-allocate ri=%d class=%d pmccaps=0x%x "
- "reqcaps=0x%x\n", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
+ "reqcaps=0x%x", ri, pd->pm_descr.pd_class, pd->pm_descr.pd_caps,
pm->pm_caps);
/* check class */
- if (pd->pm_descr.pd_class != pm->pm_class)
+ if (pd->pm_descr.pd_class != a->pm_class)
return EINVAL;
/* check requested capabilities */
@@ -872,7 +964,7 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
*/
if (p4_system_has_htt &&
- PMC_IS_VIRTUAL_MODE(pm->pm_mode) &&
+ PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
pmc_getrowdisp(ri) != 0)
return EBUSY;
@@ -898,7 +990,8 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
*/
if (P4_EVENT_IS_TI(pevent) &&
- PMC_IS_VIRTUAL_MODE(pm->pm_mode) && p4_system_has_htt)
+ PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) &&
+ p4_system_has_htt)
return EINVAL;
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
@@ -917,7 +1010,7 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
* should also be free on the current CPU.
*/
- if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
if (P4_ESCR_ROW_DISP_IS_THREAD(escr) ||
pc->pc_escrs[escr] != P4_INVALID_PMC_INDEX)
continue;
@@ -935,7 +1028,7 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
* ESCRs from rows marked as 'FREE'.
*/
- if (PMC_IS_VIRTUAL_MODE(pm->pm_mode)) {
+ if (PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm))) {
if (p4_system_has_htt) {
if (!P4_ESCR_ROW_DISP_IS_FREE(escr))
continue;
@@ -963,7 +1056,7 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
("[p4,%d] illegal ESCR value %d", __LINE__, escr));
/* mark ESCR row mode */
- if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
pc->pc_escrs[escr] = ri; /* mark ESCR as in use on this cpu */
P4_ESCR_MARK_ROW_STANDALONE(escr);
} else {
@@ -1024,7 +1117,7 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
pm->pm_md.pm_p4.pm_p4_escrvalue = escrvalue;
PMCDBG(MDP,ALL,2, "p4-allocate cccrsel=0x%x cccrval=0x%x "
- "escr=%d escrmsr=0x%x escrval=0x%x\n", pevent->pm_cccr_select,
+ "escr=%d escrmsr=0x%x escrval=0x%x", pevent->pm_cccr_select,
cccrvalue, escr, pm->pm_md.pm_p4.pm_p4_escrmsr, escrvalue);
return 0;
@@ -1048,7 +1141,7 @@ p4_release_pmc(int cpu, int ri, struct pmc *pm)
PMCDBG(MDP,REL,1, "p4-release cpu=%d ri=%d escr=%d", cpu, ri, escr);
- if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
phw = pc->pc_hwpmcs[ri];
@@ -1120,7 +1213,7 @@ p4_start_pmc(int cpu, int ri)
}
/* start system mode PMCs directly */
- if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
wrmsr(escrmsr, escrvalue | escrtbits);
wrmsr(pd->pm_cccr_msr, cccrvalue | cccrtbits | P4_CCCR_ENABLE);
return 0;
@@ -1144,11 +1237,6 @@ p4_start_pmc(int cpu, int ri)
rc));
if (rc == 0) { /* 1st CPU and the non-HTT case */
- /*
- * Enable the correct bits for this CPU.
- */
- escrvalue |= escrtbits;
- cccrvalue |= cccrtbits | P4_CCCR_ENABLE;
KASSERT(P4_PMC_IS_STOPPED(pd->pm_cccr_msr),
("[p4,%d] cpu=%d ri=%d cccr=0x%x not stopped", __LINE__,
@@ -1157,36 +1245,24 @@ p4_start_pmc(int cpu, int ri)
/* write out the low 40 bits of the saved value to hardware */
wrmsr(pd->pm_pmc_msr,
P4_PCPU_PMC_VALUE(pc,ri,cpu) & P4_PERFCTR_MASK);
- P4_PCPU_SAVED_VALUE(pc,ri,cpu) = P4_PCPU_PMC_VALUE(pc,ri,cpu) &
- P4_PERFCTR_MASK;
-
- /* Program the ESCR and CCCR and start the PMC */
- wrmsr(escrmsr, escrvalue);
- wrmsr(pd->pm_cccr_msr, cccrvalue);
-
- PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
- "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x\n", cpu, rc,
- ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
- cccrvalue);
} else if (rc == 1) { /* 2nd CPU */
/*
- * Retrieve the CCCR and ESCR values from their MSRs,
- * and turn on the addition T[0/1] bits for the 2nd
- * CPU. Remember the difference between the saved
- * value from the previous 'write()' operation to this
- * (PMC,CPU) pair and the current PMC reading; this is
- * used at PMCSTOP time to derive the correct
- * increment.
+ * Stop the PMC and retrieve the CCCR and ESCR values
+ * from their MSRs, and turn on the additional T[0/1]
+ * bits for the 2nd CPU.
*/
cccrvalue = rdmsr(pd->pm_cccr_msr);
+ wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
+ /* check that the configuration bits read back match the PMC */
KASSERT((cccrvalue & P4_CCCR_Tx_MASK) ==
(pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK),
- ("[p4,%d] cpu=%d rc=%d ri=%d CCCR bits 0x%x PMC 0x%x",
- __LINE__, cpu, rc, ri, cccrvalue & P4_CCCR_Tx_MASK,
+ ("[p4,%d] Extra CCCR bits cpu=%d rc=%d ri=%d "
+ "cccr=0x%x PMC=0x%x", __LINE__, cpu, rc, ri,
+ cccrvalue & P4_CCCR_Tx_MASK,
pm->pm_md.pm_p4.pm_p4_cccrvalue & P4_CCCR_Tx_MASK));
KASSERT(cccrvalue & P4_CCCR_ENABLE,
("[p4,%d] 2nd cpu rc=%d cpu=%d ri=%d not running",
@@ -1196,9 +1272,6 @@ p4_start_pmc(int cpu, int ri)
"cccrvalue=0x%x tbits=0x%x", __LINE__, rc, cpu, ri,
cccrvalue, cccrtbits));
- /* stop PMC */
- wrmsr(pd->pm_cccr_msr, cccrvalue & ~P4_CCCR_ENABLE);
-
escrvalue = rdmsr(escrmsr);
KASSERT((escrvalue & P4_ESCR_Tx_MASK) ==
@@ -1207,40 +1280,33 @@ p4_start_pmc(int cpu, int ri)
"escr=0x%x pm=0x%x", __LINE__, cpu, rc, ri,
escrvalue & P4_ESCR_Tx_MASK,
pm->pm_md.pm_p4.pm_p4_escrvalue & P4_ESCR_Tx_MASK));
-
KASSERT((escrvalue & escrtbits) == 0,
("[p4,%d] ESCR T0/T1 mismatch rc=%d cpu=%d ri=%d "
"escrmsr=0x%x escrvalue=0x%x tbits=0x%x", __LINE__,
rc, cpu, ri, escrmsr, escrvalue, escrtbits));
+ }
- /* read current value and save it */
- P4_PCPU_SAVED_VALUE(pc,ri,cpu) =
- rdmsr(pd->pm_pmc_msr) & P4_PERFCTR_MASK;
-
- /*
- * program the new bits into the ESCR and CCCR,
- * starting the PMC in the process.
- */
-
- escrvalue |= escrtbits;
- cccrvalue |= cccrvalue;
+ /* Enable the correct bits for this CPU. */
+ escrvalue |= escrtbits;
+ cccrvalue |= cccrtbits | P4_CCCR_ENABLE;
- wrmsr(escrmsr, escrvalue);
- wrmsr(pd->pm_cccr_msr, cccrvalue);
+ /* Save HW value at the time of starting hardware */
+ P4_PCPU_HW_VALUE(pc,ri,cpu) = rdmsr(pd->pm_pmc_msr);
- PMCDBG(MDP,STA,2,"p4-start/2 cpu=%d rc=%d ri=%d escr=%d"
- "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x",
- cpu, rc, ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr,
- escrvalue, cccrvalue);
-
- } else
- panic("invalid runcount %d\n", rc);
+ /* Program the ESCR and CCCR and start the PMC */
+ wrmsr(escrmsr, escrvalue);
+ wrmsr(pd->pm_cccr_msr, cccrvalue);
++rc;
P4_PCPU_SET_RUNCOUNT(pc,ri,rc);
mtx_unlock_spin(&pc->pc_mtx);
+ PMCDBG(MDP,STA,2,"p4-start cpu=%d rc=%d ri=%d escr=%d "
+ "escrmsr=0x%x escrvalue=0x%x cccr_config=0x%x v=%jx", cpu, rc,
+ ri, pm->pm_md.pm_p4.pm_p4_escr, escrmsr, escrvalue,
+ cccrvalue, P4_PCPU_HW_VALUE(pc,ri,cpu));
+
return 0;
}
@@ -1282,7 +1348,7 @@ p4_stop_pmc(int cpu, int ri)
PMCDBG(MDP,STO,1, "p4-stop cpu=%d ri=%d", cpu, ri);
- if (PMC_IS_SYSTEM_MODE(pm->pm_mode)) {
+ if (PMC_IS_SYSTEM_MODE(PMC_TO_MODE(pm))) {
wrmsr(pd->pm_cccr_msr,
pm->pm_md.pm_p4.pm_p4_cccrvalue & ~P4_CCCR_ENABLE);
return 0;
@@ -1294,12 +1360,9 @@ p4_stop_pmc(int cpu, int ri)
* On HTT machines, this PMC may be in use by two threads
* running on two logical CPUS. Thus we look at the
* 'pm_runcount' field and only turn off the appropriate TO/T1
- * bits (and keep the PMC running).
+ * bits (and keep the PMC running) if two logical CPUs were
+ * using the PMC.
*
- * The 'pc_saved' field has the 'diff' between the value in
- * the hardware register at PMCSTART time and the nominal
- * start value for the PMC. This diff is added to the current
- * PMC reading to derived the correct (absolute) return value.
*/
/* bits to mask */
@@ -1329,54 +1392,157 @@ p4_stop_pmc(int cpu, int ri)
escrmsr = pm->pm_md.pm_p4.pm_p4_escrmsr;
escrvalue = rdmsr(escrmsr);
- /* get the current PMC reading */
- tmp = rdmsr(pd->pm_pmc_msr) & P4_PERFCTR_MASK;
-
- if (rc == 1) { /* need to keep the PMC running */
-
- KASSERT(escrvalue & escrtbits,
- ("[p4,%d] ESCR T0/T1 mismatch cpu=%d ri=%d escrmsr=0x%x "
- "escrvalue=0x%x tbits=0x%x", __LINE__, cpu, ri, escrmsr,
- escrvalue, escrtbits));
+ /* The current CPU should be running on this PMC */
+ KASSERT(escrvalue & escrtbits,
+ ("[p4,%d] ESCR T0/T1 mismatch cpu=%d rc=%d ri=%d escrmsr=0x%x "
+ "escrvalue=0x%x tbits=0x%x", __LINE__, cpu, rc, ri, escrmsr,
+ escrvalue, escrtbits));
+ KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)) ||
+ (cccrvalue & cccrtbits),
+ ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
+ "tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));
- KASSERT(PMC_IS_COUNTING_MODE(pm->pm_mode) ||
- (cccrvalue & cccrtbits),
- ("[p4,%d] CCCR T0/T1 mismatch cpu=%d ri=%d cccrvalue=0x%x "
- "tbits=0x%x", __LINE__, cpu, ri, cccrvalue, cccrtbits));
+ /* get the current hardware reading */
+ tmp = rdmsr(pd->pm_pmc_msr);
+ if (rc == 1) { /* need to keep the PMC running */
escrvalue &= ~escrtbits;
cccrvalue &= ~cccrtbits;
-
wrmsr(escrmsr, escrvalue);
wrmsr(pd->pm_cccr_msr, cccrvalue);
-
}
- PMCDBG(MDP,STO,2, "p4-stop/2 cpu=%d rc=%d ri=%d escrmsr=0x%x escrval=0x%x "
- "cccrval=0x%x", cpu, rc, ri, escrmsr, escrvalue, cccrvalue);
+ mtx_unlock_spin(&pc->pc_mtx);
+
+ PMCDBG(MDP,STO,2, "p4-stop cpu=%d rc=%d ri=%d escrmsr=0x%x "
+ "escrval=0x%x cccrval=0x%x v=%jx", cpu, rc, ri, escrmsr,
+ escrvalue, cccrvalue, tmp);
- /* get the incremental count from this context switch */
- tmp -= P4_PCPU_SAVED_VALUE(pc,ri,cpu);
- if ((int64_t) tmp < 0) /* counter wrap-around */
- tmp = -tmp + 1;
+ if (tmp < P4_PCPU_HW_VALUE(pc,ri,cpu)) /* 40 bit counter overflow */
+ tmp += (P4_PERFCTR_MASK + 1) - P4_PCPU_HW_VALUE(pc,ri,cpu);
+ else
+ tmp -= P4_PCPU_HW_VALUE(pc,ri,cpu);
P4_PCPU_PMC_VALUE(pc,ri,cpu) += tmp;
- mtx_unlock_spin(&pc->pc_mtx);
return 0;
}
/*
* Handle an interrupt.
+ *
+ * The hardware sets the CCCR_OVF flag whenever a counter overflows, so
+ * the handler examines all 18 CCCR registers and processes the counters
+ * that have overflowed.
+ *
+ * On HTT machines, multiple logical CPUs may try to enter the NMI service
+ * routine at the same time.
*/
+extern volatile lapic_t *lapic;
+
+static void
+p4_lapic_enable_pmc_interrupt(void)
+{
+ uint32_t value;
+
+ value = lapic->lvt_pcint;
+ value &= ~APIC_LVT_M;
+ lapic->lvt_pcint = value;
+}
+
+
static int
p4_intr(int cpu, uintptr_t eip)
{
- (void) cpu;
+ int i, pmc_interrupted;
+ uint32_t cccrval, pmi_ovf_mask;
+ struct p4_cpu *pc;
+ struct pmc_hw *phw;
+ struct pmc *pm;
+ pmc_value_t v;
+
(void) eip;
+ PMCDBG(MDP,INT, 1, "cpu=%d eip=%x pcint=0x%x", cpu, eip,
+ lapic->lvt_pcint);
- return 0;
+ pmc_interrupted = 0;
+ pc = (struct p4_cpu *) pmc_pcpu[cpu];
+
+ pmi_ovf_mask = pmc_cpu_is_logical(cpu) ?
+ P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
+ pmi_ovf_mask |= P4_CCCR_OVF;
+
+ /*
+ * Loop through all CCCRs, looking for ones that have the
+ * OVF_PMI bit set for our logical CPU.
+ */
+
+ for (i = 1; i < P4_NPMCS; i++) {
+ cccrval = rdmsr(P4_CCCR_MSR_FIRST + i - 1);
+
+ if ((cccrval & pmi_ovf_mask) != pmi_ovf_mask)
+ continue;
+
+ v = rdmsr(P4_PERFCTR_MSR_FIRST + i - 1);
+
+ pmc_interrupted = 1;
+
+ PMCDBG(MDP,INT, 2, "ri=%d v=%jx", i, v);
+
+ /* Stop the counter, and turn off the overflow bit */
+ cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
+ wrmsr(P4_CCCR_MSR_FIRST + i - 1, cccrval);
+
+ phw = pc->pc_hwpmcs[i];
+ pm = phw->phw_pmc;
+
+ /*
+ * Ignore de-configured or stopped PMCs.
+ * Also ignore counting mode PMCs that may
+ * have overflowed their counters.
+ */
+ if (pm == NULL ||
+ pm->pm_state != PMC_STATE_RUNNING ||
+ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+ continue;
+
+ /*
+ * If the previous sample hasn't been read yet, the
+ * sampling interrupt is coming in too fast for the
+ * rest of the system to cope. Do not re-enable the
+ * counter.
+ */
+
+ if (P4_PCPU_SAVED_IP(pc,i,cpu)) {
+ atomic_add_int(&pmc_stats.pm_intr_ignored, 1);
+ continue;
+ }
+
+ /*
+ * write the reload count and restart the
+ * hardware.
+ */
+
+ v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
+ pm->pm_sc.pm_reloadcount);
+ wrmsr(P4_PERFCTR_MSR_FIRST + i - 1, v);
+ wrmsr(P4_CCCR_MSR_FIRST + i - 1,
+ cccrval | P4_CCCR_ENABLE);
+ }
+
+ if (pmc_interrupted) {
+
+ /*
+ * On Intel CPUs, the PMC 'pcint' entry in the LAPIC
+ * gets masked when a PMC interrupts the CPU. We need
+ * to unmask this.
+ */
+ p4_lapic_enable_pmc_interrupt();
+
+ /* XXX: Invoke helper (non-NMI) interrupt here */
+ }
+
+ return pmc_interrupted;
}
/*
@@ -1410,8 +1576,6 @@ p4_describe(int cpu, int ri, struct pmc_info *pi,
return error;
pi->pm_class = pd->pm_descr.pd_class;
- pi->pm_caps = pd->pm_descr.pd_caps;
- pi->pm_width = pd->pm_descr.pd_width;
if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
pi->pm_enabled = TRUE;
@@ -1456,7 +1620,9 @@ pmc_initialize_p4(struct pmc_mdep *pmc_mdep)
case PMC_CPU_INTEL_PIV:
pmc_mdep->pmd_npmc = P4_NPMCS;
- pmc_mdep->pmd_classes[1] = PMC_CLASS_P4;
+ pmc_mdep->pmd_classes[1].pm_class = PMC_CLASS_P4;
+ pmc_mdep->pmd_classes[1].pm_caps = P4_PMC_CAPS;
+ pmc_mdep->pmd_classes[1].pm_width = 40;
pmc_mdep->pmd_nclasspmcs[1] = 18;
pmc_mdep->pmd_init = p4_init;
@@ -1466,6 +1632,7 @@ pmc_initialize_p4(struct pmc_mdep *pmc_mdep)
pmc_mdep->pmd_read_pmc = p4_read_pmc;
pmc_mdep->pmd_write_pmc = p4_write_pmc;
pmc_mdep->pmd_config_pmc = p4_config_pmc;
+ pmc_mdep->pmd_get_config = p4_get_config;
pmc_mdep->pmd_allocate_pmc = p4_allocate_pmc;
pmc_mdep->pmd_release_pmc = p4_release_pmc;
pmc_mdep->pmd_start_pmc = p4_start_pmc;
diff --git a/sys/dev/hwpmc/hwpmc_ppro.c b/sys/dev/hwpmc/hwpmc_ppro.c
index 1bd19be6..13f9195 100644
--- a/sys/dev/hwpmc/hwpmc_ppro.c
+++ b/sys/dev/hwpmc/hwpmc_ppro.c
@@ -336,9 +336,15 @@ p6_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
{
(void) pc;
+ PMCDBG(MDP,SWI,1, "pc=%p pp=%p enable-msr=%d", pc, pp,
+ pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS);
+
/* allow the RDPMC instruction if needed */
- if (pp->pp_flags & PMC_FLAG_ENABLE_MSR_ACCESS)
+ if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
load_cr4(rcr4() | CR4_PCE);
+
+ PMCDBG(MDP,SWI,1, "cr4=0x%x", rcr4());
+
return 0;
}
@@ -348,8 +354,10 @@ p6_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
(void) pc;
(void) pp; /* can be NULL */
+ PMCDBG(MDP,SWO,1, "pc=%p pp=%p cr4=0x%x", pc, pp, rcr4());
+
/* always turn off the RDPMC instruction */
- load_cr4(rcr4() & ~CR4_PCE);
+ load_cr4(rcr4() & ~CR4_PCE);
return 0;
}
@@ -373,7 +381,7 @@ p6_read_pmc(int cpu, int ri, pmc_value_t *v)
return 0;
tmp = rdmsr(pd->pm_pmc_msr) & P6_PERFCTR_MASK;
- if (PMC_IS_SAMPLING_MODE(pm->pm_mode))
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
*v = -tmp;
else
*v = tmp;
@@ -404,7 +412,7 @@ p6_write_pmc(int cpu, int ri, pmc_value_t v)
PMCDBG(MDP,WRI,1, "p6-write cpu=%d ri=%d msr=0x%x v=%jx", cpu, ri,
pd->pm_pmc_msr, v);
- if (PMC_IS_SAMPLING_MODE(pm->pm_mode))
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
v = -v;
wrmsr(pd->pm_pmc_msr, v & P6_PERFCTR_MASK);
@@ -426,6 +434,19 @@ p6_config_pmc(int cpu, int ri, struct pmc *pm)
}
/*
+ * Retrieve a configured PMC pointer from hardware state.
+ */
+
+static int
+p6_get_config(int cpu, int ri, struct pmc **ppm)
+{
+ *ppm = pmc_pcpu[cpu]->pc_hwpmcs[ri]->phw_pmc;
+
+ return 0;
+}
+
+
+/*
* A pmc may be allocated to a given row index if:
* - the event is valid for this CPU
* - the event is valid for this counter index
@@ -454,7 +475,7 @@ p6_allocate_pmc(int cpu, int ri, struct pmc *pm,
pm->pm_caps);
/* check class */
- if (pd->pm_descr.pd_class != pm->pm_class)
+ if (pd->pm_descr.pd_class != a->pm_class)
return EINVAL;
/* check requested capabilities */
@@ -675,8 +696,6 @@ p6_describe(int cpu, int ri, struct pmc_info *pi,
return error;
pi->pm_class = pd->pm_descr.pd_class;
- pi->pm_caps = pd->pm_descr.pd_caps;
- pi->pm_width = pd->pm_descr.pd_width;
if (phw->phw_state & PMC_PHW_FLAG_IS_ENABLED) {
pi->pm_enabled = TRUE;
@@ -695,7 +714,7 @@ p6_get_msr(int ri, uint32_t *msr)
KASSERT(ri >= 0 && ri < P6_NPMCS,
("[p6,%d ri %d out of range", __LINE__, ri));
- *msr = p6_pmcdesc[ri].pm_pmc_msr;
+ *msr = p6_pmcdesc[ri].pm_pmc_msr - P6_MSR_PERFCTR0;
return 0;
}
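
Subtracting P6_MSR_PERFCTR0 makes p6_get_msr() return the counter's index rather than its raw MSR number, which is what the x86 RDPMC instruction expects: ECX holds a counter index starting at 0, not an MSR address. A minimal userland sketch of the intended use; obtaining 'index' via the PMC_OP_GETMSR operation is per this patch, and the read requires CR4.PCE, which p6_switch_in() above enables for attached processes.

    #include <stdint.h>

    static inline uint64_t
    read_pmc(uint32_t index)
    {
            uint32_t lo, hi;

            __asm__ __volatile__("rdpmc"
                : "=a" (lo), "=d" (hi) : "c" (index));
            return ((uint64_t) hi << 32) | lo;
    }
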
@@ -722,7 +741,9 @@ pmc_initialize_p6(struct pmc_mdep *pmc_mdep)
p6_cputype = pmc_mdep->pmd_cputype;
pmc_mdep->pmd_npmc = P6_NPMCS;
- pmc_mdep->pmd_classes[1] = PMC_CLASS_P6;
+ pmc_mdep->pmd_classes[1].pm_class = PMC_CLASS_P6;
+ pmc_mdep->pmd_classes[1].pm_caps = P6_PMC_CAPS;
+ pmc_mdep->pmd_classes[1].pm_width = 40;
pmc_mdep->pmd_nclasspmcs[1] = 2;
pmc_mdep->pmd_init = p6_init;
@@ -732,6 +753,7 @@ pmc_initialize_p6(struct pmc_mdep *pmc_mdep)
pmc_mdep->pmd_read_pmc = p6_read_pmc;
pmc_mdep->pmd_write_pmc = p6_write_pmc;
pmc_mdep->pmd_config_pmc = p6_config_pmc;
+ pmc_mdep->pmd_get_config = p6_get_config;
pmc_mdep->pmd_allocate_pmc = p6_allocate_pmc;
pmc_mdep->pmd_release_pmc = p6_release_pmc;
pmc_mdep->pmd_start_pmc = p6_start_pmc;