Diffstat (limited to 'sys/dev/hwpmc/hwpmc_piv.c')
-rw-r--r--  sys/dev/hwpmc/hwpmc_piv.c | 346
1 file changed, 240 insertions(+), 106 deletions(-)
diff --git a/sys/dev/hwpmc/hwpmc_piv.c b/sys/dev/hwpmc/hwpmc_piv.c
index c9ade03..0b14745 100644
--- a/sys/dev/hwpmc/hwpmc_piv.c
+++ b/sys/dev/hwpmc/hwpmc_piv.c
@@ -35,8 +35,9 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
-#include <machine/apicreg.h>
+#include <machine/cpufunc.h>
#include <machine/md_var.h>
+#include <machine/specialreg.h>
/*
* PENTIUM 4 SUPPORT
@@ -134,7 +135,11 @@ __FBSDID("$FreeBSD$");
* CPUy +.....-
* RC 0 1 2 1 0
*
- * Here CPUx and CPUy are one of the two logical processors on a HTT CPU.
+ * Key:
+ * 'CPU[xy]' : one of the two logical processors on a HTT CPU.
+ * 'RC' : run count (#threads per physical core).
+ * '+' : point in time when a thread is put on a CPU.
+ * '-' : point in time when a thread is taken off a CPU.
*
* Handling HTT CONFIG
*
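[Editor's note: the run count ('RC') transitions sketched above are tracked per PMC row; as struct p4_cpu in the next hunk shows, pc_flags[] packs a 4-bit config count and a 4-bit run count per row. A hypothetical standalone sketch of that nibble packing, for illustration only; the driver's real accessor macros are not part of this excerpt:

    #include <assert.h>
    #include <stdint.h>

    /* One byte per PMC row: low nibble = run count, high nibble =
     * config count (hypothetical layout for illustration). */
    #define RUNCOUNT(F)      ((F) & 0x0F)
    #define CFGCOUNT(F)      (((F) >> 4) & 0x0F)
    #define RUNCOUNT_INC(F)  ((F) = ((F) & 0xF0) | ((RUNCOUNT(F) + 1) & 0x0F))
    #define RUNCOUNT_DEC(F)  ((F) = ((F) & 0xF0) | ((RUNCOUNT(F) - 1) & 0x0F))

    int
    main(void)
    {
        uint8_t flags = 0;

        RUNCOUNT_INC(flags);            /* CPUx comes on: RC 0 -> 1 */
        RUNCOUNT_INC(flags);            /* CPUy comes on: RC 1 -> 2 */
        RUNCOUNT_DEC(flags);            /* CPUy goes off: RC 2 -> 1 */
        assert(RUNCOUNT(flags) == 1 && CFGCOUNT(flags) == 0);
        return 0;
    }
]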
@@ -438,7 +443,9 @@ struct p4_cpu {
struct pmc_hw *pc_hwpmcs[P4_NPMCS];
struct pmc_hw pc_p4pmcs[P4_NPMCS];
char pc_escrs[P4_NESCR];
- struct mtx pc_mtx; /* spin lock */
+ struct mtx pc_mtx; /* spin lock */
+ uint32_t pc_intrflag; /* NMI handler flags */
+ unsigned int pc_intrlock; /* NMI handler spin lock */
unsigned char pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
union {
pmc_value_t pc_hw[P4_NPMCS * P4_NHTT];
@@ -447,6 +454,20 @@ struct p4_cpu {
pmc_value_t pc_pmc_values[P4_NPMCS * P4_NHTT];
};
+/*
+ * A 'logical' CPU shares PMC resources with its partner 'physical'
+ * CPU, except for the TSC, which is architectural and hence separate.
+ * The 'logical' CPU's descriptor thus has pointers to the physical
+ * CPU's descriptor state, except for the TSC (row index 0), which is
+ * not shared.
+ */
+
+struct p4_logicalcpu {
+ struct pmc_cpu pc_common;
+ struct pmc_hw *pc_hwpmcs[P4_NPMCS];
+ struct pmc_hw pc_tsc;
+};
+
#define P4_PCPU_PMC_VALUE(PC,RI,CPU) (PC)->pc_pmc_values[(RI)*((CPU) & 1)]
#define P4_PCPU_HW_VALUE(PC,RI,CPU) (PC)->pc_si.pc_hw[(RI)*((CPU) & 1)]
#define P4_PCPU_SAVED_IP(PC,RI,CPU) (PC)->pc_si.pc_ip[(RI)*((CPU) & 1)]
@@ -468,6 +489,29 @@ struct p4_cpu {
#define P4_CPU_TO_FLAG(C) (pmc_cpu_is_logical((C)) ? 0x2 : 0x1)
+#define P4_PCPU_GET_INTRFLAG(PC,I) ((PC)->pc_intrflag & (1 << (I)))
+#define P4_PCPU_SET_INTRFLAG(PC,I,V) do { \
+ uint32_t __mask; \
+ __mask = 1 << (I); \
+ if ((V)) \
+ (PC)->pc_intrflag |= __mask; \
+ else \
+ (PC)->pc_intrflag &= ~__mask; \
+ } while (0)
+
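[Editor's note: a quick illustration of the intrflag semantics above, as a hypothetical user-space harness rather than driver code: the first hyperthread to service a shared CCCR overflow sets the bit for the partner's row index, and the partner later consumes and clears it.

    #include <assert.h>
    #include <stdint.h>

    struct fake_pc { uint32_t pc_intrflag; };

    /* Same shape as P4_PCPU_{GET,SET}_INTRFLAG above. */
    #define GET_INTRFLAG(PC,I)   ((PC)->pc_intrflag & (1 << (I)))
    #define SET_INTRFLAG(PC,I,V) do {                           \
            uint32_t __mask = 1 << (I);                         \
            if (V)                                              \
                    (PC)->pc_intrflag |= __mask;                \
            else                                                \
                    (PC)->pc_intrflag &= ~__mask;               \
        } while (0)

    int
    main(void)
    {
        struct fake_pc pc = { 0 };

        SET_INTRFLAG(&pc, 3, 1);        /* CPUx: row 3 also hit partner */
        assert(GET_INTRFLAG(&pc, 3));   /* CPUy: sees the pending flag  */
        SET_INTRFLAG(&pc, 3, 0);        /* CPUy: consumes and clears it */
        assert(!GET_INTRFLAG(&pc, 3));
        return 0;
    }
]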
+/*
+ * A minimal spin lock implementation for use inside the NMI handler.
+ *
+ * We don't want to use a regular spin lock here, because curthread
+ * may not be consistent at the time the handler is invoked.
+ */
+#define P4_PCPU_ACQ_INTR_SPINLOCK(PC) do { \
+ while (!atomic_cmpset_acq_int(&(PC)->pc_intrlock, 0, 1)) \
+ ia32_pause(); \
+ } while (0)
+#define P4_PCPU_REL_INTR_SPINLOCK(PC) \
+ atomic_store_rel_int(&(PC)->pc_intrlock, 0)
+
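[Editor's note: the pair of macros above forms a raw test-and-set spin lock with acquire/release semantics. A user-space analogue in C11 atomics follows, as a sketch assuming an x86 target; the kernel code uses FreeBSD's atomic_cmpset_acq_int()/atomic_store_rel_int() and ia32_pause() instead:

    #include <stdatomic.h>
    #include <immintrin.h>          /* _mm_pause(), x86 */

    static atomic_uint intrlock;

    static void
    intr_lock(void)
    {
            unsigned int expected;

            for (;;) {
                    expected = 0;
                    /* acquire: later accesses cannot move above the CAS */
                    if (atomic_compare_exchange_weak_explicit(&intrlock,
                        &expected, 1, memory_order_acquire,
                        memory_order_relaxed))
                            return;
                    _mm_pause();    /* same spin-wait hint as ia32_pause() */
            }
    }

    static void
    intr_unlock(void)
    {
            /* release: earlier accesses stay before the store */
            atomic_store_explicit(&intrlock, 0, memory_order_release);
    }
]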
/* ESCR row disposition */
static int p4_escrdisp[P4_NESCR];
@@ -538,6 +582,7 @@ p4_init(int cpu)
int n, phycpu;
char *pescr;
struct p4_cpu *pcs;
+ struct p4_logicalcpu *plcs;
struct pmc_hw *phw;
KASSERT(cpu >= 0 && cpu < mp_ncpus,
@@ -562,8 +607,23 @@ p4_init(int cpu)
cpu, phycpu));
if (pcs == NULL) /* decline to init */
return ENXIO;
+
p4_system_has_htt = 1;
- pmc_pcpu[cpu] = (struct pmc_cpu *) pcs;
+
+ MALLOC(plcs, struct p4_logicalcpu *,
+ sizeof(struct p4_logicalcpu), M_PMC, M_WAITOK|M_ZERO);
+
+ /* The TSC is architectural state and is not shared */
+ plcs->pc_hwpmcs[0] = &plcs->pc_tsc;
+ plcs->pc_tsc.phw_state = PMC_PHW_FLAG_IS_ENABLED |
+ PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(0) |
+ PMC_PHW_FLAG_IS_SHAREABLE;
+
+ /* Other PMCs are shared with the physical CPU */
+ for (n = 1; n < P4_NPMCS; n++)
+ plcs->pc_hwpmcs[n] = pcs->pc_hwpmcs[n];
+
+ pmc_pcpu[cpu] = (struct pmc_cpu *) plcs;
return 0;
}
@@ -605,16 +665,17 @@ p4_cleanup(int cpu)
PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu);
+ if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) == NULL)
+ return 0;
+
/*
- * Free up the per-cpu structure for the given cpu if
- * allocated, and if this is a physical CPU.
+ * If the CPU is physical we need to tear down the
+ * full MD state.
*/
-
- if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) != NULL &&
- !pmc_cpu_is_logical(cpu)) {
+ if (!pmc_cpu_is_logical(cpu))
mtx_destroy(&pcs->pc_mtx);
- FREE(pcs, M_PMC);
- }
+
+ FREE(pcs, M_PMC);
pmc_pcpu[cpu] = NULL;
@@ -637,7 +698,7 @@ p4_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
load_cr4(rcr4() | CR4_PCE);
- PMCDBG(MDP,SWI,2, "cr4=0x%x", rcr4());
+ PMCDBG(MDP,SWI,2, "cr4=0x%x", (uint32_t) rcr4());
return 0;
}
@@ -657,7 +718,7 @@ p4_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
/* always disallow the RDPMC instruction */
load_cr4(rcr4() & ~CR4_PCE);
- PMCDBG(MDP,SWO,2, "cr4=0x%x", rcr4());
+ PMCDBG(MDP,SWO,2, "cr4=0x%x", (uint32_t) rcr4());
return 0;
}
@@ -681,6 +742,26 @@ p4_read_pmc(int cpu, int ri, pmc_value_t *v)
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[p4,%d] illegal row-index %d", __LINE__, ri));
+
+ if (ri == 0) { /* TSC */
+#if DEBUG
+ pc = (struct p4_cpu *) pmc_pcpu[cpu];
+ phw = pc->pc_hwpmcs[ri];
+ pm = phw->phw_pmc;
+
+ KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__,
+ cpu, ri));
+ KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC,
+ ("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__, cpu, ri,
+ PMC_TO_CLASS(pm)));
+ KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)),
+ ("[p4,%d] TSC counter in non-counting mode", __LINE__));
+#endif
+ *v = rdtsc();
+ PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
+ return 0;
+ }
+
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
phw = pc->pc_hwpmcs[ri];
pd = &p4_pmcdesc[ri];
@@ -698,14 +779,6 @@ p4_read_pmc(int cpu, int ri, pmc_value_t *v)
PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);
- if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC) {
- KASSERT(PMC_IS_COUNTING_MODE(mode),
- ("[p4,%d] TSC counter in non-counting mode", __LINE__));
- *v = rdtsc();
- PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
- return 0;
- }
-
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));
@@ -747,6 +820,27 @@ p4_write_pmc(int cpu, int ri, pmc_value_t v)
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[amd,%d] illegal row-index %d", __LINE__, ri));
+
+ /*
+ * The P4's TSC register is writeable, but we don't allow a
+ * write as changing the TSC's value could interfere with
+ * timekeeping and other system functions.
+ */
+ if (ri == 0) {
+#if DEBUG
+ pc = (struct p4_cpu *) pmc_pcpu[cpu];
+ phw = pc->pc_hwpmcs[ri];
+ pm = phw->phw_pmc;
+ KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__,
+ cpu, ri));
+ KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC,
+ ("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__,
+ cpu, ri, PMC_TO_CLASS(pm)));
+#endif
+ return 0;
+ }
+
+ /* Shared PMCs */
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
phw = pc->pc_hwpmcs[ri];
pm = phw->phw_pmc;
@@ -762,14 +856,6 @@ p4_write_pmc(int cpu, int ri, pmc_value_t v)
mode, v);
/*
- * The P4's TSC register is writeable, but we don't allow a
- * write as changing the TSC's value could interfere with
- * timekeeping and other system functions.
- */
- if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC)
- return 0;
-
- /*
* write the PMC value to the register/saved value: for
* sampling mode PMCs, the value to be programmed into the PMC
* counter is -(C+1) where 'C' is the requested sample rate.
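[Editor's note: a worked example of the -(C+1) encoding described above, as a hypothetical standalone helper assuming the P4's 40-bit performance counters; the driver's own P4_RELOAD_COUNT_TO_PERFCTR_VALUE() macro is what the code actually uses:

    #include <stdint.h>

    #define P4_PERFCTR_WIDTH  40
    #define P4_PERFCTR_MASK   ((UINT64_C(1) << P4_PERFCTR_WIDTH) - 1)

    /* ~c == -(c+1) in two's complement, truncated to 40 bits: the
     * counter counts up and overflows after about 'c' events. */
    static inline uint64_t
    reload_to_perfctr(uint64_t c)
    {
            return ~c & P4_PERFCTR_MASK;
    }
]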
@@ -808,7 +894,21 @@ p4_config_pmc(int cpu, int ri, struct pmc *pm)
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[p4,%d] illegal row-index %d", __LINE__, ri));
- pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
+ PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
+
+ if (ri == 0) { /* TSC */
+ pc = (struct p4_cpu *) pmc_pcpu[cpu];
+ phw = pc->pc_hwpmcs[ri];
+
+ KASSERT(pm == NULL || phw->phw_pmc == NULL,
+ ("[p4,%d] hwpmc doubly config'ed", __LINE__));
+ phw->phw_pmc = pm;
+ return 0;
+ }
+
+ /* Shared PMCs */
+
+ pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
phw = pc->pc_hwpmcs[ri];
KASSERT(pm == NULL || phw->phw_pmc == NULL ||
@@ -826,9 +926,6 @@ p4_config_pmc(int cpu, int ri, struct pmc *pm)
("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
__LINE__, cpu, ri));
- PMCDBG(MDP,CFG,1, "cpu=%d ri=%d cfg=%d pm=%p", cpu, ri, cfgflags,
- pm);
-
cpuflag = P4_CPU_TO_FLAG(cpu);
if (pm) { /* config */
@@ -1073,8 +1170,8 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
/* CCCR fields */
if (caps & PMC_CAP_THRESHOLD)
- cccrvalue |= (a->pm_p4_cccrconfig & P4_CCCR_THRESHOLD_MASK) |
- P4_CCCR_COMPARE;
+ cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig &
+ P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE;
if (caps & PMC_CAP_EDGE)
cccrvalue |= P4_CCCR_EDGE;
@@ -1083,7 +1180,8 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
cccrvalue |= P4_CCCR_COMPLEMENT;
if (p4_system_has_htt)
- cccrvalue |= a->pm_p4_cccrconfig & P4_CCCR_ACTIVE_THREAD_MASK;
+ cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig &
+ P4_CCCR_ACTIVE_THREAD_MASK;
else /* no HTT; thread field should be '11b' */
cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);
@@ -1096,12 +1194,14 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
/* ESCR fields */
if (caps & PMC_CAP_QUALIFIER)
- escrvalue |= a->pm_p4_escrconfig & P4_ESCR_EVENT_MASK_MASK;
+ escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig &
+ P4_ESCR_EVENT_MASK_MASK;
if (caps & PMC_CAP_TAGGING)
- escrvalue |= (a->pm_p4_escrconfig & P4_ESCR_TAG_VALUE_MASK) |
- P4_ESCR_TAG_ENABLE;
+ escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
+ P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE;
if (caps & PMC_CAP_QUALIFIER)
- escrvalue |= (a->pm_p4_escrconfig & P4_ESCR_EVENT_MASK_MASK);
+ escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
+ P4_ESCR_EVENT_MASK_MASK);
/* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
tflags = 0;
@@ -1434,116 +1534,150 @@ p4_stop_pmc(int cpu, int ri)
 * The hardware sets the CCCR_OVF bit whenever a counter overflow occurs, so the
 * handler examines all 18 CCCR registers, processing the counters that have overflowed.
*
- * On HTT machines, multiple logical CPUs may try to enter the NMI service
- * routine at the same time.
+ * On HTT machines, the CCCR register is shared and will interrupt
+ * both logical processors if so configured. Thus multiple logical
+ * CPUs could enter the NMI service routine at the same time. These
+ * will get serialized using a per-cpu spinlock dedicated for use in
+ * the NMI handler.
*/
-extern volatile lapic_t *lapic;
-
-static void
-p4_lapic_enable_pmc_interrupt(void)
-{
- uint32_t value;
-
- value = lapic->lvt_pcint;
- value &= ~APIC_LVT_M;
- lapic->lvt_pcint = value;
-}
-
-
static int
p4_intr(int cpu, uintptr_t eip, int usermode)
{
- int i, pmc_interrupted;
- uint32_t cccrval, pmi_ovf_mask;
+ int i, did_interrupt, error, ri;
+ uint32_t cccrval, ovf_mask, ovf_partner;
struct p4_cpu *pc;
struct pmc_hw *phw;
struct pmc *pm;
pmc_value_t v;
- (void) eip;
- (void) usermode;
- PMCDBG(MDP,INT, 1, "cpu=%d eip=%x pcint=0x%x", cpu, eip,
- lapic->lvt_pcint);
+ PMCDBG(MDP,INT, 1, "cpu=%d eip=%p um=%d", cpu, (void *) eip, usermode);
- pmc_interrupted = 0;
- pc = (struct p4_cpu *) pmc_pcpu[cpu];
+ pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
- pmi_ovf_mask = pmc_cpu_is_logical(cpu) ?
+ ovf_mask = pmc_cpu_is_logical(cpu) ?
P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
- pmi_ovf_mask |= P4_CCCR_OVF;
+ ovf_mask |= P4_CCCR_OVF;
+ if (p4_system_has_htt)
+ ovf_partner = pmc_cpu_is_logical(cpu) ? P4_CCCR_OVF_PMI_T0 :
+ P4_CCCR_OVF_PMI_T1;
+ else
+ ovf_partner = 0;
+ did_interrupt = 0;
+
+ if (p4_system_has_htt)
+ P4_PCPU_ACQ_INTR_SPINLOCK(pc);
/*
- * Loop through all CCCRs, looking for ones that have the
- * OVF_PMI bit set for our logical CPU.
+ * Loop through all CCCRs, looking for ones that have
+ * interrupted this CPU.
*/
+ for (i = 0; i < P4_NPMCS-1; i++) {
+
+ ri = i + 1; /* row index */
- for (i = 1; i < P4_NPMCS; i++) {
- cccrval = rdmsr(P4_CCCR_MSR_FIRST + i - 1);
+ /*
+ * Check if our partner logical CPU has already marked
+ * this PMC as having interrupted it. If so, reset
+ * the flag and process the interrupt, but leave the
+ * hardware alone.
+ */
+ if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) {
+ P4_PCPU_SET_INTRFLAG(pc,ri,0);
+ did_interrupt = 1;
- if ((cccrval & pmi_ovf_mask) != pmi_ovf_mask)
+ /*
+ * Ignore de-configured or stopped PMCs.
+ * Ignore PMCs not in sampling mode.
+ */
+ phw = pc->pc_hwpmcs[ri];
+ pm = phw->phw_pmc;
+ if (pm == NULL ||
+ pm->pm_state != PMC_STATE_RUNNING ||
+ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
+ continue;
+ }
+ (void) pmc_process_interrupt(cpu, pm, eip, usermode);
continue;
+ }
- v = rdmsr(P4_PERFCTR_MSR_FIRST + i - 1);
+ /*
+ * Fresh interrupt. Look for the CCCR_OVF bit
+ * and the OVF_Tx bit for this logical
+ * processor being set.
+ */
+ cccrval = rdmsr(P4_CCCR_MSR_FIRST + i);
- pmc_interrupted = 1;
+ if ((cccrval & ovf_mask) != ovf_mask)
+ continue;
- PMCDBG(MDP,INT, 2, "ri=%d v=%jx", i, v);
+ /*
+ * If the other logical CPU would also have been
+ * interrupted due to the PMC being shared, record
+ * this fact in the per-cpu saved interrupt flag
+ * bitmask.
+ */
+ if (p4_system_has_htt && (cccrval & ovf_partner))
+ P4_PCPU_SET_INTRFLAG(pc, ri, 1);
+
+ v = rdmsr(P4_PERFCTR_MSR_FIRST + i);
+
+ PMCDBG(MDP,INT, 2, "ri=%d v=%jx", ri, v);
- /* Stop the counter, and turn off the overflow bit */
+ /* Stop the counter, and reset the overflow bit */
cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
- wrmsr(P4_CCCR_MSR_FIRST + i - 1, cccrval);
+ wrmsr(P4_CCCR_MSR_FIRST + i, cccrval);
- phw = pc->pc_hwpmcs[i];
- pm = phw->phw_pmc;
+ did_interrupt = 1;
/*
- * Ignore de-configured or stopped PMCs.
- * Also ignore counting mode PMCs that may
- * have overflowed their counters.
+ * Ignore de-configured or stopped PMCs. Ignore PMCs
+ * not in sampling mode.
*/
+ phw = pc->pc_hwpmcs[ri];
+ pm = phw->phw_pmc;
+
if (pm == NULL ||
pm->pm_state != PMC_STATE_RUNNING ||
- !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
continue;
+ }
/*
- * If the previous sample hasn't been read yet, the
- * sampling interrupt is coming in too fast for the
- * rest of the system to cope. Do not re-enable the
- * counter.
+ * Process the interrupt. Re-enable the PMC if
+ * processing was successful.
*/
-
- if (P4_PCPU_SAVED_IP(pc,i,cpu)) {
- atomic_add_int(&pmc_stats.pm_intr_ignored, 1);
- continue;
- }
+ error = pmc_process_interrupt(cpu, pm, eip, usermode);
/*
- * write the the reload count and restart the
- * hardware.
+ * Only the first processor of a HTT pair executing the
+ * NMI handler restarts a PMC, and only if there were no
+ * errors.
*/
-
v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
pm->pm_sc.pm_reloadcount);
- wrmsr(P4_PERFCTR_MSR_FIRST + i - 1, v);
- wrmsr(P4_CCCR_MSR_FIRST + i - 1,
- cccrval | P4_CCCR_ENABLE);
+ wrmsr(P4_PERFCTR_MSR_FIRST + i, v);
+ if (error == 0)
+ wrmsr(P4_CCCR_MSR_FIRST + i,
+ cccrval | P4_CCCR_ENABLE);
}
- if (pmc_interrupted) {
+ /* allow the other CPU to proceed */
+ if (p4_system_has_htt)
+ P4_PCPU_REL_INTR_SPINLOCK(pc);
- /*
- * On Intel CPUs, the PMC 'pcint' entry in the LAPIC
- * gets masked when a PMC interrupts the CPU. We need
- * to unmask this.
- */
- p4_lapic_enable_pmc_interrupt();
+ /*
+ * On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets
+ * masked when a PMC interrupts the CPU. We need to unmask
+ * the interrupt source explicitly.
+ */
- /* XXX: Invoke helper (non-NMI) interrupt here */
- }
+ if (did_interrupt)
+ pmc_x86_lapic_enable_pmc_interrupt();
+ else
+ atomic_add_int(&pmc_stats.pm_intr_ignored, 1);
- return pmc_interrupted;
+ return did_interrupt;
}
/*