summaryrefslogtreecommitdiffstats
diff options
context:
space:
mode:
-rw-r--r--etc/mtree/BSD.include.dist2
-rw-r--r--include/Makefile2
-rw-r--r--lib/libpmc/Makefile14
-rw-r--r--lib/libpmc/libpmc.c1464
-rw-r--r--lib/libpmc/pmc.3104
-rw-r--r--lib/libpmc/pmc.h50
-rw-r--r--lib/libpmc/pmclog.3276
-rw-r--r--lib/libpmc/pmclog.c532
-rw-r--r--lib/libpmc/pmclog.h146
-rw-r--r--share/man/man4/hwpmc.4131
-rw-r--r--sys/alpha/include/pmc_mdep.h13
-rw-r--r--sys/amd64/include/pmc_mdep.h51
-rw-r--r--sys/arm/include/pmc_mdep.h14
-rw-r--r--sys/conf/files1
-rw-r--r--sys/conf/files.alpha1
-rw-r--r--sys/conf/files.amd642
-rw-r--r--sys/conf/files.arm1
-rw-r--r--sys/conf/files.i3862
-rw-r--r--sys/conf/files.ia641
-rw-r--r--sys/conf/files.pc982
-rw-r--r--sys/conf/files.powerpc1
-rw-r--r--sys/conf/files.sparc641
-rw-r--r--sys/dev/hwpmc/hwpmc_alpha.c40
-rw-r--r--sys/dev/hwpmc/hwpmc_amd.c284
-rw-r--r--sys/dev/hwpmc/hwpmc_amd.h103
-rw-r--r--sys/dev/hwpmc/hwpmc_arm.c40
-rw-r--r--sys/dev/hwpmc/hwpmc_ia64.c40
-rw-r--r--sys/dev/hwpmc/hwpmc_logging.c978
-rw-r--r--sys/dev/hwpmc/hwpmc_mod.c2149
-rw-r--r--sys/dev/hwpmc/hwpmc_pentium.c3
-rw-r--r--sys/dev/hwpmc/hwpmc_pentium.h72
-rw-r--r--sys/dev/hwpmc/hwpmc_piv.c346
-rw-r--r--sys/dev/hwpmc/hwpmc_piv.h124
-rw-r--r--sys/dev/hwpmc/hwpmc_powerpc.c40
-rw-r--r--sys/dev/hwpmc/hwpmc_ppro.c168
-rw-r--r--sys/dev/hwpmc/hwpmc_ppro.h83
-rw-r--r--sys/dev/hwpmc/hwpmc_sparc64.c40
-rw-r--r--sys/dev/hwpmc/hwpmc_x86.c (renamed from sys/dev/hwpmc/hwpmc_intel.c)52
-rw-r--r--sys/dev/hwpmc/pmc_events.h530
-rw-r--r--sys/i386/include/pmc_mdep.h184
-rw-r--r--sys/ia64/include/pmc_mdep.h14
-rw-r--r--sys/kern/kern_exec.c5
-rw-r--r--sys/kern/kern_pmc.c9
-rw-r--r--sys/modules/hwpmc/Makefile32
-rw-r--r--sys/powerpc/include/pmc_mdep.h15
-rw-r--r--sys/sparc64/include/pmc_mdep.h14
-rw-r--r--sys/sys/pmc.h770
-rw-r--r--sys/sys/pmckern.h10
-rw-r--r--sys/sys/pmclog.h229
-rw-r--r--usr.sbin/pmccontrol/pmccontrol.c23
-rw-r--r--usr.sbin/pmcstat/pmcstat.860
-rw-r--r--usr.sbin/pmcstat/pmcstat.c590
52 files changed, 6945 insertions, 2913 deletions
diff --git a/etc/mtree/BSD.include.dist b/etc/mtree/BSD.include.dist
index 874be31..2b99ebb 100644
--- a/etc/mtree/BSD.include.dist
+++ b/etc/mtree/BSD.include.dist
@@ -40,6 +40,8 @@
..
firewire
..
+ hwpmc
+ ..
ic
..
ieee488
diff --git a/include/Makefile b/include/Makefile
index d38e500..4fb1ad1 100644
--- a/include/Makefile
+++ b/include/Makefile
@@ -36,7 +36,7 @@ LDIRS= bsm cam geom net net80211 netatalk netatm netgraph netinet netinet6 \
pccard posix4 sys vm
LSUBDIRS= cam/scsi \
- dev/acpica dev/an dev/bktr dev/firewire \
+ dev/acpica dev/an dev/bktr dev/firewire dev/hwpmc \
dev/ic dev/iicbus ${_dev_ieee488} dev/ofw \
dev/pbio dev/ppbus dev/smbus dev/usb dev/wi dev/utopia \
fs/devfs fs/fdescfs fs/fifofs fs/msdosfs fs/ntfs fs/nullfs \
diff --git a/lib/libpmc/Makefile b/lib/libpmc/Makefile
index 7d24d85..c2560bd 100644
--- a/lib/libpmc/Makefile
+++ b/lib/libpmc/Makefile
@@ -2,12 +2,12 @@
LIB= pmc
-SRCS= libpmc.c
-INCS= pmc.h
+SRCS= libpmc.c pmclog.c
+INCS= pmc.h pmclog.h
WARNS?= 6
-MAN= pmc.3
+MAN= pmc.3 pmclog.3
MLINKS+= \
pmc.3 pmc_allocate.3 \
@@ -19,6 +19,7 @@ MLINKS+= \
pmc.3 pmc_disable.3 \
pmc.3 pmc_enable.3 \
pmc.3 pmc_event_names_of_class.3 \
+ pmc.3 pmc_flush_logfile.3 \
pmc.3 pmc_get_driver_stats.3 \
pmc.3 pmc_init.3 \
pmc.3 pmc_name_of_capability.3 \
@@ -38,6 +39,13 @@ MLINKS+= \
pmc.3 pmc_stop.3 \
pmc.3 pmc_width.3 \
pmc.3 pmc_write.3 \
+ pmc.3 pmc_writelog.3 \
pmc.3 pmc_x86_get_msr.3
+MLINKS+= \
+ pmclog.3 pmclog_open.3 \
+ pmclog.3 pmclog_close.3 \
+ pmclog.3 pmclog_feed.3 \
+ pmclog.3 pmclog_read.3
+
.include <bsd.lib.mk>
diff --git a/lib/libpmc/libpmc.c b/lib/libpmc/libpmc.c
index 272d25a..09cc2b4 100644
--- a/lib/libpmc/libpmc.c
+++ b/lib/libpmc/libpmc.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2003,2004 Joseph Koshy
+ * Copyright (c) 2003-2005 Joseph Koshy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -46,14 +46,17 @@ __FBSDID("$FreeBSD$");
#if defined(__i386__)
static int k7_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
struct pmc_op_pmcallocate *_pmc_config);
-static int p6_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+#endif
+#if defined(__amd64__)
+static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
struct pmc_op_pmcallocate *_pmc_config);
+#endif
+#if defined(__i386__)
static int p4_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
struct pmc_op_pmcallocate *_pmc_config);
static int p5_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
struct pmc_op_pmcallocate *_pmc_config);
-#elif defined(__amd64__)
-static int k8_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
+static int p6_allocate_pmc(enum pmc_event _pe, char *_ctrspec,
struct pmc_op_pmcallocate *_pmc_config);
#endif
@@ -212,7 +215,7 @@ k7_allocate_pmc(enum pmc_event pe, char *ctrspec,
int c, has_unitmask;
uint32_t count, unitmask;
- pmc_config->pm_amd_config = 0;
+ pmc_config->pm_md.pm_amd.pm_amd_config = 0;
pmc_config->pm_caps |= PMC_CAP_READ;
if (pe == PMC_EV_TSC_TSC) {
@@ -226,7 +229,7 @@ k7_allocate_pmc(enum pmc_event pe, char *ctrspec,
pe == PMC_EV_K7_DC_REFILLS_FROM_SYSTEM ||
pe == PMC_EV_K7_DC_WRITEBACKS) {
has_unitmask = 1;
- unitmask = K7_PMC_UNITMASK_MOESI;
+ unitmask = AMD_PMC_UNITMASK_MOESI;
} else
unitmask = has_unitmask = 0;
@@ -243,7 +246,8 @@ k7_allocate_pmc(enum pmc_event pe, char *ctrspec,
return -1;
pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
- pmc_config->pm_amd_config |= K7_PMC_TO_COUNTER(count);
+ pmc_config->pm_md.pm_amd.pm_amd_config |=
+ AMD_PMC_TO_COUNTER(count);
} else if (KWMATCH(p, K7_KW_EDGE)) {
pmc_config->pm_caps |= PMC_CAP_EDGE;
@@ -261,15 +265,15 @@ k7_allocate_pmc(enum pmc_event pe, char *ctrspec,
while ((c = tolower(*q++)) != 0)
if (c == 'm')
- unitmask |= K7_PMC_UNITMASK_M;
+ unitmask |= AMD_PMC_UNITMASK_M;
else if (c == 'o')
- unitmask |= K7_PMC_UNITMASK_O;
+ unitmask |= AMD_PMC_UNITMASK_O;
else if (c == 'e')
- unitmask |= K7_PMC_UNITMASK_E;
+ unitmask |= AMD_PMC_UNITMASK_E;
else if (c == 's')
- unitmask |= K7_PMC_UNITMASK_S;
+ unitmask |= AMD_PMC_UNITMASK_S;
else if (c == 'i')
- unitmask |= K7_PMC_UNITMASK_I;
+ unitmask |= AMD_PMC_UNITMASK_I;
else if (c == '+')
continue;
else
@@ -286,14 +290,387 @@ k7_allocate_pmc(enum pmc_event pe, char *ctrspec,
if (has_unitmask) {
pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
- pmc_config->pm_amd_config |=
- K7_PMC_TO_UNITMASK(unitmask);
+ pmc_config->pm_md.pm_amd.pm_amd_config |=
+ AMD_PMC_TO_UNITMASK(unitmask);
}
return 0;
}
+#endif
+
+#if defined(__amd64__)
+
+/*
+ * AMD K8 PMCs.
+ *
+ * These are very similar to AMD K7 PMCs, but support more kinds of
+ * events.
+ */
+
+static struct pmc_event_alias k8_aliases[] = {
+ EV_ALIAS("branches", "k8-fr-retired-taken-branches"),
+ EV_ALIAS("branch-mispredicts",
+ "k8-fr-retired-taken-branches-mispredicted"),
+ EV_ALIAS("cycles", "tsc"),
+ EV_ALIAS("dc-misses", "k8-dc-miss"),
+ EV_ALIAS("ic-misses", "k8-ic-miss"),
+ EV_ALIAS("instructions", "k8-fr-retired-x86-instructions"),
+ EV_ALIAS("interrupts", "k8-fr-taken-hardware-interrupts"),
+ EV_ALIAS(NULL, NULL)
+};
+
+#define __K8MASK(N,V) PMCMASK(N,(1 << (V)))
+
+/*
+ * Parsing tables
+ */
+
+/* fp dispatched fpu ops */
+static const struct pmc_masks k8_mask_fdfo[] = {
+ __K8MASK(add-pipe-excluding-junk-ops, 0),
+ __K8MASK(multiply-pipe-excluding-junk-ops, 1),
+ __K8MASK(store-pipe-excluding-junk-ops, 2),
+ __K8MASK(add-pipe-junk-ops, 3),
+ __K8MASK(multiply-pipe-junk-ops, 4),
+ __K8MASK(store-pipe-junk-ops, 5),
+ NULLMASK
+};
+
+/* ls segment register loads */
+static const struct pmc_masks k8_mask_lsrl[] = {
+ __K8MASK(es, 0),
+ __K8MASK(cs, 1),
+ __K8MASK(ss, 2),
+ __K8MASK(ds, 3),
+ __K8MASK(fs, 4),
+ __K8MASK(gs, 5),
+ __K8MASK(hs, 6),
+ NULLMASK
+};
+
+/* ls locked operation */
+static const struct pmc_masks k8_mask_llo[] = {
+ __K8MASK(locked-instructions, 0),
+ __K8MASK(cycles-in-request, 1),
+ __K8MASK(cycles-to-complete, 2),
+ NULLMASK
+};
+
+/* dc refill from {l2,system} and dc copyback */
+static const struct pmc_masks k8_mask_dc[] = {
+ __K8MASK(invalid, 0),
+ __K8MASK(shared, 1),
+ __K8MASK(exclusive, 2),
+ __K8MASK(owner, 3),
+ __K8MASK(modified, 4),
+ NULLMASK
+};
+
+/* dc one bit ecc error */
+static const struct pmc_masks k8_mask_dobee[] = {
+ __K8MASK(scrubber, 0),
+ __K8MASK(piggyback, 1),
+ NULLMASK
+};
+
+/* dc dispatched prefetch instructions */
+static const struct pmc_masks k8_mask_ddpi[] = {
+ __K8MASK(load, 0),
+ __K8MASK(store, 1),
+ __K8MASK(nta, 2),
+ NULLMASK
+};
+
+/* dc dcache accesses by locks */
+static const struct pmc_masks k8_mask_dabl[] = {
+ __K8MASK(accesses, 0),
+ __K8MASK(misses, 1),
+ NULLMASK
+};
+
+/* bu internal l2 request */
+static const struct pmc_masks k8_mask_bilr[] = {
+ __K8MASK(ic-fill, 0),
+ __K8MASK(dc-fill, 1),
+ __K8MASK(tlb-reload, 2),
+ __K8MASK(tag-snoop, 3),
+ __K8MASK(cancelled, 4),
+ NULLMASK
+};
+
+/* bu fill request l2 miss */
+static const struct pmc_masks k8_mask_bfrlm[] = {
+ __K8MASK(ic-fill, 0),
+ __K8MASK(dc-fill, 1),
+ __K8MASK(tlb-reload, 2),
+ NULLMASK
+};
+
+/* bu fill into l2 */
+static const struct pmc_masks k8_mask_bfil[] = {
+ __K8MASK(dirty-l2-victim, 0),
+ __K8MASK(victim-from-l2, 1),
+ NULLMASK
+};
+
+/* fr retired fpu instructions */
+static const struct pmc_masks k8_mask_frfi[] = {
+ __K8MASK(x87, 0),
+ __K8MASK(mmx-3dnow, 1),
+ __K8MASK(packed-sse-sse2, 2),
+ __K8MASK(scalar-sse-sse2, 3),
+ NULLMASK
+};
+
+/* fr retired fastpath double op instructions */
+static const struct pmc_masks k8_mask_frfdoi[] = {
+ __K8MASK(low-op-pos-0, 0),
+ __K8MASK(low-op-pos-1, 1),
+ __K8MASK(low-op-pos-2, 2),
+ NULLMASK
+};
+
+/* fr fpu exceptions */
+static const struct pmc_masks k8_mask_ffe[] = {
+ __K8MASK(x87-reclass-microfaults, 0),
+ __K8MASK(sse-retype-microfaults, 1),
+ __K8MASK(sse-reclass-microfaults, 2),
+ __K8MASK(sse-and-x87-microtraps, 3),
+ NULLMASK
+};
+
+/* nb memory controller page access event */
+static const struct pmc_masks k8_mask_nmcpae[] = {
+ __K8MASK(page-hit, 0),
+ __K8MASK(page-miss, 1),
+ __K8MASK(page-conflict, 2),
+ NULLMASK
+};
+
+/* nb memory controller turnaround */
+static const struct pmc_masks k8_mask_nmct[] = {
+ __K8MASK(dimm-turnaround, 0),
+ __K8MASK(read-to-write-turnaround, 1),
+ __K8MASK(write-to-read-turnaround, 2),
+ NULLMASK
+};
+
+/* nb memory controller bypass saturation */
+static const struct pmc_masks k8_mask_nmcbs[] = {
+ __K8MASK(memory-controller-hi-pri-bypass, 0),
+ __K8MASK(memory-controller-lo-pri-bypass, 1),
+ __K8MASK(dram-controller-interface-bypass, 2),
+ __K8MASK(dram-controller-queue-bypass, 3),
+ NULLMASK
+};
+
+/* nb sized commands */
+static const struct pmc_masks k8_mask_nsc[] = {
+ __K8MASK(nonpostwrszbyte, 0),
+ __K8MASK(nonpostwrszdword, 1),
+ __K8MASK(postwrszbyte, 2),
+ __K8MASK(postwrszdword, 3),
+ __K8MASK(rdszbyte, 4),
+ __K8MASK(rdszdword, 5),
+ __K8MASK(rdmodwr, 6),
+ NULLMASK
+};
+
+/* nb probe result */
+static const struct pmc_masks k8_mask_npr[] = {
+ __K8MASK(probe-miss, 0),
+ __K8MASK(probe-hit, 1),
+ __K8MASK(probe-hit-dirty-no-memory-cancel, 2),
+ __K8MASK(probe-hit-dirty-with-memory-cancel, 3),
+ NULLMASK
+};
+
+/* nb hypertransport bus bandwidth */
+static const struct pmc_masks k8_mask_nhbb[] = { /* HT bus bandwidth */
+ __K8MASK(command, 0),
+ __K8MASK(data, 1),
+ __K8MASK(buffer-release, 2),
+ __K8MASK(nop, 3),
+ NULLMASK
+};
+
+#undef __K8MASK
+
+#define K8_KW_COUNT "count"
+#define K8_KW_EDGE "edge"
+#define K8_KW_INV "inv"
+#define K8_KW_MASK "mask"
+#define K8_KW_OS "os"
+#define K8_KW_USR "usr"
+
+static int
+k8_allocate_pmc(enum pmc_event pe, char *ctrspec,
+ struct pmc_op_pmcallocate *pmc_config)
+{
+ char *e, *p, *q;
+ int n;
+ uint32_t count, evmask;
+ const struct pmc_masks *pm, *pmask;
+
+ pmc_config->pm_caps |= PMC_CAP_READ;
+ pmc_config->pm_md.pm_amd.pm_amd_config = 0;
+
+ if (pe == PMC_EV_TSC_TSC) {
+ /* TSC events must be unqualified. */
+ if (ctrspec && *ctrspec != '\0')
+ return -1;
+ return 0;
+ }
+
+ pmask = NULL;
+ evmask = 0;
+
+#define __K8SETMASK(M) pmask = k8_mask_##M
+
+ /* setup parsing tables */
+ switch (pe) {
+ case PMC_EV_K8_FP_DISPATCHED_FPU_OPS:
+ __K8SETMASK(fdfo);
+ break;
+ case PMC_EV_K8_LS_SEGMENT_REGISTER_LOAD:
+ __K8SETMASK(lsrl);
+ break;
+ case PMC_EV_K8_LS_LOCKED_OPERATION:
+ __K8SETMASK(llo);
+ break;
+ case PMC_EV_K8_DC_REFILL_FROM_L2:
+ case PMC_EV_K8_DC_REFILL_FROM_SYSTEM:
+ case PMC_EV_K8_DC_COPYBACK:
+ __K8SETMASK(dc);
+ break;
+ case PMC_EV_K8_DC_ONE_BIT_ECC_ERROR:
+ __K8SETMASK(dobee);
+ break;
+ case PMC_EV_K8_DC_DISPATCHED_PREFETCH_INSTRUCTIONS:
+ __K8SETMASK(ddpi);
+ break;
+ case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS:
+ __K8SETMASK(dabl);
+ break;
+ case PMC_EV_K8_BU_INTERNAL_L2_REQUEST:
+ __K8SETMASK(bilr);
+ break;
+ case PMC_EV_K8_BU_FILL_REQUEST_L2_MISS:
+ __K8SETMASK(bfrlm);
+ break;
+ case PMC_EV_K8_BU_FILL_INTO_L2:
+ __K8SETMASK(bfil);
+ break;
+ case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS:
+ __K8SETMASK(frfi);
+ break;
+ case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS:
+ __K8SETMASK(frfdoi);
+ break;
+ case PMC_EV_K8_FR_FPU_EXCEPTIONS:
+ __K8SETMASK(ffe);
+ break;
+ case PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT:
+ __K8SETMASK(nmcpae);
+ break;
+ case PMC_EV_K8_NB_MEMORY_CONTROLLER_TURNAROUND:
+ __K8SETMASK(nmct);
+ break;
+ case PMC_EV_K8_NB_MEMORY_CONTROLLER_BYPASS_SATURATION:
+ __K8SETMASK(nmcbs);
+ break;
+ case PMC_EV_K8_NB_SIZED_COMMANDS:
+ __K8SETMASK(nsc);
+ break;
+ case PMC_EV_K8_NB_PROBE_RESULT:
+ __K8SETMASK(npr);
+ break;
+ case PMC_EV_K8_NB_HT_BUS0_BANDWIDTH:
+ case PMC_EV_K8_NB_HT_BUS1_BANDWIDTH:
+ case PMC_EV_K8_NB_HT_BUS2_BANDWIDTH:
+ __K8SETMASK(nhbb);
+ break;
+
+ default:
+ break; /* no options defined */
+ }
+
+ pmc_config->pm_caps |= PMC_CAP_WRITE;
+
+ while ((p = strsep(&ctrspec, ",")) != NULL) {
+ if (KWPREFIXMATCH(p, K8_KW_COUNT "=")) {
+ q = strchr(p, '=');
+ if (*++q == '\0') /* skip '=' */
+ return -1;
+
+ count = strtol(q, &e, 0);
+ if (e == q || *e != '\0')
+ return -1;
+
+ pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
+ pmc_config->pm_md.pm_amd.pm_amd_config |=
+ AMD_PMC_TO_COUNTER(count);
+
+ } else if (KWMATCH(p, K8_KW_EDGE)) {
+ pmc_config->pm_caps |= PMC_CAP_EDGE;
+ } else if (KWMATCH(p, K8_KW_INV)) {
+ pmc_config->pm_caps |= PMC_CAP_INVERT;
+ } else if (KWPREFIXMATCH(p, K8_KW_MASK "=")) {
+ if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0)
+ return -1;
+ pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+ } else if (KWMATCH(p, K8_KW_OS)) {
+ pmc_config->pm_caps |= PMC_CAP_SYSTEM;
+ } else if (KWMATCH(p, K8_KW_USR)) {
+ pmc_config->pm_caps |= PMC_CAP_USER;
+ } else
+ return -1;
+ }
+
+ /* other post processing */
+
+ switch (pe) {
+ case PMC_EV_K8_FP_DISPATCHED_FPU_OPS:
+ case PMC_EV_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED:
+ case PMC_EV_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS:
+ case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS:
+ case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS:
+ case PMC_EV_K8_FR_FPU_EXCEPTIONS:
+ /* XXX only available in rev B and later */
+ break;
+ case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS:
+ /* XXX only available in rev C and later */
+ break;
+ case PMC_EV_K8_LS_LOCKED_OPERATION:
+ /* XXX CPU Rev A,B evmask is to be zero */
+ if (evmask & (evmask - 1)) /* > 1 bit set */
+ return -1;
+ if (evmask == 0) {
+ evmask = 0x01; /* Rev C and later: #instrs */
+ pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+ }
+ break;
+ default:
+ if (evmask == 0 && pmask != NULL) {
+ for (pm = pmask; pm->pm_name; pm++)
+ evmask |= pm->pm_value;
+ pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
+ }
+ }
+
+ if (pmc_config->pm_caps & PMC_CAP_QUALIFIER)
+ pmc_config->pm_md.pm_amd.pm_amd_config =
+ AMD_PMC_TO_UNITMASK(evmask);
+
+ return 0;
+}
+
+#endif
+
+#if defined(__i386__)
+
/*
* Intel P4 PMCs
*/
@@ -629,7 +1006,8 @@ p4_allocate_pmc(enum pmc_event pe, char *ctrspec,
const struct pmc_masks *pm, *pmask;
pmc_config->pm_caps |= PMC_CAP_READ;
- pmc_config->pm_p4_cccrconfig = pmc_config->pm_p4_escrconfig = 0;
+ pmc_config->pm_md.pm_p4.pm_p4_cccrconfig =
+ pmc_config->pm_md.pm_p4.pm_p4_escrconfig = 0;
if (pe == PMC_EV_TSC_TSC) {
/* TSC must not be further qualified */
@@ -838,7 +1216,7 @@ p4_allocate_pmc(enum pmc_event pe, char *ctrspec,
return -1;
pmc_config->pm_caps |= PMC_CAP_TAGGING;
- pmc_config->pm_p4_escrconfig |=
+ pmc_config->pm_md.pm_p4.pm_p4_escrconfig |=
P4_ESCR_TO_TAG_VALUE(count);
} else if (KWPREFIXMATCH(p, P4_KW_THRESHOLD "=")) {
q = strchr(p, '=');
@@ -850,8 +1228,10 @@ p4_allocate_pmc(enum pmc_event pe, char *ctrspec,
return -1;
pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
- pmc_config->pm_p4_cccrconfig &= ~P4_CCCR_THRESHOLD_MASK;
- pmc_config->pm_p4_cccrconfig |= P4_CCCR_TO_THRESHOLD(count);
+ pmc_config->pm_md.pm_p4.pm_p4_cccrconfig &=
+ ~P4_CCCR_THRESHOLD_MASK;
+ pmc_config->pm_md.pm_p4.pm_p4_cccrconfig |=
+ P4_CCCR_TO_THRESHOLD(count);
} else if (KWMATCH(p, P4_KW_USR))
pmc_config->pm_caps |= PMC_CAP_USER;
else
@@ -865,7 +1245,7 @@ p4_allocate_pmc(enum pmc_event pe, char *ctrspec,
pmc_config->pm_caps |= PMC_CAP_EDGE;
/* fill in thread activity mask */
- pmc_config->pm_p4_cccrconfig |=
+ pmc_config->pm_md.pm_p4.pm_p4_cccrconfig |=
P4_CCCR_TO_ACTIVE_THREAD(cccractivemask);
if (evmask)
@@ -896,12 +1276,29 @@ p4_allocate_pmc(enum pmc_event pe, char *ctrspec,
}
}
- pmc_config->pm_p4_escrconfig = P4_ESCR_TO_EVENT_MASK(evmask);
+ pmc_config->pm_md.pm_p4.pm_p4_escrconfig =
+ P4_ESCR_TO_EVENT_MASK(evmask);
return 0;
}
/*
+ * Pentium style PMCs
+ */
+
+static struct pmc_event_alias p5_aliases[] = {
+ EV_ALIAS("cycles", "tsc"),
+ EV_ALIAS(NULL, NULL)
+};
+
+static int
+p5_allocate_pmc(enum pmc_event pe, char *ctrspec,
+ struct pmc_op_pmcallocate *pmc_config)
+{
+ return -1 || pe || ctrspec || pmc_config; /* shut up gcc */
+}
+
+/*
* Pentium Pro style PMCs. These PMCs are found in Pentium II, Pentium III,
* and Pentium M CPUs.
*/
@@ -1034,7 +1431,7 @@ p6_allocate_pmc(enum pmc_event pe, char *ctrspec,
const struct pmc_masks *pm, *pmask;
pmc_config->pm_caps |= PMC_CAP_READ;
- pmc_config->pm_p6_config = 0;
+ pmc_config->pm_md.pm_ppro.pm_ppro_config = 0;
if (pe == PMC_EV_TSC_TSC) {
if (ctrspec && *ctrspec != '\0')
@@ -1113,7 +1510,8 @@ p6_allocate_pmc(enum pmc_event pe, char *ctrspec,
if (e == q || *e != '\0')
return -1;
pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
- pmc_config->pm_p6_config |= P6_EVSEL_TO_CMASK(count);
+ pmc_config->pm_md.pm_ppro.pm_ppro_config |=
+ P6_EVSEL_TO_CMASK(count);
} else if (KWMATCH(p, P6_KW_EDGE)) {
pmc_config->pm_caps |= PMC_CAP_EDGE;
} else if (KWMATCH(p, P6_KW_INV)) {
@@ -1222,396 +1620,274 @@ p6_allocate_pmc(enum pmc_event pe, char *ctrspec,
}
if (pmc_config->pm_caps & PMC_CAP_QUALIFIER)
- pmc_config->pm_p6_config |= P6_EVSEL_TO_UMASK(evmask);
+ pmc_config->pm_md.pm_ppro.pm_ppro_config |=
+ P6_EVSEL_TO_UMASK(evmask);
return 0;
}
+#endif
+
/*
- * Pentium style PMCs
+ * API entry points
*/
-static struct pmc_event_alias p5_aliases[] = {
- EV_ALIAS("cycles", "tsc"),
- EV_ALIAS(NULL, NULL)
-};
-static int
-p5_allocate_pmc(enum pmc_event pe, char *ctrspec,
- struct pmc_op_pmcallocate *pmc_config)
+int
+pmc_allocate(const char *ctrspec, enum pmc_mode mode,
+ uint32_t flags, int cpu, pmc_id_t *pmcid)
{
- return -1 || pe || ctrspec || pmc_config; /* shut up gcc */
-}
+ int retval;
+ enum pmc_event pe;
+ char *r, *spec_copy;
+ const char *ctrname;
+ const struct pmc_event_alias *p;
+ struct pmc_op_pmcallocate pmc_config;
-#elif defined(__amd64__)
+ spec_copy = NULL;
+ retval = -1;
-/*
- * AMD K8 PMCs.
- *
- * These are very similar to AMD K7 PMCs, but support more kinds of
- * events.
- */
+ if (mode != PMC_MODE_SS && mode != PMC_MODE_TS &&
+ mode != PMC_MODE_SC && mode != PMC_MODE_TC) {
+ errno = EINVAL;
+ goto out;
+ }
-static struct pmc_event_alias k8_aliases[] = {
- EV_ALIAS("branches", "k8-fr-retired-taken-branches"),
- EV_ALIAS("branch-mispredicts",
- "k8-fr-retired-taken-branches-mispredicted"),
- EV_ALIAS("cycles", "tsc"),
- EV_ALIAS("dc-misses", "k8-dc-miss"),
- EV_ALIAS("ic-misses", "k8-ic-miss"),
- EV_ALIAS("instructions", "k8-fr-retired-x86-instructions"),
- EV_ALIAS("interrupts", "k8-fr-taken-hardware-interrupts"),
- EV_ALIAS(NULL, NULL)
-};
+ /* replace an event alias with the canonical event specifier */
+ if (pmc_mdep_event_aliases)
+ for (p = pmc_mdep_event_aliases; p->pm_alias; p++)
+ if (!strcmp(ctrspec, p->pm_alias)) {
+ spec_copy = strdup(p->pm_spec);
+ break;
+ }
-#define __K8MASK(N,V) PMCMASK(N,(1 << (V)))
+ if (spec_copy == NULL)
+ spec_copy = strdup(ctrspec);
-/*
- * Parsing tables
- */
+ r = spec_copy;
+ ctrname = strsep(&r, ",");
-/* fp dispatched fpu ops */
-static const struct pmc_masks k8_mask_fdfo[] = {
- __K8MASK(add-pipe-excluding-junk-ops, 0),
- __K8MASK(multiply-pipe-excluding-junk-ops, 1),
- __K8MASK(store-pipe-excluding-junk-ops, 2),
- __K8MASK(add-pipe-junk-ops, 3),
- __K8MASK(multiply-pipe-junk-ops, 4),
- __K8MASK(store-pipe-junk-ops, 5),
- NULLMASK
-};
+ /* look for the given counter name */
-/* ls segment register loads */
-static const struct pmc_masks k8_mask_lsrl[] = {
- __K8MASK(es, 0),
- __K8MASK(cs, 1),
- __K8MASK(ss, 2),
- __K8MASK(ds, 3),
- __K8MASK(fs, 4),
- __K8MASK(gs, 5),
- __K8MASK(hs, 6),
- NULLMASK
-};
+ for (pe = PMC_EVENT_FIRST; pe < (PMC_EVENT_LAST+1); pe++)
+ if (!strcmp(ctrname, pmc_event_table[pe].pm_ev_name))
+ break;
-/* ls locked operation */
-static const struct pmc_masks k8_mask_llo[] = {
- __K8MASK(locked-instructions, 0),
- __K8MASK(cycles-in-request, 1),
- __K8MASK(cycles-to-complete, 2),
- NULLMASK
-};
+ if (pe > PMC_EVENT_LAST) {
+ errno = EINVAL;
+ goto out;
+ }
-/* dc refill from {l2,system} and dc copyback */
-static const struct pmc_masks k8_mask_dc[] = {
- __K8MASK(invalid, 0),
- __K8MASK(shared, 1),
- __K8MASK(exclusive, 2),
- __K8MASK(owner, 3),
- __K8MASK(modified, 4),
- NULLMASK
-};
+ bzero(&pmc_config, sizeof(pmc_config));
+ pmc_config.pm_ev = pmc_event_table[pe].pm_ev_code;
+ pmc_config.pm_class = pmc_event_table[pe].pm_ev_class;
+ pmc_config.pm_cpu = cpu;
+ pmc_config.pm_mode = mode;
+ pmc_config.pm_flags = flags;
-/* dc one bit ecc error */
-static const struct pmc_masks k8_mask_dobee[] = {
- __K8MASK(scrubber, 0),
- __K8MASK(piggyback, 1),
- NULLMASK
-};
+ if (PMC_IS_SAMPLING_MODE(mode))
+ pmc_config.pm_caps |= PMC_CAP_INTERRUPT;
-/* dc dispatched prefetch instructions */
-static const struct pmc_masks k8_mask_ddpi[] = {
- __K8MASK(load, 0),
- __K8MASK(store, 1),
- __K8MASK(nta, 2),
- NULLMASK
-};
+ if (pmc_mdep_allocate_pmc(pe, r, &pmc_config) < 0) {
+ errno = EINVAL;
+ goto out;
+ }
-/* dc dcache accesses by locks */
-static const struct pmc_masks k8_mask_dabl[] = {
- __K8MASK(accesses, 0),
- __K8MASK(misses, 1),
- NULLMASK
-};
+ if (PMC_CALL(PMCALLOCATE, &pmc_config) < 0)
+ goto out;
-/* bu internal l2 request */
-static const struct pmc_masks k8_mask_bilr[] = {
- __K8MASK(ic-fill, 0),
- __K8MASK(dc-fill, 1),
- __K8MASK(tlb-reload, 2),
- __K8MASK(tag-snoop, 3),
- __K8MASK(cancelled, 4),
- NULLMASK
-};
+ *pmcid = pmc_config.pm_pmcid;
-/* bu fill request l2 miss */
-static const struct pmc_masks k8_mask_bfrlm[] = {
- __K8MASK(ic-fill, 0),
- __K8MASK(dc-fill, 1),
- __K8MASK(tlb-reload, 2),
- NULLMASK
-};
+ retval = 0;
-/* bu fill into l2 */
-static const struct pmc_masks k8_mask_bfil[] = {
- __K8MASK(dirty-l2-victim, 0),
- __K8MASK(victim-from-l2, 1),
- NULLMASK
-};
+ out:
+ if (spec_copy)
+ free(spec_copy);
-/* fr retired fpu instructions */
-static const struct pmc_masks k8_mask_frfi[] = {
- __K8MASK(x87, 0),
- __K8MASK(mmx-3dnow, 1),
- __K8MASK(packed-sse-sse2, 2),
- __K8MASK(scalar-sse-sse2, 3),
- NULLMASK
-};
+ return retval;
+}
-/* fr retired fastpath double op instructions */
-static const struct pmc_masks k8_mask_frfdoi[] = {
- __K8MASK(low-op-pos-0, 0),
- __K8MASK(low-op-pos-1, 1),
- __K8MASK(low-op-pos-2, 2),
- NULLMASK
-};
+int
+pmc_attach(pmc_id_t pmc, pid_t pid)
+{
+ struct pmc_op_pmcattach pmc_attach_args;
-/* fr fpu exceptions */
-static const struct pmc_masks k8_mask_ffe[] = {
- __K8MASK(x87-reclass-microfaults, 0),
- __K8MASK(sse-retype-microfaults, 1),
- __K8MASK(sse-reclass-microfaults, 2),
- __K8MASK(sse-and-x87-microtraps, 3),
- NULLMASK
-};
+ pmc_attach_args.pm_pmc = pmc;
+ pmc_attach_args.pm_pid = pid;
-/* nb memory controller page access event */
-static const struct pmc_masks k8_mask_nmcpae[] = {
- __K8MASK(page-hit, 0),
- __K8MASK(page-miss, 1),
- __K8MASK(page-conflict, 2),
- NULLMASK
-};
+ return PMC_CALL(PMCATTACH, &pmc_attach_args);
+}
-/* nb memory controller turnaround */
-static const struct pmc_masks k8_mask_nmct[] = {
- __K8MASK(dimm-turnaround, 0),
- __K8MASK(read-to-write-turnaround, 1),
- __K8MASK(write-to-read-turnaround, 2),
- NULLMASK
-};
+int
+pmc_capabilities(pmc_id_t pmcid, uint32_t *caps)
+{
+ unsigned int i;
+ enum pmc_class cl;
-/* nb memory controller bypass saturation */
-static const struct pmc_masks k8_mask_nmcbs[] = {
- __K8MASK(memory-controller-hi-pri-bypass, 0),
- __K8MASK(memory-controller-lo-pri-bypass, 1),
- __K8MASK(dram-controller-interface-bypass, 2),
- __K8MASK(dram-controller-queue-bypass, 3),
- NULLMASK
-};
+ cl = PMC_ID_TO_CLASS(pmcid);
+ for (i = 0; i < cpu_info.pm_nclass; i++)
+ if (cpu_info.pm_classes[i].pm_class == cl) {
+ *caps = cpu_info.pm_classes[i].pm_caps;
+ return 0;
+ }
+ return EINVAL;
+}
-/* nb sized commands */
-static const struct pmc_masks k8_mask_nsc[] = {
- __K8MASK(nonpostwrszbyte, 0),
- __K8MASK(nonpostwrszdword, 1),
- __K8MASK(postwrszbyte, 2),
- __K8MASK(postwrszdword, 3),
- __K8MASK(rdszbyte, 4),
- __K8MASK(rdszdword, 5),
- __K8MASK(rdmodwr, 6),
- NULLMASK
-};
+int
+pmc_configure_logfile(int fd)
+{
+ struct pmc_op_configurelog cla;
-/* nb probe result */
-static const struct pmc_masks k8_mask_npr[] = {
- __K8MASK(probe-miss, 0),
- __K8MASK(probe-hit, 1),
- __K8MASK(probe-hit-dirty-no-memory-cancel, 2),
- __K8MASK(probe-hit-dirty-with-memory-cancel, 3),
- NULLMASK
-};
+ cla.pm_logfd = fd;
+ if (PMC_CALL(CONFIGURELOG, &cla) < 0)
+ return -1;
+ return 0;
+}
-/* nb hypertransport bus bandwidth */
-static const struct pmc_masks k8_mask_nhbb[] = { /* HT bus bandwidth */
- __K8MASK(command, 0),
- __K8MASK(data, 1),
- __K8MASK(buffer-release, 2),
- __K8MASK(nop, 3),
- NULLMASK
-};
+int
+pmc_cpuinfo(const struct pmc_cpuinfo **pci)
+{
+ if (pmc_syscall == -1) {
+ errno = ENXIO;
+ return -1;
+ }
-#undef __K8MASK
+ /* kernel<->library, library<->userland interfaces are identical */
+ *pci = (struct pmc_cpuinfo *) &cpu_info;
+ return 0;
+}
-#define K8_KW_COUNT "count"
-#define K8_KW_EDGE "edge"
-#define K8_KW_INV "inv"
-#define K8_KW_MASK "mask"
-#define K8_KW_OS "os"
-#define K8_KW_USR "usr"
+int
+pmc_detach(pmc_id_t pmc, pid_t pid)
+{
+ struct pmc_op_pmcattach pmc_detach_args;
-static int
-k8_allocate_pmc(enum pmc_event pe, char *ctrspec,
- struct pmc_op_pmcallocate *pmc_config)
+ pmc_detach_args.pm_pmc = pmc;
+ pmc_detach_args.pm_pid = pid;
+
+ return PMC_CALL(PMCDETACH, &pmc_detach_args);
+}
+
+int
+pmc_disable(int cpu, int pmc)
{
- char *e, *p, *q;
- int n;
- uint32_t count, evmask;
- const struct pmc_masks *pm, *pmask;
+ struct pmc_op_pmcadmin ssa;
- pmc_config->pm_caps |= PMC_CAP_READ;
- pmc_config->pm_amd_config = 0;
+ ssa.pm_cpu = cpu;
+ ssa.pm_pmc = pmc;
+ ssa.pm_state = PMC_STATE_DISABLED;
+ return PMC_CALL(PMCADMIN, &ssa);
+}
- if (pe == PMC_EV_TSC_TSC) {
- /* TSC events must be unqualified. */
- if (ctrspec && *ctrspec != '\0')
- return -1;
- return 0;
- }
+int
+pmc_enable(int cpu, int pmc)
+{
+ struct pmc_op_pmcadmin ssa;
- pmask = NULL;
- evmask = 0;
+ ssa.pm_cpu = cpu;
+ ssa.pm_pmc = pmc;
+ ssa.pm_state = PMC_STATE_FREE;
+ return PMC_CALL(PMCADMIN, &ssa);
+}
-#define __K8SETMASK(M) pmask = k8_mask_##M
+/*
+ * Return a list of events known to a given PMC class. 'cl' is the
+ * PMC class identifier, 'eventnames' is the returned list of 'const
+ * char *' pointers pointing to the names of the events. 'nevents' is
+ * the number of event name pointers returned.
+ *
+ * The space for 'eventnames' is allocated using malloc(3). The caller
+ * is responsible for freeing this space when done.
+ */
- /* setup parsing tables */
- switch (pe) {
- case PMC_EV_K8_FP_DISPATCHED_FPU_OPS:
- __K8SETMASK(fdfo);
- break;
- case PMC_EV_K8_LS_SEGMENT_REGISTER_LOAD:
- __K8SETMASK(lsrl);
- break;
- case PMC_EV_K8_LS_LOCKED_OPERATION:
- __K8SETMASK(llo);
- break;
- case PMC_EV_K8_DC_REFILL_FROM_L2:
- case PMC_EV_K8_DC_REFILL_FROM_SYSTEM:
- case PMC_EV_K8_DC_COPYBACK:
- __K8SETMASK(dc);
- break;
- case PMC_EV_K8_DC_ONE_BIT_ECC_ERROR:
- __K8SETMASK(dobee);
- break;
- case PMC_EV_K8_DC_DISPATCHED_PREFETCH_INSTRUCTIONS:
- __K8SETMASK(ddpi);
- break;
- case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS:
- __K8SETMASK(dabl);
- break;
- case PMC_EV_K8_BU_INTERNAL_L2_REQUEST:
- __K8SETMASK(bilr);
- break;
- case PMC_EV_K8_BU_FILL_REQUEST_L2_MISS:
- __K8SETMASK(bfrlm);
- break;
- case PMC_EV_K8_BU_FILL_INTO_L2:
- __K8SETMASK(bfil);
- break;
- case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS:
- __K8SETMASK(frfi);
- break;
- case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS:
- __K8SETMASK(frfdoi);
- break;
- case PMC_EV_K8_FR_FPU_EXCEPTIONS:
- __K8SETMASK(ffe);
- break;
- case PMC_EV_K8_NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT:
- __K8SETMASK(nmcpae);
+int
+pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames,
+ int *nevents)
+{
+ int count;
+ const char **names;
+ const struct pmc_event_descr *ev;
+
+ switch (cl)
+ {
+ case PMC_CLASS_TSC:
+ ev = &pmc_event_table[PMC_EV_TSC_TSC];
+ count = 1;
break;
- case PMC_EV_K8_NB_MEMORY_CONTROLLER_TURNAROUND:
- __K8SETMASK(nmct);
+ case PMC_CLASS_K7:
+ ev = &pmc_event_table[PMC_EV_K7_FIRST];
+ count = PMC_EV_K7_LAST - PMC_EV_K7_FIRST + 1;
break;
- case PMC_EV_K8_NB_MEMORY_CONTROLLER_BYPASS_SATURATION:
- __K8SETMASK(nmcbs);
+ case PMC_CLASS_K8:
+ ev = &pmc_event_table[PMC_EV_K8_FIRST];
+ count = PMC_EV_K8_LAST - PMC_EV_K8_FIRST + 1;
break;
- case PMC_EV_K8_NB_SIZED_COMMANDS:
- __K8SETMASK(nsc);
+ case PMC_CLASS_P5:
+ ev = &pmc_event_table[PMC_EV_P5_FIRST];
+ count = PMC_EV_P5_LAST - PMC_EV_P5_FIRST + 1;
break;
- case PMC_EV_K8_NB_PROBE_RESULT:
- __K8SETMASK(npr);
+ case PMC_CLASS_P6:
+ ev = &pmc_event_table[PMC_EV_P6_FIRST];
+ count = PMC_EV_P6_LAST - PMC_EV_P6_FIRST + 1;
break;
- case PMC_EV_K8_NB_HT_BUS0_BANDWIDTH:
- case PMC_EV_K8_NB_HT_BUS1_BANDWIDTH:
- case PMC_EV_K8_NB_HT_BUS2_BANDWIDTH:
- __K8SETMASK(nhbb);
+ case PMC_CLASS_P4:
+ ev = &pmc_event_table[PMC_EV_P4_FIRST];
+ count = PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1;
break;
-
default:
- break; /* no options defined */
+ errno = EINVAL;
+ return -1;
}
- pmc_config->pm_caps |= PMC_CAP_WRITE;
-
- while ((p = strsep(&ctrspec, ",")) != NULL) {
- if (KWPREFIXMATCH(p, K8_KW_COUNT "=")) {
- q = strchr(p, '=');
- if (*++q == '\0') /* skip '=' */
- return -1;
+ if ((names = malloc(count * sizeof(const char *))) == NULL)
+ return -1;
- count = strtol(q, &e, 0);
- if (e == q || *e != '\0')
- return -1;
+ *eventnames = names;
+ *nevents = count;
- pmc_config->pm_caps |= PMC_CAP_THRESHOLD;
- pmc_config->pm_amd_config |= K8_PMC_TO_COUNTER(count);
+ for (;count--; ev++, names++)
+ *names = ev->pm_ev_name;
+ return 0;
+}
- } else if (KWMATCH(p, K8_KW_EDGE)) {
- pmc_config->pm_caps |= PMC_CAP_EDGE;
- } else if (KWMATCH(p, K8_KW_INV)) {
- pmc_config->pm_caps |= PMC_CAP_INVERT;
- } else if (KWPREFIXMATCH(p, K8_KW_MASK "=")) {
- if ((n = pmc_parse_mask(pmask, p, &evmask)) < 0)
- return -1;
- pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
- } else if (KWMATCH(p, K8_KW_OS)) {
- pmc_config->pm_caps |= PMC_CAP_SYSTEM;
- } else if (KWMATCH(p, K8_KW_USR)) {
- pmc_config->pm_caps |= PMC_CAP_USER;
- } else
- return -1;
- }
+int
+pmc_flush_logfile(void)
+{
+ return PMC_CALL(FLUSHLOG,0);
+}
- /* other post processing */
+int
+pmc_get_driver_stats(struct pmc_driverstats *ds)
+{
+ struct pmc_op_getdriverstats gms;
- switch (pe) {
- case PMC_EV_K8_FP_DISPATCHED_FPU_OPS:
- case PMC_EV_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED:
- case PMC_EV_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS:
- case PMC_EV_K8_FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS:
- case PMC_EV_K8_FR_RETIRED_FPU_INSTRUCTIONS:
- case PMC_EV_K8_FR_FPU_EXCEPTIONS:
- /* XXX only available in rev B and later */
- break;
- case PMC_EV_K8_DC_DCACHE_ACCESSES_BY_LOCKS:
- /* XXX only available in rev C and later */
- break;
- case PMC_EV_K8_LS_LOCKED_OPERATION:
- /* XXX CPU Rev A,B evmask is to be zero */
- if (evmask & (evmask - 1)) /* > 1 bit set */
- return -1;
- if (evmask == 0) {
- evmask = 0x01; /* Rev C and later: #instrs */
- pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
- }
- break;
- default:
- if (evmask == 0 && pmask != NULL) {
- for (pm = pmask; pm->pm_name; pm++)
- evmask |= pm->pm_value;
- pmc_config->pm_caps |= PMC_CAP_QUALIFIER;
- }
- }
+ if (PMC_CALL(GETDRIVERSTATS, &gms) < 0)
+ return -1;
- if (pmc_config->pm_caps & PMC_CAP_QUALIFIER)
- pmc_config->pm_amd_config = K8_PMC_TO_UNITMASK(evmask);
+ /* copy out fields in the current userland<->library interface */
+ ds->pm_intr_ignored = gms.pm_intr_ignored;
+ ds->pm_intr_processed = gms.pm_intr_processed;
+ ds->pm_intr_bufferfull = gms.pm_intr_bufferfull;
+ ds->pm_syscalls = gms.pm_syscalls;
+ ds->pm_syscall_errors = gms.pm_syscall_errors;
+ ds->pm_buffer_requests = gms.pm_buffer_requests;
+ ds->pm_buffer_requests_failed = gms.pm_buffer_requests_failed;
+ ds->pm_log_sweeps = gms.pm_log_sweeps;
return 0;
}
-#endif
-/*
- * API entry points
- */
+int
+pmc_get_msr(pmc_id_t pmc, uint32_t *msr)
+{
+ struct pmc_op_getmsr gm;
+
+ gm.pm_pmcid = pmc;
+ if (PMC_CALL(PMCGETMSR, &gm) < 0)
+ return -1;
+ *msr = gm.pm_msr;
+ return 0;
+}
int
pmc_init(void)
@@ -1633,12 +1909,13 @@ pmc_init(void)
pmc_syscall = pmc_modstat.data.intval;
- /* check ABI version against compiled-in version */
+ /* check the kernel module's ABI against our compiled-in version */
+ abi_version = PMC_VERSION;
if (PMC_CALL(GETMODULEVERSION, &abi_version) < 0)
return (pmc_syscall = -1);
- /* ignore patch numbers for the comparision */
- if ((abi_version & 0xFFFF0000) != (PMC_VERSION & 0xFFFF0000)) {
+ /* ignore patch & minor numbers for the comparison */
+ if ((abi_version & 0xFF000000) != (PMC_VERSION & 0xFF000000)) {
errno = EPROGMISMATCH;
return (pmc_syscall = -1);
}
@@ -1688,128 +1965,145 @@ pmc_init(void)
return 0;
}
-int
-pmc_allocate(const char *ctrspec, enum pmc_mode mode,
- uint32_t flags, int cpu, pmc_id_t *pmcid)
+const char *
+pmc_name_of_capability(enum pmc_caps cap)
{
- int retval;
- enum pmc_event pe;
- char *r, *spec_copy;
- const char *ctrname;
- const struct pmc_event_alias *p;
- struct pmc_op_pmcallocate pmc_config;
+ int i;
- spec_copy = NULL;
- retval = -1;
+ /*
+ * 'cap' should have a single bit set and should be in
+ * range.
+ */
- if (mode != PMC_MODE_SS && mode != PMC_MODE_TS &&
- mode != PMC_MODE_SC && mode != PMC_MODE_TC) {
+ if ((cap & (cap - 1)) || cap < PMC_CAP_FIRST ||
+ cap > PMC_CAP_LAST) {
errno = EINVAL;
- goto out;
+ return NULL;
}
- /* replace an event alias with the canonical event specifier */
- if (pmc_mdep_event_aliases)
- for (p = pmc_mdep_event_aliases; p->pm_alias; p++)
- if (!strcmp(ctrspec, p->pm_alias)) {
- spec_copy = strdup(p->pm_spec);
- break;
- }
-
- if (spec_copy == NULL)
- spec_copy = strdup(ctrspec);
+ i = ffs(cap);
- r = spec_copy;
- ctrname = strsep(&r, ",");
+ return pmc_capability_names[i - 1];
+}
- /* look for the given counter name */
+const char *
+pmc_name_of_class(enum pmc_class pc)
+{
+ if ((int) pc >= PMC_CLASS_FIRST &&
+ pc <= PMC_CLASS_LAST)
+ return pmc_class_names[pc];
- for (pe = PMC_EVENT_FIRST; pe < (PMC_EVENT_LAST+1); pe++)
- if (!strcmp(ctrname, pmc_event_table[pe].pm_ev_name))
- break;
+ errno = EINVAL;
+ return NULL;
+}
- if (pe > PMC_EVENT_LAST) {
- errno = EINVAL;
- goto out;
- }
+const char *
+pmc_name_of_cputype(enum pmc_cputype cp)
+{
+ if ((int) cp >= PMC_CPU_FIRST &&
+ cp <= PMC_CPU_LAST)
+ return pmc_cputype_names[cp];
+ errno = EINVAL;
+ return NULL;
+}
- bzero(&pmc_config, sizeof(pmc_config));
- pmc_config.pm_ev = pmc_event_table[pe].pm_ev_code;
- pmc_config.pm_class = pmc_event_table[pe].pm_ev_class;
- pmc_config.pm_cpu = cpu;
- pmc_config.pm_mode = mode;
- pmc_config.pm_flags = flags;
+const char *
+pmc_name_of_disposition(enum pmc_disp pd)
+{
+ if ((int) pd >= PMC_DISP_FIRST &&
+ pd <= PMC_DISP_LAST)
+ return pmc_disposition_names[pd];
- if (PMC_IS_SAMPLING_MODE(mode))
- pmc_config.pm_caps |= PMC_CAP_INTERRUPT;
+ errno = EINVAL;
+ return NULL;
+}
- if (pmc_mdep_allocate_pmc(pe, r, &pmc_config) < 0) {
- errno = EINVAL;
- goto out;
- }
+const char *
+pmc_name_of_event(enum pmc_event pe)
+{
+ if ((int) pe >= PMC_EVENT_FIRST &&
+ pe <= PMC_EVENT_LAST)
+ return pmc_event_table[pe].pm_ev_name;
- if (PMC_CALL(PMCALLOCATE, &pmc_config) < 0)
- goto out;
+ errno = EINVAL;
+ return NULL;
+}
- *pmcid = pmc_config.pm_pmcid;
+const char *
+pmc_name_of_mode(enum pmc_mode pm)
+{
+ if ((int) pm >= PMC_MODE_FIRST &&
+ pm <= PMC_MODE_LAST)
+ return pmc_mode_names[pm];
- retval = 0;
+ errno = EINVAL;
+ return NULL;
+}
- out:
- if (spec_copy)
- free(spec_copy);
+const char *
+pmc_name_of_state(enum pmc_state ps)
+{
+ if ((int) ps >= PMC_STATE_FIRST &&
+ ps <= PMC_STATE_LAST)
+ return pmc_state_names[ps];
- return retval;
+ errno = EINVAL;
+ return NULL;
}
int
-pmc_attach(pmc_id_t pmc, pid_t pid)
+pmc_ncpu(void)
{
- struct pmc_op_pmcattach pmc_attach_args;
-
- pmc_attach_args.pm_pmc = pmc;
- pmc_attach_args.pm_pid = pid;
+ if (pmc_syscall == -1) {
+ errno = ENXIO;
+ return -1;
+ }
- return PMC_CALL(PMCATTACH, &pmc_attach_args);
+ return cpu_info.pm_ncpu;
}
int
-pmc_detach(pmc_id_t pmc, pid_t pid)
+pmc_npmc(int cpu)
{
- struct pmc_op_pmcattach pmc_detach_args;
+ if (pmc_syscall == -1) {
+ errno = ENXIO;
+ return -1;
+ }
- pmc_detach_args.pm_pmc = pmc;
- pmc_detach_args.pm_pid = pid;
+ if (cpu < 0 || cpu >= (int) cpu_info.pm_ncpu) {
+ errno = EINVAL;
+ return -1;
+ }
- return PMC_CALL(PMCDETACH, &pmc_detach_args);
+ return cpu_info.pm_npmc;
}
int
-pmc_release(pmc_id_t pmc)
+pmc_pmcinfo(int cpu, struct pmc_pmcinfo **ppmci)
{
- struct pmc_op_simple pmc_release_args;
+ int nbytes, npmc;
+ struct pmc_op_getpmcinfo *pmci;
- pmc_release_args.pm_pmcid = pmc;
+ if ((npmc = pmc_npmc(cpu)) < 0)
+ return -1;
- return PMC_CALL(PMCRELEASE, &pmc_release_args);
-}
+ nbytes = sizeof(struct pmc_op_getpmcinfo) +
+ npmc * sizeof(struct pmc_info);
-int
-pmc_start(pmc_id_t pmc)
-{
- struct pmc_op_simple pmc_start_args;
+ if ((pmci = calloc(1, nbytes)) == NULL)
+ return -1;
- pmc_start_args.pm_pmcid = pmc;
- return PMC_CALL(PMCSTART, &pmc_start_args);
-}
+ pmci->pm_cpu = cpu;
-int
-pmc_stop(pmc_id_t pmc)
-{
- struct pmc_op_simple pmc_stop_args;
+ if (PMC_CALL(GETPMCINFO, pmci) < 0) {
+ free(pmci);
+ return -1;
+ }
- pmc_stop_args.pm_pmcid = pmc;
- return PMC_CALL(PMCSTOP, &pmc_stop_args);
+ /* kernel<->library, library<->userland interfaces are identical */
+ *ppmci = (struct pmc_pmcinfo *) pmci;
+
+ return 0;
}
int
@@ -1830,15 +2124,13 @@ pmc_read(pmc_id_t pmc, pmc_value_t *value)
}
int
-pmc_write(pmc_id_t pmc, pmc_value_t value)
+pmc_release(pmc_id_t pmc)
{
- struct pmc_op_pmcrw pmc_write_op;
+ struct pmc_op_simple pmc_release_args;
- pmc_write_op.pm_pmcid = pmc;
- pmc_write_op.pm_flags = PMC_F_NEWVALUE;
- pmc_write_op.pm_value = value;
+ pmc_release_args.pm_pmcid = pmc;
- return PMC_CALL(PMCRW, &pmc_write_op);
+ return PMC_CALL(PMCRELEASE, &pmc_release_args);
}
int
@@ -1874,111 +2166,21 @@ pmc_set(pmc_id_t pmc, pmc_value_t value)
}
int
-pmc_configure_logfile(int fd)
-{
- struct pmc_op_configurelog cla;
-
- cla.pm_logfd = fd;
- if (PMC_CALL(CONFIGURELOG, &cla) < 0)
- return -1;
-
- return 0;
-}
-
-int
-pmc_get_driver_stats(struct pmc_op_getdriverstats *gms)
-{
- return PMC_CALL(GETDRIVERSTATS, gms);
-}
-
-int
-pmc_ncpu(void)
-{
- if (pmc_syscall == -1) {
- errno = ENXIO;
- return -1;
- }
-
- return cpu_info.pm_ncpu;
-}
-
-int
-pmc_npmc(int cpu)
-{
- if (pmc_syscall == -1) {
- errno = ENXIO;
- return -1;
- }
-
- if (cpu < 0 || cpu >= (int) cpu_info.pm_ncpu) {
- errno = EINVAL;
- return -1;
- }
-
- return cpu_info.pm_npmc;
-}
-
-int
-pmc_enable(int cpu, int pmc)
-{
- struct pmc_op_pmcadmin ssa;
-
- ssa.pm_cpu = cpu;
- ssa.pm_pmc = pmc;
- ssa.pm_state = PMC_STATE_FREE;
- return PMC_CALL(PMCADMIN, &ssa);
-}
-
-int
-pmc_disable(int cpu, int pmc)
-{
- struct pmc_op_pmcadmin ssa;
-
- ssa.pm_cpu = cpu;
- ssa.pm_pmc = pmc;
- ssa.pm_state = PMC_STATE_DISABLED;
- return PMC_CALL(PMCADMIN, &ssa);
-}
-
-
-int
-pmc_pmcinfo(int cpu, struct pmc_op_getpmcinfo **ppmci)
+pmc_start(pmc_id_t pmc)
{
- int nbytes, npmc, saved_errno;
- struct pmc_op_getpmcinfo *pmci;
-
- if ((npmc = pmc_npmc(cpu)) < 0)
- return -1;
-
- nbytes = sizeof(struct pmc_op_getpmcinfo) +
- npmc * sizeof(struct pmc_info);
-
- if ((pmci = calloc(1, nbytes)) == NULL)
- return -1;
-
- pmci->pm_cpu = cpu;
-
- if (PMC_CALL(GETPMCINFO, pmci) < 0) {
- saved_errno = errno;
- free(pmci);
- errno = saved_errno;
- return -1;
- }
+ struct pmc_op_simple pmc_start_args;
- *ppmci = pmci;
- return 0;
+ pmc_start_args.pm_pmcid = pmc;
+ return PMC_CALL(PMCSTART, &pmc_start_args);
}
int
-pmc_cpuinfo(const struct pmc_op_getcpuinfo **pci)
+pmc_stop(pmc_id_t pmc)
{
- if (pmc_syscall == -1) {
- errno = ENXIO;
- return -1;
- }
+ struct pmc_op_simple pmc_stop_args;
- *pci = &cpu_info;
- return 0;
+ pmc_stop_args.pm_pmcid = pmc;
+ return PMC_CALL(PMCSTOP, &pmc_stop_args);
}
int
@@ -1997,182 +2199,22 @@ pmc_width(pmc_id_t pmcid, uint32_t *width)
}
int
-pmc_capabilities(pmc_id_t pmcid, uint32_t *caps)
-{
- unsigned int i;
- enum pmc_class cl;
-
- cl = PMC_ID_TO_CLASS(pmcid);
- for (i = 0; i < cpu_info.pm_nclass; i++)
- if (cpu_info.pm_classes[i].pm_class == cl) {
- *caps = cpu_info.pm_classes[i].pm_caps;
- return 0;
- }
- return EINVAL;
-}
-
-const char *
-pmc_name_of_cputype(enum pmc_cputype cp)
-{
- if ((int) cp >= PMC_CPU_FIRST &&
- cp <= PMC_CPU_LAST)
- return pmc_cputype_names[cp];
- errno = EINVAL;
- return NULL;
-}
-
-const char *
-pmc_name_of_class(enum pmc_class pc)
-{
- if ((int) pc >= PMC_CLASS_FIRST &&
- pc <= PMC_CLASS_LAST)
- return pmc_class_names[pc];
-
- errno = EINVAL;
- return NULL;
-}
-
-const char *
-pmc_name_of_mode(enum pmc_mode pm)
-{
- if ((int) pm >= PMC_MODE_FIRST &&
- pm <= PMC_MODE_LAST)
- return pmc_mode_names[pm];
-
- errno = EINVAL;
- return NULL;
-}
-
-const char *
-pmc_name_of_event(enum pmc_event pe)
-{
- if ((int) pe >= PMC_EVENT_FIRST &&
- pe <= PMC_EVENT_LAST)
- return pmc_event_table[pe].pm_ev_name;
-
- errno = EINVAL;
- return NULL;
-}
-
-const char *
-pmc_name_of_state(enum pmc_state ps)
-{
- if ((int) ps >= PMC_STATE_FIRST &&
- ps <= PMC_STATE_LAST)
- return pmc_state_names[ps];
-
- errno = EINVAL;
- return NULL;
-}
-
-const char *
-pmc_name_of_disposition(enum pmc_disp pd)
-{
- if ((int) pd >= PMC_DISP_FIRST &&
- pd <= PMC_DISP_LAST)
- return pmc_disposition_names[pd];
-
- errno = EINVAL;
- return NULL;
-}
-
-const char *
-pmc_name_of_capability(enum pmc_caps cap)
-{
- int i;
-
- /*
- * 'cap' should have a single bit set and should be in
- * range.
- */
-
- if ((cap & (cap - 1)) || cap < PMC_CAP_FIRST ||
- cap > PMC_CAP_LAST) {
- errno = EINVAL;
- return NULL;
- }
-
- i = ffs(cap);
-
- return pmc_capability_names[i - 1];
-}
-
-/*
- * Return a list of events known to a given PMC class. 'cl' is the
- * PMC class identifier, 'eventnames' is the returned list of 'const
- * char *' pointers pointing to the names of the events. 'nevents' is
- * the number of event name pointers returned.
- *
- * The space for 'eventnames' is allocated using malloc(3). The caller
- * is responsible for freeing this space when done.
- */
-
-int
-pmc_event_names_of_class(enum pmc_class cl, const char ***eventnames,
- int *nevents)
+pmc_write(pmc_id_t pmc, pmc_value_t value)
{
- int count;
- const char **names;
- const struct pmc_event_descr *ev;
-
- switch (cl)
- {
- case PMC_CLASS_TSC:
- ev = &pmc_event_table[PMC_EV_TSC_TSC];
- count = 1;
- break;
- case PMC_CLASS_K7:
- ev = &pmc_event_table[PMC_EV_K7_FIRST];
- count = PMC_EV_K7_LAST - PMC_EV_K7_FIRST + 1;
- break;
- case PMC_CLASS_K8:
- ev = &pmc_event_table[PMC_EV_K8_FIRST];
- count = PMC_EV_K8_LAST - PMC_EV_K8_FIRST + 1;
- break;
- case PMC_CLASS_P5:
- ev = &pmc_event_table[PMC_EV_P5_FIRST];
- count = PMC_EV_P5_LAST - PMC_EV_P5_FIRST + 1;
- break;
- case PMC_CLASS_P6:
- ev = &pmc_event_table[PMC_EV_P6_FIRST];
- count = PMC_EV_P6_LAST - PMC_EV_P6_FIRST + 1;
- break;
- case PMC_CLASS_P4:
- ev = &pmc_event_table[PMC_EV_P4_FIRST];
- count = PMC_EV_P4_LAST - PMC_EV_P4_FIRST + 1;
- break;
- default:
- errno = EINVAL;
- return -1;
- }
-
- if ((names = malloc(count * sizeof(const char *))) == NULL)
- return -1;
+ struct pmc_op_pmcrw pmc_write_op;
- *eventnames = names;
- *nevents = count;
+ pmc_write_op.pm_pmcid = pmc;
+ pmc_write_op.pm_flags = PMC_F_NEWVALUE;
+ pmc_write_op.pm_value = value;
- for (;count--; ev++, names++)
- *names = ev->pm_ev_name;
- return 0;
+ return PMC_CALL(PMCRW, &pmc_write_op);
}
-/*
- * Architecture specific APIs
- */
-
-#if defined(__i386__) || defined(__amd64__)
-
int
-pmc_x86_get_msr(pmc_id_t pmc, uint32_t *msr)
+pmc_writelog(uint32_t userdata)
{
- struct pmc_op_x86_getmsr gm;
+ struct pmc_op_writelog wl;
- gm.pm_pmcid = pmc;
- if (PMC_CALL(PMCX86GETMSR, &gm) < 0)
- return -1;
- *msr = gm.pm_msr;
- return 0;
+ wl.pm_userdata = userdata;
+ return PMC_CALL(WRITELOG, &wl);
}
-
-#endif
diff --git a/lib/libpmc/pmc.3 b/lib/libpmc/pmc.3
index 0612ce7..7a771d4 100644
--- a/lib/libpmc/pmc.3
+++ b/lib/libpmc/pmc.3
@@ -1,4 +1,4 @@
-.\" Copyright (c) 2003 Joseph Koshy. All rights reserved.
+.\" Copyright (c) 2003-2005 Joseph Koshy. All rights reserved.
.\"
.\" Redistribution and use in source and binary forms, with or without
.\" modification, are permitted provided that the following conditions
@@ -36,7 +36,9 @@
.Nm pmc_disable ,
.Nm pmc_enable ,
.Nm pmc_event_names_of_class ,
+.Nm pmc_flush_logfile ,
.Nm pmc_get_driver_stats ,
+.Nm pmc_get_msr ,
.Nm pmc_init ,
.Nm pmc_name_of_capability ,
.Nm pmc_name_of_class ,
@@ -53,9 +55,9 @@
.Nm pmc_set ,
.Nm pmc_start ,
.Nm pmc_stop ,
-.Nm pmc_write ,
.Nm pmc_width ,
-.Nm pmc_x86_get_msr
+.Nm pmc_write ,
+.Nm pmc_writelog
.Nd programming API for using hardware performance monitoring counters
.Sh LIBRARY
.Lb libpmc
@@ -79,7 +81,7 @@
.Ft int
.Fn pmc_configure_logfile "int fd"
.Ft int
-.Fn pmc_cpuinfo "const struct pmc_op_getcpuinfo **cpu_info"
+.Fn pmc_cpuinfo "const struct pmc_cpuinfo **cpu_info"
.Ft int
.Fo pmc_detach
.Fa "pmc_id_t pmcid"
@@ -96,7 +98,11 @@
.Fa "int *nevents"
.Fc
.Ft int
-.Fn pmc_get_driver_stats "struct pmc_op_getdriverstats *gms"
+.Fn pmc_flush_logfile "void"
+.Ft int
+.Fn pmc_get_driver_stats "struct pmc_driverstats *gms"
+.Ft int
+.Fn pmc_get_msr "pmc_id_t pmc" "uint32_t *msr"
.Ft int
.Fn pmc_init "void"
.Ft "const char *"
@@ -118,7 +124,7 @@
.Ft int
.Fn pmc_npmc "uint32_t cpu"
.Ft int
-.Fn pmc_pmcinfo "uint32_t cpu" "struct pmc_op_getpmcinfo **pmc_info"
+.Fn pmc_pmcinfo "uint32_t cpu" "struct pmc_pmcinfo **pmc_info"
.Ft int
.Fn pmc_read "pmc_id_t pmc" "pmc_value_t *value"
.Ft int
@@ -134,9 +140,9 @@
.Ft int
.Fn pmc_write "pmc_id_t pmc" "pmc_value_t value"
.Ft int
-.Fn pmc_width "pmc_id_t pmc" "uint32_t *width"
+.Fn pmc_writelog "uint32_t userdata"
.Ft int
-.Fn pmc_x86_get_msr "int pmc" "uint32_t *msr"
+.Fn pmc_width "pmc_id_t pmc" "uint32_t *width"
.Sh DESCRIPTION
These functions implement a high-level library for using the
system's hardware performance counters.
@@ -276,9 +282,24 @@ The
.Fn pmc_configure_logfile
function causes the
.Xr hwpmc 4
-driver to log system wide performance data to file corresponding
+driver to log performance data to the file corresponding
to the process' file handle
.Fa fd .
+If argument
+.Fa fd
+is -1, then any previously configured logging is reset
+and all data queued to be written are discarded.
+.Pp
+The
+.Fn pmc_flush_logfile
+function will send all data queued inside the
+.Xr hwpmc 4
+driver to the configured log file before returning.
+The
+.Fn pmc_writelog
+function will append a log entry containing the argument
+.Fa userdata
+to the log file.
.Pp
.Fn pmc_set
configures an sampling PMC
@@ -307,8 +328,19 @@ module is unloaded using
processes that have PMCs allocated to them will be sent a
SIGBUS signal.
.It SIGIO
-Attempting to read a PMC that is not currently attached to a running
-process will cause a SIGIO signal to be sent to the reader.
+The
+.Xr hwpmc 4
+driver will send a PMC owning process a SIGIO signal if:
+.Bl -bullet
+.It
+If any process-mode PMC allocated by it loses all its
+target processes.
+.It
+If the driver encounters an error when writing log data to a
+configured log file.
+This error may be retrieved by a subsequent call to
+.Fn pmc_flush_logfile .
+.El
.El
.Ss CONVENIENCE FUNCTIONS
.Fn pmc_ncpu
@@ -321,10 +353,18 @@ returns the number of PMCs supported on CPU
sets argument
.Fa cpu_info
to point to a structure with information about the system's CPUs.
+Function
.Fn pmc_pmcinfo
returns information about the current state of CPU
.Fa cpu Ap s
PMCs.
+This function sets argument
+.Fa *pmc_info
+to point to a memory area allocated with
+.Xr calloc 3 .
+The caller is expected to
+.Fn free
+the area when done.
.Pp
The functions
.Fn pmc_name_of_capability ,
@@ -370,7 +410,7 @@ is the index of the PMC to be operated on.
Only the super-user is allowed to enable and disable PMCs.
.Ss X86 ARCHITECTURE SPECIFIC API
The
-.Fn pmc_x86_get_msr
+.Fn pmc_get_msr
function returns the processor model specific register number
associated with
.Fa pmc .
@@ -3096,25 +3136,39 @@ was unrecognized for this cpu type.
.Pp
Calls to
.Fn pmc_attach ,
+.Fn pmc_configure_logfile ,
.Fn pmc_detach ,
+.Fn pmc_disable ,
+.Fn pmc_enable ,
+.Fn pmc_get_driver_stats ,
+.Fn pmc_get_msr ,
+.Fn pmc_read ,
.Fn pmc_release ,
+.Fn pmc_rw ,
+.Fn pmc_set ,
.Fn pmc_start ,
.Fn pmc_stop ,
-.Fn pmc_read ,
.Fn pmc_write ,
-.Fn pmc_rw ,
-.Fn pmc_set ,
-.Fn pmc_configure_logfile ,
-.Fn pmc_get_driver_stats ,
-.Fn pmc_enable ,
-.Fn pmc_disable ,
and
-.Fn pmc_x86_get_msr
+.Fn pmc_writelog
may fail with the errors described in
.Xr hwpmc 4 .
+.Pp
+If a log file was configured using
+.Fn pmc_configure_logfile
+and the
+.Xr hwpmc 4
+driver encountered an error while logging data to it, then
+logging will be stopped and a subsequent call to
+.Fn pmc_flush_logfile
+will fail with the error code seen by the
+.Xr hwpmc 4
+driver.
.Sh SEE ALSO
.Xr modfind 2 ,
.Xr modstat 2 ,
+.Xr calloc 3 ,
+.Xr pmclog 3 ,
.Xr hwpmc 4 ,
.Xr pmccontrol 8 ,
.Xr pmcreport 8 ,
@@ -3126,12 +3180,6 @@ The information returned by
and possibly
.Fn pmc_npmc
should really be available all the time, through a better designed
-interface.
-.Pp
-The API for
-.Fn pmc_cpuinfo
-and
-.Fn pmc_pmcinfo
-expose too much of the underlying
+interface and not just when
.Xr hwpmc 4
-driver's internals to userland.
+is present in the kernel.
diff --git a/lib/libpmc/pmc.h b/lib/libpmc/pmc.h
index 7ee257b..ee3f772 100644
--- a/lib/libpmc/pmc.h
+++ b/lib/libpmc/pmc.h
@@ -32,6 +32,39 @@
#include <sys/pmc.h>
/*
+ * Driver statistics.
+ */
+struct pmc_driverstats {
+ int pm_intr_ignored; /* #interrupts ignored */
+ int pm_intr_processed; /* #interrupts processed */
+ int pm_intr_bufferfull; /* #interrupts with ENOSPC */
+ int pm_syscalls; /* #syscalls */
+ int pm_syscall_errors; /* #syscalls with errors */
+ int pm_buffer_requests; /* #buffer requests */
+ int pm_buffer_requests_failed; /* #failed buffer requests */
+ int pm_log_sweeps; /* #sample buffer processing passes */
+};
+
+/*
+ * CPU information.
+ */
+struct pmc_cpuinfo {
+ enum pmc_cputype pm_cputype; /* the kind of CPU */
+ uint32_t pm_ncpu; /* number of CPUs */
+ uint32_t pm_npmc; /* #PMCs per CPU */
+ uint32_t pm_nclass; /* #classes of PMCs */
+ struct pmc_classinfo pm_classes[PMC_CLASS_MAX];
+};
+
+/*
+ * Current PMC state.
+ */
+struct pmc_pmcinfo {
+ int32_t pm_cpu; /* CPU number */
+ struct pmc_info pm_pmcs[]; /* NPMC structs */
+};
+
+/*
* Prototypes
*/
@@ -40,10 +73,12 @@ int pmc_allocate(const char *_ctrspec, enum pmc_mode _mode, uint32_t _flags,
int pmc_attach(pmc_id_t _pmcid, pid_t _pid);
int pmc_capabilities(pmc_id_t _pmc, uint32_t *_caps);
int pmc_configure_logfile(int _fd);
+int pmc_flush_logfile(void);
int pmc_detach(pmc_id_t _pmcid, pid_t _pid);
int pmc_disable(int _cpu, int _pmc);
int pmc_enable(int _cpu, int _pmc);
-int pmc_get_driver_stats(struct pmc_op_getdriverstats *_gms);
+int pmc_get_driver_stats(struct pmc_driverstats *_gms);
+int pmc_get_msr(pmc_id_t _pmc, uint32_t *_msr);
int pmc_init(void);
int pmc_read(pmc_id_t _pmc, pmc_value_t *_value);
int pmc_release(pmc_id_t _pmc);
@@ -53,11 +88,12 @@ int pmc_start(pmc_id_t _pmc);
int pmc_stop(pmc_id_t _pmc);
int pmc_width(pmc_id_t _pmc, uint32_t *_width);
int pmc_write(pmc_id_t _pmc, pmc_value_t _value);
+int pmc_writelog(uint32_t _udata);
int pmc_ncpu(void);
int pmc_npmc(int _cpu);
-int pmc_cpuinfo(const struct pmc_op_getcpuinfo **_cpu_info);
-int pmc_pmcinfo(int _cpu, struct pmc_op_getpmcinfo **_pmc_info);
+int pmc_cpuinfo(const struct pmc_cpuinfo **_cpu_info);
+int pmc_pmcinfo(int _cpu, struct pmc_pmcinfo **_pmc_info);
const char *pmc_name_of_capability(uint32_t _c);
const char *pmc_name_of_class(enum pmc_class _pc);
@@ -70,12 +106,4 @@ const char *pmc_name_of_state(enum pmc_state _ps);
int pmc_event_names_of_class(enum pmc_class _cl, const char ***_eventnames,
int *_nevents);
-/*
- * Architecture specific extensions
- */
-
-#if __i386__ || __amd64__
-int pmc_x86_get_msr(pmc_id_t _pmc, uint32_t *_msr);
-#endif
-
#endif
diff --git a/lib/libpmc/pmclog.3 b/lib/libpmc/pmclog.3
new file mode 100644
index 0000000..1487e90
--- /dev/null
+++ b/lib/libpmc/pmclog.3
@@ -0,0 +1,276 @@
+.\" Copyright (c) 2005 Joseph Koshy. All rights reserved.
+.\"
+.\" Redistribution and use in source and binary forms, with or without
+.\" modification, are permitted provided that the following conditions
+.\" are met:
+.\" 1. Redistributions of source code must retain the above copyright
+.\" notice, this list of conditions and the following disclaimer.
+.\" 2. Redistributions in binary form must reproduce the above copyright
+.\" notice, this list of conditions and the following disclaimer in the
+.\" documentation and/or other materials provided with the distribution.
+.\"
+.\" This software is provided by Joseph Koshy ``as is'' and
+.\" any express or implied warranties, including, but not limited to, the
+.\" implied warranties of merchantability and fitness for a particular purpose
+.\" are disclaimed. in no event shall Joseph Koshy be liable
+.\" for any direct, indirect, incidental, special, exemplary, or consequential
+.\" damages (including, but not limited to, procurement of substitute goods
+.\" or services; loss of use, data, or profits; or business interruption)
+.\" however caused and on any theory of liability, whether in contract, strict
+.\" liability, or tort (including negligence or otherwise) arising in any way
+.\" out of the use of this software, even if advised of the possibility of
+.\" such damage.
+.\"
+.\" $FreeBSD$
+.\"
+.Dd June 1, 2005
+.Os
+.Dt PMCLOG 3
+.Sh NAME
+.Nm pmclog_open ,
+.Nm pmclog_close ,
+.Nm pmclog_read ,
+.Nm pmclog_feed
+.Nd parse event log data generated by
+.Xr hwpmc 4
+.Sh LIBRARY
+.Lb libpmc
+.Sh SYNOPSIS
+.In pmclog.h
+.Ft "void *"
+.Fn pmclog_open "int fd"
+.Ft void
+.Fn pmclog_close "void *cookie"
+.Ft int
+.Fn pmclog_read "void *cookie" "struct pmclog_ev *ev"
+.Ft int
+.Fn pmclog_feed "void *cookie" "char *data" "int len"
+.Sh DESCRIPTION
+These functions provide a way for application programs to extract
+events from an event stream generated by
+.Xr hwpmc 4 .
+.Pp
+A new event log parser is allocated using
+.Fn pmclog_open .
+Argument
+.Fa fd
+may be a file descriptor opened for reading if the event stream is
+present in a file, or the constant
+.Dv PMCLOG_FD_NONE
+for an event stream present in memory.
+This function returns a cookie that is passed into the other functions
+in this API set.
+.Pp
+Function
+.Fn pmclog_read
+returns the next available event in the event stream associated with
+argument
+.Fa cookie .
+Argument
+.Fa ev
+points to an event descriptor which will contain the result of a
+successfully parsed event.
+.Pp
+An event descriptor returned by
+.Fn pmclog_read
+has the following structure:
+.Bd -literal
+struct pmclog_ev {
+ enum pmclog_state pl_state; /* parser state after 'get_event()' */
+ off_t pl_offset; /* byte offset in stream */
+ size_t pl_count; /* count of records so far */
+ struct timespec pl_ts; /* log entry timestamp */
+ enum pmclog_type pl_type; /* log entry kind */
+ union { /* log entry data */
+ struct pmclog_ev_allocate pl_a;
+ struct pmclog_ev_proccsw pl_c;
+ struct pmclog_ev_dropnotify pl_d;
+ struct pmclog_ev_procexit pl_e;
+ struct pmclog_ev_initialize pl_i;
+ struct pmclog_ev_pcsample pl_s;
+ struct pmclog_ev_pmcattach pl_t;
+ struct pmclog_ev_userdata pl_u;
+ struct pmclog_ev_procexec pl_x;
+ } pl_u;
+};
+.Ed
+.Pp
+The current state of the parser is recorded in
+.Va pl_state .
+This field can take on the following values:
+.Bl -tag -width "PMCLOG_REQUIRE_DATA" -compact
+.It Dv PMCLOG_EOF
+.Pq For file based parsers only
+An end-of-file condition was encountered on the configured file
+descriptor.
+.It Dv PMCLOG_ERROR
+An error occurred during parsing.
+.It Dv PMCLOG_OK
+A complete event record was read into
+.Fa "*ev" .
+.It Dv PMCLOG_REQUIRE_DATA
+There was insufficient data in the event stream to assemble a complete
+event record.
+For memory based parsers, more data can be fed to the
+parser using function
+.Fn pmclog_feed .
+For file based parsers, function
+.Fn pmclog_read
+may be retried when data is available on the configured file
+descriptor.
+.El
+.Pp
+The rest of the event structure is valid only if field
+.Va pl_state
+contains
+.Dv PMCLOG_OK .
+Field
+.Va pl_offset
+contains the offset of the current record in the byte stream.
+Field
+.Va pl_count
+contains the serial number of this event.
+Field
+.Va pl_ts
+contains a timestamp with the system time when the event occurred.
+Field
+.Va pl_type
+denotes the kind of the event returned in argument
+.Fa *ev
+and is one of the following:
+.Bl -tag -width XXXXXXXXXXXXXXXXXXXXXXX -compact
+.It Dv PMCLOG_TYPE_DROPNOTIFY
+a marker indicating that
+.Xr hwpmc 4
+had to drop data due to a resource constraint.
+.It Dv PMCLOG_TYPE_INITIALIZE
+an initialization record.
+This is usually the first record in a log file.
+.It Dv PMCLOG_TYPE_PCSAMPLE
+A record containing an instruction pointer sample.
+.It Dv PMCLOG_TYPE_PMCALLOCATE
+A record describing a PMC allocation operation.
+.It Dv PMCLOG_TYPE_PMCATTACH
+A record describing a PMC attach operation.
+.It Dv PMCLOG_TYPE_PROCCSW
+A record describing a PMC reading at the time of a process context switch.
+.It Dv PMCLOG_TYPE_PROCEXIT
+A record describing the accumulated PMC reading for a process at the
+time of
+.Xr _exit 2 .
+.It Dv PMCLOG_TYPE_PROCEXEC
+A record describing an
+.Xr execve 2
+by a target process.
+.It Dv PMCLOG_TYPE_USERDATA
+A record containing user data.
+.El
+.Pp
+Function
+.Fn pmclog_feed
+is used with parsers configured to parse memory based event streams.
+It is intended to be called when function
+.Fn pmclog_read
+indicates the need for more data by returning
+.Dv PMCLOG_REQUIRE_DATA
+in field
+.Va pl_state
+of its event structure argument.
+Argument
+.Fa data
+points to the start of a memory buffer containing fresh event data.
+Argument
+.Fa len
+indicates the number of bytes of data available.
+The memory range
+.Bq data , data+len
+must remain valid till the next time
+.Fn pmclog_read
+returns an error.
+It is an error to use
+.Fn pmclog_feed
+on a parser configured to parse file data.
+.Pp
+Function
+.Fn pmclog_close
+releases the internal state allocated by a prior call
+to
+.Fn pmclog_open .
+.Sh RETURN VALUES
+Function
+.Fn pmclog_open
+will return a non-NULL value if successful or NULL otherwise.
+.Pp
+Function
+.Fn pmclog_read
+will return 0 in case a complete event record was successfully read,
+or will return -1 and will set the
+.Va pl_state
+field of the event record to the appropriate code in case of an error.
+.Pp
+Function
+.Fn pmclog_feed
+will return 0 on success or -1 in case of failure.
+.Sh EXAMPLES
+A template for using the log file parsing API is shown below in pseudocode:
+.Bd -literal
+void *parser; /* cookie */
+struct pmclog_ev ev; /* parsed event */
+int fd; /* file descriptor */
+
+fd = open(filename, O_RDONLY); /* open log file */
+parser = pmclog_open(fd); /* initialize parser */
+if (parser == NULL)
+ --handle an out of memory error--;
+
+/* read and parse data */
+while (pmclog_read(parser, &ev) == 0) {
+ assert(ev.pl_state == PMCLOG_OK);
+ /* process the event */
+ switch (ev.pl_type) {
+	case PMCLOG_TYPE_PMCALLOCATE:
+ --process a pmc allocation record--
+ break;
+ case PMCLOG_TYPE_PROCCSW:
+ --process a thread context switch record--
+ break;
+ case PMCLOG_TYPE_PCSAMPLE:
+ --process a PC sample--
+ break;
+ --and so on--
+ }
+}
+
+/* examine parser state */
+switch (ev.pl_state) {
+case PMCLOG_EOF:
+ --normal termination--
+ break;
+case PMCLOG_ERROR:
+ --look at errno here--
+ break;
+case PMCLOG_REQUIRE_DATA:
+ --arrange for more data to be available for parsing--
+ break;
+default:
+ assert(0);
+ /*NOTREACHED*/
+}
+
+pmclog_close(parser); /* cleanup */
+.Ed
+.Sh ERRORS
+A call to
+.Fn pmclog_open
+may fail with any of the errors returned by
+.Xr malloc 3 .
+.Pp
+A call to
+.Fn pmclog_read
+for a file based parser may fail with any of the errors returned by
+.Xr read 2 .
+.Sh SEE ALSO
+.Xr read 2 ,
+.Xr malloc 3 ,
+.Xr pmc 3 ,
+.Xr hwpmc 4
diff --git a/lib/libpmc/pmclog.c b/lib/libpmc/pmclog.c
new file mode 100644
index 0000000..8772c58
--- /dev/null
+++ b/lib/libpmc/pmclog.c
@@ -0,0 +1,532 @@
+/*-
+ * Copyright (c) 2005 Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmc.h>
+#include <sys/pmclog.h>
+
+#include <assert.h>
+#include <errno.h>
+#include <pmc.h>
+#include <pmclog.h>
+#include <stddef.h>
+#include <stdlib.h>
+#include <string.h>
+#include <strings.h>
+#include <unistd.h>
+
+#include <machine/pmc_mdep.h>
+
+#define PMCLOG_BUFFER_SIZE 4096
+
+/*
+ * API NOTES
+ *
+ * The pmclog(3) API is oriented towards parsing an event stream in
+ * "realtime", i.e., from an data source that may or may not preserve
+ * record boundaries -- for example when the data source is elsewhere
+ * on a network. The API allows data to be fed into the parser zero
+ * or more bytes at a time.
+ *
+ * The state for a log file parser is maintained in a 'struct
+ * pmclog_parse_state'. Parser invocations are done by calling
+ * 'pmclog_read()'; this function will inform the caller when a
+ * complete event is parsed.
+ *
+ * The parser first assembles a complete log file event in an internal
+ * work area (see "ps_saved" below). Once a complete log file event
+ * is read, the parser then parses it and converts it to an event
+ * descriptor usable by the client. We could possibly avoid this two
+ * step process by directly parsing the input log to set fields in the
+ * event record. However the parser's state machine would get
+ * insanely complicated, and this code is unlikely to be used in
+ * performance critical paths.
+ */
+
+enum pmclog_parser_state {
+ PL_STATE_NEW_RECORD, /* in-between records */
+ PL_STATE_EXPECTING_HEADER, /* header being read */
+ PL_STATE_PARTIAL_RECORD, /* header present but not the record */
+ PL_STATE_ERROR /* parsing error encountered */
+};
+
+struct pmclog_parse_state {
+ enum pmclog_parser_state ps_state;
+ enum pmc_cputype ps_arch; /* log file architecture */
+ uint32_t ps_version; /* hwpmc version */
+ int ps_initialized; /* whether initialized */
+ int ps_count; /* count of records processed */
+ off_t ps_offset; /* stream byte offset */
+ union pmclog_entry ps_saved; /* saved partial log entry */
+ int ps_svcount; /* #bytes saved */
+ int ps_fd; /* active fd or -1 */
+ char *ps_buffer; /* scratch buffer if fd != -1 */
+ char *ps_data; /* current parse pointer */
+ size_t ps_len; /* length of buffered data */
+};
+
+#define PMCLOG_HEADER_FROM_SAVED_STATE(PS) \
+ (* ((uint32_t *) &(PS)->ps_saved))
+
+#define PMCLOG_INITIALIZE_READER(LE,A) LE = (uint32_t *) &(A)
+#define PMCLOG_READ32(LE,V) do { \
+ (V) = *(LE)++; \
+ } while (0)
+#define PMCLOG_READ64(LE,V) do { \
+ uint64_t _v; \
+ _v = (uint64_t) *(LE)++; \
+ _v |= ((uint64_t) *(LE)++) << 32; \
+ (V) = _v; \
+ } while (0)
+
+#define PMCLOG_READSTRING(LE,DST,LEN) strlcpy((DST), (char *) (LE), (LEN))
+
+/*
+ * Assemble a log record from '*len' octets starting from address '*data'.
+ * Update 'data' and 'len' to reflect the number of bytes consumed.
+ *
+ * '*data' is potentially an unaligned address and '*len' octets may
+ * not be enough to complete an event record.
+ */
+
+static enum pmclog_parser_state
+pmclog_get_record(struct pmclog_parse_state *ps, char **data, ssize_t *len)
+{
+ int avail, copylen, recordsize, used;
+ uint32_t h;
+ const int HEADERSIZE = sizeof(uint32_t);
+ char *src, *dst;
+
+ if ((avail = *len) <= 0)
+ return (ps->ps_state = PL_STATE_ERROR);
+
+ src = *data;
+ h = used = 0;
+
+ if (ps->ps_state == PL_STATE_NEW_RECORD)
+ ps->ps_svcount = 0;
+
+ dst = (char *) &ps->ps_saved + ps->ps_svcount;
+
+ switch (ps->ps_state) {
+ case PL_STATE_NEW_RECORD:
+
+ /*
+ * Transitions:
+ *
+ * Case A: avail < headersize
+ * -> 'expecting header'
+ *
+ * Case B: avail >= headersize
+ * B.1: avail < recordsize
+ * -> 'partial record'
+ * B.2: avail >= recordsize
+ * -> 'new record'
+ */
+
+ copylen = avail < HEADERSIZE ? avail : HEADERSIZE;
+ bcopy(src, dst, copylen);
+ ps->ps_svcount = used = copylen;
+
+ if (copylen < HEADERSIZE) {
+ ps->ps_state = PL_STATE_EXPECTING_HEADER;
+ goto done;
+ }
+
+ src += copylen;
+ dst += copylen;
+
+ h = PMCLOG_HEADER_FROM_SAVED_STATE(ps);
+ recordsize = PMCLOG_HEADER_TO_LENGTH(h);
+
+ if (recordsize <= 0)
+ goto error;
+
+ if (recordsize <= avail) { /* full record available */
+ bcopy(src, dst, recordsize - copylen);
+ ps->ps_svcount = used = recordsize;
+ goto done;
+ }
+
+ /* header + a partial record is available */
+ bcopy(src, dst, avail - copylen);
+ ps->ps_svcount = used = avail;
+ ps->ps_state = PL_STATE_PARTIAL_RECORD;
+
+ break;
+
+ case PL_STATE_EXPECTING_HEADER:
+
+ /*
+ * Transitions:
+ *
+ * Case C: avail+saved < headersize
+ * -> 'expecting header'
+ *
+ * Case D: avail+saved >= headersize
+ * D.1: avail+saved < recordsize
+ * -> 'partial record'
+ * D.2: avail+saved >= recordsize
+ * -> 'new record'
+ * (see PARTIAL_RECORD handling below)
+ */
+
+ if (avail + ps->ps_svcount < HEADERSIZE) {
+ bcopy(src, dst, avail);
+ ps->ps_svcount += avail;
+ used = avail;
+ break;
+ }
+
+ used = copylen = HEADERSIZE - ps->ps_svcount;
+ bcopy(src, dst, copylen);
+ src += copylen;
+ dst += copylen;
+ avail -= copylen;
+ ps->ps_svcount += copylen;
+
+ /*FALLTHROUGH*/
+
+ case PL_STATE_PARTIAL_RECORD:
+
+ /*
+ * Transitions:
+ *
+ * Case E: avail+saved < recordsize
+ * -> 'partial record'
+ *
+ * Case F: avail+saved >= recordsize
+ * -> 'new record'
+ */
+
+ h = PMCLOG_HEADER_FROM_SAVED_STATE(ps);
+ recordsize = PMCLOG_HEADER_TO_LENGTH(h);
+
+ if (recordsize <= 0)
+ goto error;
+
+ if (avail + ps->ps_svcount < recordsize) {
+ copylen = avail;
+ ps->ps_state = PL_STATE_PARTIAL_RECORD;
+ } else {
+ copylen = recordsize - ps->ps_svcount;
+ ps->ps_state = PL_STATE_NEW_RECORD;
+ }
+
+ bcopy(src, dst, copylen);
+ ps->ps_svcount += copylen;
+ used += copylen;
+ break;
+
+ default:
+ goto error;
+ }
+
+ done:
+ *data += used;
+ *len -= used;
+ return ps->ps_state;
+
+ error:
+ ps->ps_state = PL_STATE_ERROR;
+ return ps->ps_state;
+}
+
+/*
+ * Get an event from the stream pointed to by '*data'. '*len'
+ * indicates the number of bytes available to parse. Arguments
+ * '*data' and '*len' are updated to indicate the number of bytes
+ * consumed.
+ */
+
+static int
+pmclog_get_event(void *cookie, char **data, ssize_t *len,
+ struct pmclog_ev *ev)
+{
+ int evlen, pathlen;
+ uint32_t h, *le;
+ enum pmclog_parser_state e;
+ struct pmclog_parse_state *ps;
+
+ ps = (struct pmclog_parse_state *) cookie;
+
+ assert(ps->ps_state != PL_STATE_ERROR);
+
+ if ((e = pmclog_get_record(ps,data,len)) == PL_STATE_ERROR) {
+ ev->pl_state = PMCLOG_ERROR;
+ return -1;
+ }
+
+ if (e != PL_STATE_NEW_RECORD) {
+ ev->pl_state = PMCLOG_REQUIRE_DATA;
+ return -1;
+ }
+
+ PMCLOG_INITIALIZE_READER(le, ps->ps_saved);
+
+ PMCLOG_READ32(le,h);
+
+ if (!PMCLOG_HEADER_CHECK_MAGIC(h)) {
+ ps->ps_state = PL_STATE_ERROR;
+ ev->pl_state = PMCLOG_ERROR;
+ return -1;
+ }
+
+ /* copy out the time stamp */
+ PMCLOG_READ32(le,ev->pl_ts.tv_sec);
+ PMCLOG_READ32(le,ev->pl_ts.tv_nsec);
+
+ evlen = PMCLOG_HEADER_TO_LENGTH(h);
+
+#define PMCLOG_GET_PATHLEN(P,E,TYPE) do { \
+ (P) = (E) - offsetof(struct TYPE, pl_pathname); \
+ if ((P) > PATH_MAX || (P) < 0) \
+ goto error; \
+ } while (0)
+
+ switch (ev->pl_type = PMCLOG_HEADER_TO_TYPE(h)) {
+ case PMCLOG_TYPE_CLOSELOG:
+ case PMCLOG_TYPE_DROPNOTIFY:
+ /* nothing to do */
+ break;
+ case PMCLOG_TYPE_INITIALIZE:
+ PMCLOG_READ32(le,ev->pl_u.pl_i.pl_version);
+ PMCLOG_READ32(le,ev->pl_u.pl_i.pl_arch);
+ ps->ps_version = ev->pl_u.pl_i.pl_version;
+ ps->ps_arch = ev->pl_u.pl_i.pl_arch;
+ ps->ps_initialized = 1;
+ break;
+ case PMCLOG_TYPE_MAPPINGCHANGE:
+ PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_mappingchange);
+ PMCLOG_READ32(le,ev->pl_u.pl_m.pl_type);
+ PMCLOG_READADDR(le,ev->pl_u.pl_m.pl_start);
+ PMCLOG_READADDR(le,ev->pl_u.pl_m.pl_end);
+ PMCLOG_READ32(le,ev->pl_u.pl_m.pl_pid);
+ PMCLOG_READSTRING(le, ev->pl_u.pl_m.pl_pathname, pathlen);
+ break;
+ case PMCLOG_TYPE_PCSAMPLE:
+ PMCLOG_READ32(le,ev->pl_u.pl_s.pl_pid);
+ PMCLOG_READADDR(le,ev->pl_u.pl_s.pl_pc);
+ PMCLOG_READ32(le,ev->pl_u.pl_s.pl_pmcid);
+ break;
+ case PMCLOG_TYPE_PMCALLOCATE:
+ PMCLOG_READ32(le,ev->pl_u.pl_a.pl_pmcid);
+ PMCLOG_READ32(le,ev->pl_u.pl_a.pl_event);
+ PMCLOG_READ32(le,ev->pl_u.pl_a.pl_flags);
+ if ((ev->pl_u.pl_a.pl_evname =
+ pmc_name_of_event(ev->pl_u.pl_a.pl_event)) == NULL)
+ goto error;
+ break;
+ case PMCLOG_TYPE_PMCATTACH:
+ PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_pmcattach);
+ PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pmcid);
+ PMCLOG_READ32(le,ev->pl_u.pl_t.pl_pid);
+ PMCLOG_READSTRING(le,ev->pl_u.pl_t.pl_pathname,pathlen);
+ break;
+ case PMCLOG_TYPE_PMCDETACH:
+ PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pmcid);
+ PMCLOG_READ32(le,ev->pl_u.pl_d.pl_pid);
+ break;
+ case PMCLOG_TYPE_PROCCSW:
+ PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pmcid);
+ PMCLOG_READ64(le,ev->pl_u.pl_c.pl_value);
+ PMCLOG_READ32(le,ev->pl_u.pl_c.pl_pid);
+ break;
+ case PMCLOG_TYPE_PROCEXEC:
+ PMCLOG_GET_PATHLEN(pathlen,evlen,pmclog_procexec);
+ PMCLOG_READ32(le,ev->pl_u.pl_x.pl_pid);
+ PMCLOG_READSTRING(le,ev->pl_u.pl_x.pl_pathname,pathlen);
+ break;
+ case PMCLOG_TYPE_PROCEXIT:
+ PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pmcid);
+ PMCLOG_READ64(le,ev->pl_u.pl_e.pl_value);
+ PMCLOG_READ32(le,ev->pl_u.pl_e.pl_pid);
+ break;
+ case PMCLOG_TYPE_PROCFORK:
+ PMCLOG_READ32(le,ev->pl_u.pl_f.pl_oldpid);
+ PMCLOG_READ32(le,ev->pl_u.pl_f.pl_newpid);
+ break;
+ case PMCLOG_TYPE_SYSEXIT:
+ PMCLOG_READ32(le,ev->pl_u.pl_se.pl_pid);
+ break;
+ case PMCLOG_TYPE_USERDATA:
+ PMCLOG_READ32(le,ev->pl_u.pl_u.pl_userdata);
+ break;
+ default: /* unknown record type */
+ ps->ps_state = PL_STATE_ERROR;
+ ev->pl_state = PMCLOG_ERROR;
+ return -1;
+ }
+
+ ev->pl_offset = (ps->ps_offset += evlen);
+ ev->pl_count = (ps->ps_count += 1);
+ ev->pl_state = PMCLOG_OK;
+ return 0;
+
+ error:
+ ev->pl_state = PMCLOG_ERROR;
+ ps->ps_state = PL_STATE_ERROR;
+ return -1;
+}
+
+/*
+ * Extract and return the next event from the byte stream.
+ *
+ * Returns 0 and sets the event's state to PMCLOG_OK in case an event
+ * was successfully parsed. Otherwise this function returns -1 and
+ * sets the event's state to one of PMCLOG_REQUIRE_DATA (if more data
+ * is needed) or PMCLOG_EOF (if an EOF was seen) or PMCLOG_ERROR if
+ * a parse error was encountered.
+ */
+
+int
+pmclog_read(void *cookie, struct pmclog_ev *ev)
+{
+ ssize_t nread;
+ struct pmclog_parse_state *ps;
+
+ ps = (struct pmclog_parse_state *) cookie;
+
+ if (ps->ps_state == PL_STATE_ERROR) {
+ ev->pl_state = PMCLOG_ERROR;
+ return -1;
+ }
+
+ /*
+ * If there isn't enough data left for a new event try and get
+ * more data.
+ */
+ if (ps->ps_len == 0) {
+ ev->pl_state = PMCLOG_REQUIRE_DATA;
+
+ /*
+ * If we have a valid file descriptor to read from, attempt
+ * to read from that. This read may return with an error,
+ * (which may be EAGAIN or other recoverable error), or
+ * can return EOF.
+ */
+ if (ps->ps_fd != PMCLOG_FD_NONE) {
+ nread = read(ps->ps_fd, ps->ps_buffer,
+ PMCLOG_BUFFER_SIZE);
+
+ if (nread <= 0) {
+ ev->pl_state = nread < 0 ? PMCLOG_ERROR :
+ PMCLOG_EOF;
+ return -1;
+ }
+
+ ps->ps_len = nread;
+ ps->ps_data = ps->ps_buffer;
+ } else
+ return -1;
+ }
+
+ assert(ps->ps_len > 0);
+
+ /*
+ * Retrieve one event from the byte stream.
+ */
+ return pmclog_get_event(ps, &ps->ps_data, &ps->ps_len, ev);
+}
+
+/*
+ * Feed data to a memory based parser.
+ *
+ * The memory area pointed to by 'data' needs to be valid till the
+ * next error return from pmclog_read().
+ */
+
+int
+pmclog_feed(void *cookie, char *data, int len)
+{
+ struct pmclog_parse_state *ps;
+
+ ps = (struct pmclog_parse_state *) cookie;
+
+ if (len < 0 || /* invalid length */
+ ps->ps_buffer || /* called for a file parser */
+ ps->ps_len != 0) /* unnecessary call */
+ return -1;
+
+ ps->ps_data = data;
+ ps->ps_len = len;
+
+ return 0;
+}
+
+/*
+ * Allocate and initialize parser state.
+ */
+
+void *
+pmclog_open(int fd)
+{
+ struct pmclog_parse_state *ps;
+
+ if ((ps = (struct pmclog_parse_state *) malloc(sizeof(*ps))) == NULL)
+ return NULL;
+
+ ps->ps_state = PL_STATE_NEW_RECORD;
+ ps->ps_arch = -1;
+ ps->ps_initialized = 0;
+ ps->ps_count = 0;
+ ps->ps_offset = (off_t) 0;
+ bzero(&ps->ps_saved, sizeof(ps->ps_saved));
+ ps->ps_svcount = 0;
+ ps->ps_fd = fd;
+ ps->ps_data = NULL;
+ ps->ps_buffer = NULL;
+ ps->ps_len = 0;
+
+ /* allocate space for a work area */
+ if (ps->ps_fd != PMCLOG_FD_NONE) {
+ if ((ps->ps_buffer = malloc(PMCLOG_BUFFER_SIZE)) == NULL)
+			return (free(ps), NULL); /* don't leak 'ps' */
+ }
+
+ return ps;
+}
+
+
+/*
+ * Free up parser state.
+ */
+
+void
+pmclog_close(void *cookie)
+{
+ struct pmclog_parse_state *ps;
+
+ ps = (struct pmclog_parse_state *) cookie;
+
+ if (ps->ps_buffer)
+ free(ps->ps_buffer);
+
+ free(ps);
+}
diff --git a/lib/libpmc/pmclog.h b/lib/libpmc/pmclog.h
new file mode 100644
index 0000000..3e3119e
--- /dev/null
+++ b/lib/libpmc/pmclog.h
@@ -0,0 +1,146 @@
+/*-
+ * Copyright (c) 2005 Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _PMCLOG_H_
+#define _PMCLOG_H_
+
+#include <sys/pmclog.h>
+
+enum pmclog_state {
+ PMCLOG_OK,
+ PMCLOG_EOF,
+ PMCLOG_REQUIRE_DATA,
+ PMCLOG_ERROR
+};
+
+struct pmclog_ev_dropnotify {
+};
+
+struct pmclog_ev_closelog {
+};
+
+struct pmclog_ev_initialize {
+ uint32_t pl_version;
+ uint32_t pl_arch;
+};
+
+struct pmclog_ev_mappingchange {
+ uint32_t pl_type;
+ pid_t pl_pid;
+ uintfptr_t pl_start;
+ uintfptr_t pl_end;
+ char pl_pathname[PATH_MAX];
+};
+
+struct pmclog_ev_pcsample {
+ uintfptr_t pl_pc;
+ pid_t pl_pid;
+ pmc_id_t pl_pmcid;
+};
+
+struct pmclog_ev_pmcallocate {
+ uint32_t pl_event;
+ const char * pl_evname;
+ uint32_t pl_flags;
+ pmc_id_t pl_pmcid;
+};
+
+struct pmclog_ev_pmcattach {
+ pmc_id_t pl_pmcid;
+ pid_t pl_pid;
+ char pl_pathname[PATH_MAX];
+};
+
+struct pmclog_ev_pmcdetach {
+ pmc_id_t pl_pmcid;
+ pid_t pl_pid;
+};
+
+struct pmclog_ev_proccsw {
+ pid_t pl_pid;
+ pmc_id_t pl_pmcid;
+ pmc_value_t pl_value;
+};
+
+struct pmclog_ev_procexec {
+ pid_t pl_pid;
+ char pl_pathname[PATH_MAX];
+};
+
+struct pmclog_ev_procexit {
+ uint32_t pl_pid;
+ pmc_id_t pl_pmcid;
+ pmc_value_t pl_value;
+};
+
+struct pmclog_ev_procfork {
+ pid_t pl_oldpid;
+ pid_t pl_newpid;
+};
+
+struct pmclog_ev_sysexit {
+ pid_t pl_pid;
+};
+
+struct pmclog_ev_userdata {
+ uint32_t pl_userdata;
+};
+
+struct pmclog_ev {
+ enum pmclog_state pl_state; /* state after 'get_event()' */
+ off_t pl_offset; /* byte offset in stream */
+ size_t pl_count; /* count of records so far */
+ struct timespec pl_ts; /* log entry timestamp */
+ enum pmclog_type pl_type; /* type of log entry */
+ union { /* log entry data */
+ struct pmclog_ev_closelog pl_cl;
+ struct pmclog_ev_dropnotify pl_dn;
+ struct pmclog_ev_initialize pl_i;
+ struct pmclog_ev_mappingchange pl_m;
+ struct pmclog_ev_pcsample pl_s;
+ struct pmclog_ev_pmcallocate pl_a;
+ struct pmclog_ev_pmcattach pl_t;
+ struct pmclog_ev_pmcdetach pl_d;
+ struct pmclog_ev_proccsw pl_c;
+ struct pmclog_ev_procexec pl_x;
+ struct pmclog_ev_procexit pl_e;
+ struct pmclog_ev_procfork pl_f;
+ struct pmclog_ev_sysexit pl_se;
+ struct pmclog_ev_userdata pl_u;
+ } pl_u;
+};
+
+#define PMCLOG_FD_NONE (-1)
+
+void *pmclog_open(int _fd);
+int pmclog_feed(void *_cookie, char *_data, int _len);
+int pmclog_read(void *_cookie, struct pmclog_ev *_ev);
+void pmclog_close(void *_cookie);
+
+#endif
+
diff --git a/share/man/man4/hwpmc.4 b/share/man/man4/hwpmc.4
index d2f7309..b9ae636 100644
--- a/share/man/man4/hwpmc.4
+++ b/share/man/man4/hwpmc.4
@@ -186,18 +186,48 @@ The
.Ic PMC_OP_PMCALLOCATE
operation supports the following flags that modify the behavior
of an allocated PMC:
-.Bl -tag -width indent
+.Bl -tag -width indent -compact
.It Dv PMC_F_DESCENDANTS
-This flag is valid only for a PMC being allocated in process-private
+This modifier is valid only for a PMC being allocated in process-private
mode.
It signifies that the PMC will track hardware events for its
target process and the target's current and future descendants.
+.It Dv PMC_F_KGMON
+This modifier is valid only for a PMC being allocated in system-wide
+sampling mode.
+It signifies that the PMC's sampling interrupt is to be used to drive
+kernel profiling via
+.Xr kgmon 8 .
+.It Dv PMC_F_LOG_PROCCSW
+This modifier is valid only for a PMC being allocated in process-private
+mode.
+When this modifier is present, at every process context switch time,
+.Nm
+will append a record containing the count of the hardware events
+seen by the process to the configured log file.
+.It Dv PMC_F_LOG_PROCEXIT
+This modifier is valid only for a PMC being allocated in process-private
+mode.
+With this modifier present,
+.Nm
+will maintain per-process counts for each target process attached to
+a PMC.
+At process exit time, a record containing the target process' pid and
+the accumulated per-process count for that process will be written to the
+configured log file.
.El
+Modifiers
+.Dv PMC_F_LOG_PROCEXIT
+and
+.Dv PMC_F_LOG_PROCCSW
+may be used in combination with modifier
+.Dv PMC_F_DESCENDANTS
+to track the behaviour of complex pipelines of processes.
.Ss SIGNALS
The
.Nm
driver may deliver signals to processes that have allocated PMCs:
-.Bl -tag -width indent
+.Bl -tag -width "XXXXXXXX" -compact
.It Bq SIGIO
A
.Ic PMC_OP_PMCRW
@@ -226,6 +256,12 @@ driver supports the following operations:
.Bl -tag -width indent
.It Ic PMC_OP_CONFIGURELOG
Configure a log file for sampling mode PMCs.
+.It Ic PMC_OP_FLUSHLOG
+Transfer buffered log data inside
+.Nm
+to a configured output file.
+This operation returns to the caller after the write operation
+has returned.
.It Ic PMC_OP_GETCPUINFO
Retrieve information about the number of CPUs on the system and
the number of hardware performance monitoring counters available per-CPU.
@@ -316,9 +352,11 @@ The behavior of
.Nm
is influenced by the following
.Xr sysctl 8
+and
+.Xr loader 8
tunables:
.Bl -tag -width indent
-.It Va kern.hwpmc.debugflags
+.It Va kern.hwpmc.debugflags Pq string, read-write
(Only available if the
.Nm
driver was compiled with
@@ -326,22 +364,34 @@ driver was compiled with
Control the verbosity of debug messages from the
.Nm
driver.
-.It Va kern.hwpmc.hashsize
+.It Va kern.hwpmc.hashsize Pq integer, read-only
The number of rows in the hash-tables used to keep track of owner and
target processes.
-.It Va kern.hwpmc.mtxpoolsize
+The default is 16.
+.It Va kern.hwpmc.logbuffersize Pq integer, read-only
+The size in kilobytes of each log buffer used by
+.Nm Ap s
+logging function.
+The default buffer size is 4KB.
+.It Va kern.hwpmc.mtxpoolsize Pq integer, read-only
The size of the spin mutex pool used by the PMC driver.
-.It Va kern.hwpmc.pcpubuffersize
-The size of the per-cpu hash table used when performing system-wide
-statistical profiling.
-.It Va security.bsd.unprivileged_syspmcs
+The default is 32.
+.It Va kern.hwpmc.nbuffers Pq integer, read-only
+The number of log buffers used by
+.Nm
+for logging.
+The default is 16.
+.It Va kern.hwpmc.nsamples Pq integer, read-only
+The number of entries in the per-cpu ring buffer used during sampling.
+The default is 16.
+.It Va security.bsd.unprivileged_syspmcs Pq boolean, read-write
If set to non-zero, allow unprivileged processes to allocate system-wide
PMCs.
The default value is 0.
-.It Va security.bsd.unprivileged_proc_debug
+.It Va security.bsd.unprivileged_proc_debug Pq boolean, read-write
If set to 0, the
.Nm
-driver will only allow privileged process to attach PMCs to other
+driver will only allow privileged processes to attach PMCs to other
processes.
.El
.Pp
@@ -361,11 +411,9 @@ Set the
tunable
.Va "security.bsd.unprivileged_syspmcs"
to 0.
-.Pp
This ensures that unprivileged processes cannot allocate system-wide
PMCs and thus cannot observe the hardware behavior of the system
as a whole.
-.Pp
This tunable may also be set at boot time using
.Xr loader 8 ,
or with
@@ -379,7 +427,6 @@ Set the
tunable
.Va "security.bsd.unprivileged_proc_debug"
to 0.
-.Pp
This will ensure that an unprivileged process cannot attach a PMC
to any process other than itself and thus cannot observe the hardware
behavior of other processes with the same credentials.
@@ -390,6 +437,9 @@ System administrators should note that on IA-32 platforms
makes the content of the IA-32 TSC counter available to all processes
via the RDTSC instruction.
.Sh IMPLEMENTATION NOTES
+.Ss SMP Symmetry
+The kernel driver requires all physical CPUs in an SMP system to have
+identical performance monitoring counter hardware.
.Ss i386 TSC Handling
Historically, on the x86 architecture,
.Fx
@@ -398,9 +448,6 @@ read the TSC using the RDTSC instruction.
The
.Nm
driver preserves this semantic.
-.Pp
-TSCs are treated as shared, read-only counters and hence are
-only allowed to be allocated in system-wide counting mode.
.Ss Intel P4/HTT Handling
On CPUs with HTT support, Intel P4 PMCs are capable of qualifying
only a subset of hardware events on a per-logical CPU basis.
@@ -410,6 +457,44 @@ PMCs, then the
driver will reject allocation requests for process-private PMCs that
request counting of hardware events that cannot be counted separately
for each logical CPU.
+.Ss Intel Pentium-Pro Handling
+Writing a value to the PMC MSRs of Intel Pentium-Pro style PMCs
+(found in
+.Tn "Intel Pentium Pro" ,
+.Tn "Pentium II" ,
+.Tn "Pentium III" ,
+.Tn "Pentium M"
+and
+.Tn "Celeron"
+processors) will replicate bit 31 of the
+value being written into the upper 8 bits of the MSR,
+bringing down the usable width of these PMCs to 31 bits.
+For process-virtual PMCs, the
+.Nm
+driver implements a workaround in software and makes the corrected 64
+bit count available via the
+.Ic PMC_OP_RW
+operation.
+Processes that intend to use RDPMC instructions directly or
+that intend to write values larger than 2^31 into these PMCs with
+.Ic PMC_OP_RW
+need to be aware of this hardware limitation.
+.Sh DIAGNOSTICS
+.Bl -diag
+.It hwpmc: tunable hashsize=%d must be greater than zero.
+A negative value was supplied for tunable
+.Va kern.hwpmc.hashsize .
+.It hwpmc: tunable logbuffersize=%d must be greater than zero.
+A negative value was supplied for tunable
+.Va kern.hwpmc.logbuffersize .
+.It hwpmc: tunable nlogbuffers=%d must be greater than zero.
+A negative value was supplied for tunable
+.Va kern.hwpmc.nlogbuffers .
+.It hwpmc: tunable nsamples=%d out of range.
+The value for tunable
+.Va kern.hwpmc.nsamples
+was negative or greater than 65535.
+.El
.Sh ERRORS
A command issued to the
.Nm
@@ -567,7 +652,11 @@ An
operation was issued on a PMC whose MSR has been retrieved using
.Ic PMC_OP_PMCX86GETMSR .
.It Bq Er ESRCH
-A process issued a PMC operation request without having allocated any PMCs.
+A process issued a PMC operation request without having allocated any
+PMCs.
+.It Bq Er ESRCH
+A process issued a PMC operation request after the PMC was detached
+from all of its target processes.
.It Bq Er ESRCH
A
.Ic PMC_OP_PMCATTACH
@@ -580,9 +669,6 @@ operation is not being monitored by the
driver.
.El
.Sh BUGS
-The kernel driver requires all CPUs in an SMP system to be symmetric
-with respect to their performance monitoring counter resources.
-.Pp
The driver samples the state of the kernel's logical processor support
at the time of initialization (i.e., at module load time).
On CPUs supporting logical processors, the driver could misbehave if
@@ -591,6 +677,7 @@ driver is active.
.Sh SEE ALSO
.Xr kenv 1 ,
.Xr pmc 3 ,
+.Xr kgmon 8 ,
.Xr kldload 8 ,
.Xr pmccontrol 8 ,
.Xr pmcstat 8 ,
diff --git a/sys/alpha/include/pmc_mdep.h b/sys/alpha/include/pmc_mdep.h
index 54100fb..7fc48f5 100644
--- a/sys/alpha/include/pmc_mdep.h
+++ b/sys/alpha/include/pmc_mdep.h
@@ -7,4 +7,17 @@
#ifndef _MACHINE_PMC_MDEP_H_
#define _MACHINE_PMC_MDEP_H_
+union pmc_md_op_pmcallocate {
+ uint64_t __pad[4];
+};
+
+/* Logging */
+#define PMCLOG_READADDR PMCLOG_READ64
+#define PMCLOG_EMITADDR PMCLOG_EMIT64
+
+#if _KERNEL
+union pmc_md_pmc {
+};
+
+#endif
#endif /* !_MACHINE_PMC_MDEP_H_ */
diff --git a/sys/amd64/include/pmc_mdep.h b/sys/amd64/include/pmc_mdep.h
index 19082ba..4a67658 100644
--- a/sys/amd64/include/pmc_mdep.h
+++ b/sys/amd64/include/pmc_mdep.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2003, Joseph Koshy
+ * Copyright (c) 2003-2005 Joseph Koshy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -31,46 +31,33 @@
#ifndef _MACHINE_PMC_MDEP_H
#define _MACHINE_PMC_MDEP_H 1
-#include <machine/specialreg.h>
+#include <dev/hwpmc/hwpmc_amd.h>
+#include <dev/hwpmc/hwpmc_piv.h>
-/* AMD K7 PMCs */
+union pmc_md_op_pmcallocate {
+ struct pmc_md_amd_op_pmcallocate pm_amd;
+ struct pmc_md_p4_op_pmcallocate pm_p4;
+ uint64_t __pad[4];
+};
-#define K8_NPMCS 5 /* 1 TSC + 4 PMCs */
+/* Logging */
+#define PMCLOG_READADDR PMCLOG_READ64
+#define PMCLOG_EMITADDR PMCLOG_EMIT64
-#define K8_PMC_COUNTERMASK 0xFF000000
-#define K8_PMC_TO_COUNTER(x) (((x) << 24) & K8_PMC_COUNTERMASK)
-#define K8_PMC_INVERT (1 << 23)
-#define K8_PMC_ENABLE (1 << 22)
-#define K8_PMC_INT (1 << 20)
-#define K8_PMC_PC (1 << 19)
-#define K8_PMC_EDGE (1 << 18)
-#define K8_PMC_OS (1 << 17)
-#define K8_PMC_USR (1 << 16)
+#ifdef _KERNEL
-#define K8_PMC_UNITMASK_M 0x10
-#define K8_PMC_UNITMASK_O 0x08
-#define K8_PMC_UNITMASK_E 0x04
-#define K8_PMC_UNITMASK_S 0x02
-#define K8_PMC_UNITMASK_I 0x01
-#define K8_PMC_UNITMASK_MOESI 0x1F
+union pmc_md_pmc {
+ struct pmc_md_amd_pmc pm_amd;
+ struct pmc_md_p4_pmc pm_p4;
+};
-#define K8_PMC_UNITMASK 0xFF00
-#define K8_PMC_EVENTMASK 0x00FF
-#define K8_PMC_TO_UNITMASK(x) (((x) << 8) & K8_PMC_UNITMASK)
-#define K8_PMC_TO_EVENTMASK(x) ((x) & 0xFF)
-#define K8_VALID_BITS (K8_PMC_COUNTERMASK | K8_PMC_INVERT | \
- K8_PMC_ENABLE | K8_PMC_INT | K8_PMC_PC | K8_PMC_EDGE | K8_PMC_OS | \
- K8_PMC_USR | K8_PMC_UNITMASK | K8_PMC_EVENTMASK)
-
-#ifdef _KERNEL
+struct pmc;
/*
* Prototypes
*/
-#if defined(__amd64__)
-struct pmc_mdep *pmc_amd_initialize(void);
-#endif /* defined(__i386__) */
+void pmc_x86_lapic_enable_pmc_interrupt(void);
-#endif /* _KERNEL */
+#endif
#endif /* _MACHINE_PMC_MDEP_H */
diff --git a/sys/arm/include/pmc_mdep.h b/sys/arm/include/pmc_mdep.h
index 54100fb..4e02fc6 100644
--- a/sys/arm/include/pmc_mdep.h
+++ b/sys/arm/include/pmc_mdep.h
@@ -7,4 +7,18 @@
#ifndef _MACHINE_PMC_MDEP_H_
#define _MACHINE_PMC_MDEP_H_
+union pmc_md_op_pmcallocate {
+ uint64_t __pad[4];
+};
+
+/* Logging */
+#define PMCLOG_READADDR PMCLOG_READ32
+#define PMCLOG_EMITADDR PMCLOG_EMIT32
+
+#if _KERNEL
+union pmc_md_pmc {
+};
+
+#endif
+
#endif /* !_MACHINE_PMC_MDEP_H_ */
diff --git a/sys/conf/files b/sys/conf/files
index cedc74e..c8f24f3 100644
--- a/sys/conf/files
+++ b/sys/conf/files
@@ -1070,6 +1070,7 @@ gnu/ext2fs/ext2_vnops.c optional ext2fs
# Support for hardware performance monitoring counters
#
dev/hwpmc/hwpmc_mod.c optional hwpmc
+dev/hwpmc/hwpmc_logging.c optional hwpmc
#
# isdn4bsd device drivers
#
diff --git a/sys/conf/files.alpha b/sys/conf/files.alpha
index 8fee17a..a048627 100644
--- a/sys/conf/files.alpha
+++ b/sys/conf/files.alpha
@@ -179,6 +179,7 @@ dev/fb/splash.c optional splash
dev/fb/vga.c optional vga
dev/fdc/fdc.c optional fdc
dev/fdc/fdc_isa.c optional fdc isa
+dev/hwpmc/hwpmc_alpha.c optional hwpmc
dev/kbd/atkbd.c optional atkbd
dev/kbd/atkbdc.c optional atkbdc
dev/kbd/kbd.c optional atkbd
diff --git a/sys/conf/files.amd64 b/sys/conf/files.amd64
index aeabb60..fadaa16 100644
--- a/sys/conf/files.amd64
+++ b/sys/conf/files.amd64
@@ -147,6 +147,8 @@ dev/fdc/fdc_acpi.c optional fdc
dev/fdc/fdc_isa.c optional fdc isa
dev/fdc/fdc_pccard.c optional fdc pccard
dev/hwpmc/hwpmc_amd.c optional hwpmc
+dev/hwpmc/hwpmc_piv.c optional hwpmc
+dev/hwpmc/hwpmc_x86.c optional hwpmc
dev/kbd/atkbd.c optional atkbd
dev/kbd/atkbdc.c optional atkbdc
dev/kbd/kbd.c optional atkbd
diff --git a/sys/conf/files.arm b/sys/conf/files.arm
index 6856ac8..6f907f8 100644
--- a/sys/conf/files.arm
+++ b/sys/conf/files.arm
@@ -46,6 +46,7 @@ arm/arm/vm_machdep.c standard
arm/fpe-arm/armfpe_glue.S optional armfpe
arm/fpe-arm/armfpe_init.c optional armfpe
arm/fpe-arm/armfpe.S optional armfpe
+dev/hwpmc/hwpmc_arm.c optional hwpmc
geom/geom_bsd.c standard
geom/geom_bsd_enc.c standard
geom/geom_mbr.c standard
diff --git a/sys/conf/files.i386 b/sys/conf/files.i386
index d58439c..f9d6c34 100644
--- a/sys/conf/files.i386
+++ b/sys/conf/files.i386
@@ -174,10 +174,10 @@ dev/hptmv/gui_lib.c optional hptmv
dev/hptmv/hptproc.c optional hptmv
dev/hptmv/ioctl.c optional hptmv
dev/hwpmc/hwpmc_amd.c optional hwpmc
-dev/hwpmc/hwpmc_intel.c optional hwpmc
dev/hwpmc/hwpmc_pentium.c optional hwpmc
dev/hwpmc/hwpmc_piv.c optional hwpmc
dev/hwpmc/hwpmc_ppro.c optional hwpmc
+dev/hwpmc/hwpmc_x86.c optional hwpmc
dev/ichwd/ichwd.c optional ichwd
dev/if_ndis/if_ndis.c optional ndis
dev/if_ndis/if_ndis_pccard.c optional ndis pccard
diff --git a/sys/conf/files.ia64 b/sys/conf/files.ia64
index 304a0f0..991281e 100644
--- a/sys/conf/files.ia64
+++ b/sys/conf/files.ia64
@@ -52,6 +52,7 @@ dev/fb/fb.c optional fb
dev/fb/fb.c optional vga
dev/fb/splash.c optional splash
dev/fb/vga.c optional vga
+dev/hwpmc/hwpmc_ia64.c optional hwpmc
dev/kbd/atkbd.c optional atkbd
dev/kbd/atkbdc.c optional atkbdc
dev/kbd/kbd.c optional atkbd
diff --git a/sys/conf/files.pc98 b/sys/conf/files.pc98
index 843800b..ef74eff 100644
--- a/sys/conf/files.pc98
+++ b/sys/conf/files.pc98
@@ -118,10 +118,10 @@ dev/fb/splash.c optional splash
dev/fe/if_fe_cbus.c optional fe isa
dev/fe/if_fe_pccard.c optional fe card
dev/hwpmc/hwpmc_amd.c optional hwpmc
-dev/hwpmc/hwpmc_intel.c optional hwpmc
dev/hwpmc/hwpmc_pentium.c optional hwpmc
dev/hwpmc/hwpmc_piv.c optional hwpmc
dev/hwpmc/hwpmc_ppro.c optional hwpmc
+dev/hwpmc/hwpmc_x86.c optional hwpmc
dev/io/iodev.c optional io
dev/kbd/kbd.c optional pckbd
dev/kbd/kbd.c optional sc
diff --git a/sys/conf/files.powerpc b/sys/conf/files.powerpc
index d3004c3..5e674d4 100644
--- a/sys/conf/files.powerpc
+++ b/sys/conf/files.powerpc
@@ -15,6 +15,7 @@ font.h optional sc \
clean "font.h ${SC_DFLT_FONT}-8x14 ${SC_DFLT_FONT}-8x16 ${SC_DFLT_FONT}-8x8"
dev/fb/fb.c optional sc
+dev/hwpmc/hwpmc_powerpc.c optional hwpmc
dev/kbd/kbd.c optional sc
dev/syscons/scgfbrndr.c optional sc
dev/syscons/schistory.c optional sc
diff --git a/sys/conf/files.sparc64 b/sys/conf/files.sparc64
index 9b67308..50bc61f 100644
--- a/sys/conf/files.sparc64
+++ b/sys/conf/files.sparc64
@@ -24,6 +24,7 @@ dev/fb/creator.c optional creator sc
dev/fb/fb.c optional sc
dev/fb/machfb.c optional machfb sc
dev/fb/splash.c optional splash
+dev/hwpmc/hwpmc_sparc64.c optional hwpmc
dev/kbd/kbd.c optional sc
dev/kbd/kbd.c optional ukbd
dev/ofw/ofw_bus_if.m standard
diff --git a/sys/dev/hwpmc/hwpmc_alpha.c b/sys/dev/hwpmc/hwpmc_alpha.c
new file mode 100644
index 0000000..af905b6
--- /dev/null
+++ b/sys/dev/hwpmc/hwpmc_alpha.c
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2005, Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmc.h>
+
+#include <machine/pmc_mdep.h>
+
+struct pmc_mdep *
+pmc_md_initialize()
+{
+ return NULL;
+}
diff --git a/sys/dev/hwpmc/hwpmc_amd.c b/sys/dev/hwpmc/hwpmc_amd.c
index 1eb987d..0c7016a 100644
--- a/sys/dev/hwpmc/hwpmc_amd.c
+++ b/sys/dev/hwpmc/hwpmc_amd.c
@@ -38,93 +38,13 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
+#include <machine/cpufunc.h>
#include <machine/md_var.h>
+#include <machine/pmc_mdep.h>
+#include <machine/specialreg.h>
-/* AMD K7 and K8 PMCs */
-
-#define AMD_PMC_EVSEL_0 0xC0010000
-#define AMD_PMC_EVSEL_1 0xC0010001
-#define AMD_PMC_EVSEL_2 0xC0010002
-#define AMD_PMC_EVSEL_3 0xC0010003
-
-#define AMD_PMC_PERFCTR_0 0xC0010004
-#define AMD_PMC_PERFCTR_1 0xC0010005
-#define AMD_PMC_PERFCTR_2 0xC0010006
-#define AMD_PMC_PERFCTR_3 0xC0010007
-
-#define K7_VALID_EVENT_CODE(c) (((c) >= 0x40 && (c) <= 0x47) || \
- ((c) >= 0x80 && (c) <= 0x85) || ((c) >= 0xC0 && (c) <= 0xC7) || \
- ((c) >= 0xCD && (c) <= 0xCF))
-
-#define AMD_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | \
- PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_THRESHOLD | \
- PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INVERT | PMC_CAP_QUALIFIER)
-
-/* reserved bits include bit 21 and the top two bits of the unit mask */
-#define K7_PMC_RESERVED ((1 << 21) | (3 << 13))
-
-#define K8_PMC_RESERVED (1 << 21)
-
-#define AMD_PMC_IS_STOPPED(evsel) ((rdmsr((evsel)) & AMD_PMC_ENABLE) == 0)
-#define AMD_PMC_HAS_OVERFLOWED(pmc) ((rdpmc(pmc) & (1ULL << 47)) == 0)
-
-#if __i386__
-#define AMD_NPMCS K7_NPMCS
-#define AMD_PMC_CLASS PMC_CLASS_K7
-#define AMD_PMC_COUNTERMASK K7_PMC_COUNTERMASK
-#define AMD_PMC_TO_COUNTER(x) K7_PMC_TO_COUNTER(x)
-#define AMD_PMC_INVERT K7_PMC_INVERT
-#define AMD_PMC_ENABLE K7_PMC_ENABLE
-#define AMD_PMC_INT K7_PMC_INT
-#define AMD_PMC_PC K7_PMC_PC
-#define AMD_PMC_EDGE K7_PMC_EDGE
-#define AMD_PMC_OS K7_PMC_OS
-#define AMD_PMC_USR K7_PMC_USR
-
-#define AMD_PMC_UNITMASK_M K7_PMC_UNITMASK_M
-#define AMD_PMC_UNITMASK_O K7_PMC_UNITMASK_O
-#define AMD_PMC_UNITMASK_E K7_PMC_UNITMASK_E
-#define AMD_PMC_UNITMASK_S K7_PMC_UNITMASK_S
-#define AMD_PMC_UNITMASK_I K7_PMC_UNITMASK_I
-
-#define AMD_PMC_UNITMASK K7_PMC_UNITMASK
-#define AMD_PMC_EVENTMASK K7_PMC_EVENTMASK
-#define AMD_PMC_TO_UNITMASK(x) K7_PMC_TO_UNITMASK(x)
-#define AMD_PMC_TO_EVENTMASK(x) K7_PMC_TO_EVENTMASK(x)
-#define AMD_VALID_BITS K7_VALID_BITS
-
-#define AMD_PMC_CLASS_NAME "K7-"
-
-#elif __amd64__
-
-#define AMD_NPMCS K8_NPMCS
-#define AMD_PMC_CLASS PMC_CLASS_K8
-#define AMD_PMC_COUNTERMASK K8_PMC_COUNTERMASK
-#define AMD_PMC_TO_COUNTER(x) K8_PMC_TO_COUNTER(x)
-#define AMD_PMC_INVERT K8_PMC_INVERT
-#define AMD_PMC_ENABLE K8_PMC_ENABLE
-#define AMD_PMC_INT K8_PMC_INT
-#define AMD_PMC_PC K8_PMC_PC
-#define AMD_PMC_EDGE K8_PMC_EDGE
-#define AMD_PMC_OS K8_PMC_OS
-#define AMD_PMC_USR K8_PMC_USR
-
-#define AMD_PMC_UNITMASK_M K8_PMC_UNITMASK_M
-#define AMD_PMC_UNITMASK_O K8_PMC_UNITMASK_O
-#define AMD_PMC_UNITMASK_E K8_PMC_UNITMASK_E
-#define AMD_PMC_UNITMASK_S K8_PMC_UNITMASK_S
-#define AMD_PMC_UNITMASK_I K8_PMC_UNITMASK_I
-
-#define AMD_PMC_UNITMASK K8_PMC_UNITMASK
-#define AMD_PMC_EVENTMASK K8_PMC_EVENTMASK
-#define AMD_PMC_TO_UNITMASK(x) K8_PMC_TO_UNITMASK(x)
-#define AMD_PMC_TO_EVENTMASK(x) K8_PMC_TO_EVENTMASK(x)
-#define AMD_VALID_BITS K8_VALID_BITS
-
-#define AMD_PMC_CLASS_NAME "K8-"
-
-#else
-#error Unsupported architecture.
+#if DEBUG
+enum pmc_class amd_pmc_class;
#endif
/* AMD K7 & K8 PMCs */
@@ -134,7 +54,7 @@ struct amd_descr {
uint32_t pm_perfctr; /* address of PERFCTR register */
};
-static const struct amd_descr amd_pmcdesc[AMD_NPMCS] =
+static struct amd_descr amd_pmcdesc[AMD_NPMCS] =
{
{
.pm_descr =
@@ -151,8 +71,8 @@ static const struct amd_descr amd_pmcdesc[AMD_NPMCS] =
{
.pm_descr =
{
- .pd_name = AMD_PMC_CLASS_NAME "0",
- .pd_class = AMD_PMC_CLASS,
+ .pd_name = "",
+ .pd_class = -1,
.pd_caps = AMD_PMC_CAPS,
.pd_width = 48
},
@@ -162,8 +82,8 @@ static const struct amd_descr amd_pmcdesc[AMD_NPMCS] =
{
.pm_descr =
{
- .pd_name = AMD_PMC_CLASS_NAME "1",
- .pd_class = AMD_PMC_CLASS,
+ .pd_name = "",
+ .pd_class = -1,
.pd_caps = AMD_PMC_CAPS,
.pd_width = 48
},
@@ -173,8 +93,8 @@ static const struct amd_descr amd_pmcdesc[AMD_NPMCS] =
{
.pm_descr =
{
- .pd_name = AMD_PMC_CLASS_NAME "2",
- .pd_class = AMD_PMC_CLASS,
+ .pd_name = "",
+ .pd_class = -1,
.pd_caps = AMD_PMC_CAPS,
.pd_width = 48
},
@@ -184,8 +104,8 @@ static const struct amd_descr amd_pmcdesc[AMD_NPMCS] =
{
.pm_descr =
{
- .pd_name = AMD_PMC_CLASS_NAME "3",
- .pd_class = AMD_PMC_CLASS,
+ .pd_name = "",
+ .pd_class = -1,
.pd_caps = AMD_PMC_CAPS,
.pd_width = 48
},
@@ -201,12 +121,12 @@ struct amd_event_code_map {
};
const struct amd_event_code_map amd_event_codes[] = {
-#if __i386__
+#if defined(__i386__) /* 32 bit Athlon (K7) only */
{ PMC_EV_K7_DC_ACCESSES, 0x40, 0 },
{ PMC_EV_K7_DC_MISSES, 0x41, 0 },
- { PMC_EV_K7_DC_REFILLS_FROM_L2, 0x42, K7_PMC_UNITMASK_MOESI },
- { PMC_EV_K7_DC_REFILLS_FROM_SYSTEM, 0x43, K7_PMC_UNITMASK_MOESI },
- { PMC_EV_K7_DC_WRITEBACKS, 0x44, K7_PMC_UNITMASK_MOESI },
+ { PMC_EV_K7_DC_REFILLS_FROM_L2, 0x42, AMD_PMC_UNITMASK_MOESI },
+ { PMC_EV_K7_DC_REFILLS_FROM_SYSTEM, 0x43, AMD_PMC_UNITMASK_MOESI },
+ { PMC_EV_K7_DC_WRITEBACKS, 0x44, AMD_PMC_UNITMASK_MOESI },
{ PMC_EV_K7_L1_DTLB_MISS_AND_L2_DTLB_HITS, 0x45, 0 },
{ PMC_EV_K7_L1_AND_L2_DTLB_MISSES, 0x46, 0 },
{ PMC_EV_K7_MISALIGNED_REFERENCES, 0x47, 0 },
@@ -227,10 +147,9 @@ const struct amd_event_code_map amd_event_codes[] = {
{ PMC_EV_K7_RETIRED_RESYNC_BRANCHES, 0xC7, 0 },
{ PMC_EV_K7_INTERRUPTS_MASKED_CYCLES, 0xCD, 0 },
{ PMC_EV_K7_INTERRUPTS_MASKED_WHILE_PENDING_CYCLES, 0xCE, 0 },
- { PMC_EV_K7_HARDWARE_INTERRUPTS, 0xCF, 0 }
+ { PMC_EV_K7_HARDWARE_INTERRUPTS, 0xCF, 0 },
#endif
-#if __amd64__
{ PMC_EV_K8_FP_DISPATCHED_FPU_OPS, 0x00, 0x3F },
{ PMC_EV_K8_FP_CYCLES_WITH_NO_FPU_OPS_RETIRED, 0x01, 0x00 },
{ PMC_EV_K8_FP_DISPATCHED_FPU_FAST_FLAG_OPS, 0x02, 0x00 },
@@ -327,7 +246,6 @@ const struct amd_event_code_map amd_event_codes[] = {
{ PMC_EV_K8_NB_HT_BUS0_BANDWIDTH, 0xF6, 0x0F },
{ PMC_EV_K8_NB_HT_BUS1_BANDWIDTH, 0xF7, 0x0F },
{ PMC_EV_K8_NB_HT_BUS2_BANDWIDTH, 0xF8, 0x0F }
-#endif
};
@@ -373,13 +291,15 @@ amd_read_pmc(int cpu, int ri, pmc_value_t *v)
return 0;
}
- KASSERT(pd->pm_descr.pd_class == AMD_PMC_CLASS,
+#if DEBUG
+ KASSERT(pd->pm_descr.pd_class == amd_pmc_class,
("[amd,%d] unknown PMC class (%d)", __LINE__,
pd->pm_descr.pd_class));
+#endif
tmp = rdmsr(pd->pm_perfctr); /* RDMSR serializes */
if (PMC_IS_SAMPLING_MODE(mode))
- *v = -tmp;
+ *v = AMD_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
else
*v = tmp;
@@ -418,13 +338,15 @@ amd_write_pmc(int cpu, int ri, pmc_value_t v)
if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
return 0;
- KASSERT(pd->pm_descr.pd_class == AMD_PMC_CLASS,
+#if DEBUG
+ KASSERT(pd->pm_descr.pd_class == amd_pmc_class,
("[amd,%d] unknown PMC class (%d)", __LINE__,
pd->pm_descr.pd_class));
+#endif
/* use 2's complement of the count for sampling mode PMCs */
if (PMC_IS_SAMPLING_MODE(mode))
- v = -v;
+ v = AMD_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
PMCDBG(MDP,WRI,1,"amd-write cpu=%d ri=%d v=%jx", cpu, ri, v);
@@ -552,8 +474,10 @@ amd_allocate_pmc(int cpu, int ri, struct pmc *pm,
return 0;
}
- KASSERT(pd->pd_class == AMD_PMC_CLASS,
+#if DEBUG
+ KASSERT(pd->pd_class == amd_pmc_class,
("[amd,%d] Unknown PMC class (%d)", __LINE__, pd->pd_class));
+#endif
pe = a->pm_ev;
@@ -570,7 +494,7 @@ amd_allocate_pmc(int cpu, int ri, struct pmc *pm,
if (i == amd_event_codes_size)
return EINVAL;
- unitmask = a->pm_amd_config & AMD_PMC_UNITMASK;
+ unitmask = a->pm_md.pm_amd.pm_amd_config & AMD_PMC_UNITMASK;
if (unitmask & ~allowed_unitmask) /* disallow reserved bits */
return EINVAL;
@@ -578,7 +502,7 @@ amd_allocate_pmc(int cpu, int ri, struct pmc *pm,
config |= unitmask;
if (caps & PMC_CAP_THRESHOLD)
- config |= a->pm_amd_config & AMD_PMC_COUNTERMASK;
+ config |= a->pm_md.pm_amd.pm_amd_config & AMD_PMC_COUNTERMASK;
/* set at least one of the 'usr' or 'os' caps */
if (caps & PMC_CAP_USER)
@@ -631,7 +555,7 @@ amd_release_pmc(int cpu, int ri, struct pmc *pmc)
#if DEBUG
pd = &amd_pmcdesc[ri];
- if (pd->pm_descr.pd_class == AMD_PMC_CLASS)
+ if (pd->pm_descr.pd_class == amd_pmc_class)
KASSERT(AMD_PMC_IS_STOPPED(pd->pm_evsel),
("[amd,%d] PMC %d released while active", __LINE__, ri));
#endif
@@ -669,9 +593,11 @@ amd_start_pmc(int cpu, int ri)
if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
return 0; /* TSCs are always running */
- KASSERT(pd->pm_descr.pd_class == AMD_PMC_CLASS,
+#if DEBUG
+ KASSERT(pd->pm_descr.pd_class == amd_pmc_class,
("[amd,%d] unknown PMC class (%d)", __LINE__,
pd->pm_descr.pd_class));
+#endif
KASSERT(AMD_PMC_IS_STOPPED(pd->pm_evsel),
("[amd,%d] pmc%d,cpu%d: Starting active PMC \"%s\"", __LINE__,
@@ -715,9 +641,11 @@ amd_stop_pmc(int cpu, int ri)
if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
return 0;
- KASSERT(pd->pm_descr.pd_class == AMD_PMC_CLASS,
+#if DEBUG
+ KASSERT(pd->pm_descr.pd_class == amd_pmc_class,
("[amd,%d] unknown PMC class (%d)", __LINE__,
pd->pm_descr.pd_class));
+#endif
KASSERT(!AMD_PMC_IS_STOPPED(pd->pm_evsel),
("[amd,%d] PMC%d, CPU%d \"%s\" already stopped",
@@ -741,18 +669,18 @@ amd_stop_pmc(int cpu, int ri)
static int
amd_intr(int cpu, uintptr_t eip, int usermode)
{
- int i, retval;
- enum pmc_mode mode;
- uint32_t perfctr;
+ int i, error, retval, ri;
+ uint32_t config, evsel, perfctr;
struct pmc *pm;
struct pmc_cpu *pc;
struct pmc_hw *phw;
-
- (void) usermode;
+ pmc_value_t v;
KASSERT(cpu >= 0 && cpu < mp_ncpus,
("[amd,%d] out of range CPU %d", __LINE__, cpu));
+ PMCDBG(MDP,INT,1, "cpu=%d eip=%p", cpu, (void *) eip);
+
retval = 0;
pc = pmc_pcpu[cpu];
@@ -760,36 +688,53 @@ amd_intr(int cpu, uintptr_t eip, int usermode)
/*
* look for all PMCs that have interrupted:
* - skip over the TSC [PMC#0]
- * - look for a PMC with a valid 'struct pmc' association
- * - look for a PMC in (a) sampling mode and (b) which has
- * overflowed. If found, we update the process's
- * histogram or send it a profiling signal by calling
- * the appropriate helper function.
+ * - look for a running, sampling PMC which has overflowed
+ * and which has a valid 'struct pmc' association
+ *
+ * If found, we call a helper to process the interrupt.
*/
- for (i = 1; i < AMD_NPMCS; i++) {
+ for (i = 0; i < AMD_NPMCS-1; i++) {
+
+ ri = i + 1; /* row index; TSC is at ri == 0 */
+
+ if (!AMD_PMC_HAS_OVERFLOWED(i))
+ continue;
+
+ phw = pc->pc_hwpmcs[ri];
- phw = pc->pc_hwpmcs[i];
- perfctr = amd_pmcdesc[i].pm_perfctr;
KASSERT(phw != NULL, ("[amd,%d] null PHW pointer", __LINE__));
if ((pm = phw->phw_pmc) == NULL ||
- pm->pm_state != PMC_STATE_RUNNING) {
- atomic_add_int(&pmc_stats.pm_intr_ignored, 1);
+ pm->pm_state != PMC_STATE_RUNNING ||
+ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
continue;
}
- mode = PMC_TO_MODE(pm);
- if (PMC_IS_SAMPLING_MODE(mode) &&
- AMD_PMC_HAS_OVERFLOWED(perfctr)) {
- atomic_add_int(&pmc_stats.pm_intr_processed, 1);
- if (PMC_IS_SYSTEM_MODE(mode))
- pmc_update_histogram(phw, eip);
- else if (PMC_IS_VIRTUAL_MODE(mode))
- pmc_send_signal(pm);
- retval = 1;
- }
+ /* stop the PMC, reload count */
+ evsel = AMD_PMC_EVSEL_0 + i;
+ perfctr = AMD_PMC_PERFCTR_0 + i;
+ v = pm->pm_sc.pm_reloadcount;
+ config = rdmsr(evsel);
+
+ KASSERT((config & ~AMD_PMC_ENABLE) ==
+ (pm->pm_md.pm_amd.pm_amd_evsel & ~AMD_PMC_ENABLE),
+ ("[amd,%d] config mismatch reg=0x%x pm=0x%x", __LINE__,
+ config, pm->pm_md.pm_amd.pm_amd_evsel));
+
+ wrmsr(evsel, config & ~AMD_PMC_ENABLE);
+ wrmsr(perfctr, AMD_RELOAD_COUNT_TO_PERFCTR_VALUE(v));
+
+ /* restart if there was no error during logging */
+ error = pmc_process_interrupt(cpu, pm, eip, usermode);
+ if (error == 0)
+ wrmsr(evsel, config | AMD_PMC_ENABLE);
+
+ retval = 1; /* found an interrupting PMC */
}
+
+ if (retval == 0)
+ atomic_add_int(&pmc_stats.pm_intr_ignored, 1);
return retval;
}
@@ -881,9 +826,6 @@ amd_init(int cpu)
MALLOC(pcs, struct amd_cpu *, sizeof(struct amd_cpu), M_PMC,
M_WAITOK|M_ZERO);
- if (pcs == NULL)
- return ENOMEM;
-
phw = &pcs->pc_amdpmcs[0];
/*
@@ -938,7 +880,8 @@ amd_cleanup(int cpu)
* Next, free up allocated space.
*/
- pcs = pmc_pcpu[cpu];
+ if ((pcs = pmc_pcpu[cpu]) == NULL)
+ return 0;
#if DEBUG
/* check the TSC */
@@ -951,8 +894,6 @@ amd_cleanup(int cpu)
("[amd,%d] CPU%d/PMC%d not stopped", __LINE__, cpu, i));
}
#endif
- KASSERT(pcs != NULL,
- ("[amd,%d] null per-cpu state pointer (cpu%d)", __LINE__, cpu));
pmc_pcpu[cpu] = NULL;
FREE(pcs, M_PMC);
@@ -966,30 +907,47 @@ amd_cleanup(int cpu)
struct pmc_mdep *
pmc_amd_initialize(void)
{
-
+ enum pmc_cputype cputype;
+ enum pmc_class class;
struct pmc_mdep *pmc_mdep;
+ char *name;
+ int i;
- /* The presence of hardware performance counters on the AMD
- Athlon, Duron or later processors, is _not_ indicated by
- any of the processor feature flags set by the 'CPUID'
- instruction, so we only check the 'instruction family'
- field returned by CPUID for instruction family >= 6. This
- test needs to be be refined. */
+ /*
+ * The presence of hardware performance counters on the AMD
+ * Athlon, Duron or later processors, is _not_ indicated by
+ * any of the processor feature flags set by the 'CPUID'
+ * instruction, so we only check the 'instruction family'
+ * field returned by CPUID for instruction family >= 6.
+ */
- if ((cpu_id & 0xF00) < 0x600)
+ cputype = -1;
+ switch (cpu_id & 0xF00) {
+ case 0x600: /* Athlon(tm) processor */
+ cputype = PMC_CPU_AMD_K7;
+ class = PMC_CLASS_K7;
+ name = "K7";
+ break;
+ case 0xF00: /* Athlon64/Opteron processor */
+ cputype = PMC_CPU_AMD_K8;
+ class = PMC_CLASS_K8;
+ name = "K8";
+ break;
+ }
+
+ if ((int) cputype == -1) {
+ (void) printf("pmc: Unknown AMD CPU.\n");
return NULL;
+ }
+
+#if DEBUG
+ amd_pmc_class = class;
+#endif
MALLOC(pmc_mdep, struct pmc_mdep *, sizeof(struct pmc_mdep),
M_PMC, M_WAITOK|M_ZERO);
-#if __i386__
- pmc_mdep->pmd_cputype = PMC_CPU_AMD_K7;
-#elif __amd64__
- pmc_mdep->pmd_cputype = PMC_CPU_AMD_K8;
-#else
-#error Unknown AMD CPU type.
-#endif
-
+ pmc_mdep->pmd_cputype = cputype;
pmc_mdep->pmd_npmc = AMD_NPMCS;
/* this processor has two classes of usable PMCs */
@@ -1001,13 +959,21 @@ pmc_amd_initialize(void)
pmc_mdep->pmd_classes[0].pm_width = 64;
/* AMD K7/K8 PMCs */
- pmc_mdep->pmd_classes[1].pm_class = AMD_PMC_CLASS;
+ pmc_mdep->pmd_classes[1].pm_class = class;
pmc_mdep->pmd_classes[1].pm_caps = AMD_PMC_CAPS;
pmc_mdep->pmd_classes[1].pm_width = 48;
pmc_mdep->pmd_nclasspmcs[0] = 1;
pmc_mdep->pmd_nclasspmcs[1] = (AMD_NPMCS-1);
+ /* fill in the correct pmc name and class */
+ for (i = 1; i < AMD_NPMCS; i++) {
+ (void) snprintf(amd_pmcdesc[i].pm_descr.pd_name,
+ sizeof(amd_pmcdesc[i].pm_descr.pd_name), "%s-%d",
+ name, i-1);
+ amd_pmcdesc[i].pm_descr.pd_class = class;
+ }
+
pmc_mdep->pmd_init = amd_init;
pmc_mdep->pmd_cleanup = amd_cleanup;
pmc_mdep->pmd_switch_in = amd_switch_in;
diff --git a/sys/dev/hwpmc/hwpmc_amd.h b/sys/dev/hwpmc/hwpmc_amd.h
new file mode 100644
index 0000000..aa6417b
--- /dev/null
+++ b/sys/dev/hwpmc/hwpmc_amd.h
@@ -0,0 +1,103 @@
+/*-
+ * Copyright (c) 2005, Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* Machine dependent interfaces */
+
+#ifndef _DEV_HWPMC_AMD_H_
+#define _DEV_HWPMC_AMD_H_ 1
+
+/* AMD K7 and K8 PMCs */
+
+#define AMD_PMC_EVSEL_0 0xC0010000
+#define AMD_PMC_EVSEL_1 0xC0010001
+#define AMD_PMC_EVSEL_2 0xC0010002
+#define AMD_PMC_EVSEL_3 0xC0010003
+
+#define AMD_PMC_PERFCTR_0 0xC0010004
+#define AMD_PMC_PERFCTR_1 0xC0010005
+#define AMD_PMC_PERFCTR_2 0xC0010006
+#define AMD_PMC_PERFCTR_3 0xC0010007
+
+
+#define AMD_NPMCS 5 /* 1 TSC + 4 PMCs */
+
+#define AMD_PMC_COUNTERMASK 0xFF000000
+#define AMD_PMC_TO_COUNTER(x) (((x) << 24) & AMD_PMC_COUNTERMASK)
+#define AMD_PMC_INVERT (1 << 23)
+#define AMD_PMC_ENABLE (1 << 22)
+#define AMD_PMC_INT (1 << 20)
+#define AMD_PMC_PC (1 << 19)
+#define AMD_PMC_EDGE (1 << 18)
+#define AMD_PMC_OS (1 << 17)
+#define AMD_PMC_USR (1 << 16)
+
+#define AMD_PMC_UNITMASK_M 0x10
+#define AMD_PMC_UNITMASK_O 0x08
+#define AMD_PMC_UNITMASK_E 0x04
+#define AMD_PMC_UNITMASK_S 0x02
+#define AMD_PMC_UNITMASK_I 0x01
+#define AMD_PMC_UNITMASK_MOESI 0x1F
+
+#define AMD_PMC_UNITMASK 0xFF00
+#define AMD_PMC_EVENTMASK 0x00FF
+
+#define AMD_PMC_TO_UNITMASK(x) (((x) << 8) & AMD_PMC_UNITMASK)
+#define AMD_PMC_TO_EVENTMASK(x) ((x) & 0xFF)
+#define AMD_VALID_BITS (AMD_PMC_COUNTERMASK | AMD_PMC_INVERT | \
+ AMD_PMC_ENABLE | AMD_PMC_INT | AMD_PMC_PC | AMD_PMC_EDGE | \
+ AMD_PMC_OS | AMD_PMC_USR | AMD_PMC_UNITMASK | AMD_PMC_EVENTMASK)
+
+#define AMD_PMC_CAPS (PMC_CAP_INTERRUPT | PMC_CAP_USER | \
+ PMC_CAP_SYSTEM | PMC_CAP_EDGE | PMC_CAP_THRESHOLD | \
+ PMC_CAP_READ | PMC_CAP_WRITE | PMC_CAP_INVERT | PMC_CAP_QUALIFIER)
+
+#define AMD_PMC_IS_STOPPED(evsel) ((rdmsr((evsel)) & AMD_PMC_ENABLE) == 0)
+#define AMD_PMC_HAS_OVERFLOWED(pmc) ((rdpmc(pmc) & (1ULL << 47)) == 0)
+
+#define AMD_RELOAD_COUNT_TO_PERFCTR_VALUE(V) (-(V))
+#define AMD_PERFCTR_VALUE_TO_RELOAD_COUNT(P) (-(P))
+
+struct pmc_md_amd_op_pmcallocate {
+ uint32_t pm_amd_config;
+};
+
+#ifdef _KERNEL
+
+/* MD extension for 'struct pmc' */
+struct pmc_md_amd_pmc {
+ uint32_t pm_amd_evsel;
+};
+
+/*
+ * Prototypes
+ */
+
+struct pmc_mdep *pmc_amd_initialize(void); /* AMD K7/K8 PMCs */
+
+#endif /* _KERNEL */
+#endif /* _DEV_HWPMC_AMD_H_ */
diff --git a/sys/dev/hwpmc/hwpmc_arm.c b/sys/dev/hwpmc/hwpmc_arm.c
new file mode 100644
index 0000000..af905b6
--- /dev/null
+++ b/sys/dev/hwpmc/hwpmc_arm.c
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2005, Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmc.h>
+
+#include <machine/pmc_mdep.h>
+
+struct pmc_mdep *
+pmc_md_initialize()
+{
+ return NULL;
+}
diff --git a/sys/dev/hwpmc/hwpmc_ia64.c b/sys/dev/hwpmc/hwpmc_ia64.c
new file mode 100644
index 0000000..af905b6
--- /dev/null
+++ b/sys/dev/hwpmc/hwpmc_ia64.c
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2005, Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmc.h>
+
+#include <machine/pmc_mdep.h>
+
+struct pmc_mdep *
+pmc_md_initialize()
+{
+ return NULL;
+}
diff --git a/sys/dev/hwpmc/hwpmc_logging.c b/sys/dev/hwpmc/hwpmc_logging.c
new file mode 100644
index 0000000..5ae7bc6
--- /dev/null
+++ b/sys/dev/hwpmc/hwpmc_logging.c
@@ -0,0 +1,978 @@
+/*-
+ * Copyright (c) 2005 Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+/*
+ * Logging code for hwpmc(4)
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/file.h>
+#include <sys/kernel.h>
+#include <sys/kthread.h>
+#include <sys/lock.h>
+#include <sys/module.h>
+#include <sys/mutex.h>
+#include <sys/pmc.h>
+#include <sys/pmclog.h>
+#include <sys/proc.h>
+#include <sys/signalvar.h>
+#include <sys/sysctl.h>
+#include <sys/systm.h>
+#include <sys/uio.h>
+#include <sys/unistd.h>
+#include <sys/vnode.h>
+
+/*
+ * Sysctl tunables
+ */
+
+SYSCTL_DECL(_kern_hwpmc);
+
+/*
+ * kern.hwpmc.logbuffersize -- size of the per-cpu owner buffers.
+ */
+
+static int pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
+TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "logbuffersize", &pmclog_buffer_size);
+SYSCTL_INT(_kern_hwpmc, OID_AUTO, logbuffersize, CTLFLAG_TUN|CTLFLAG_RD,
+ &pmclog_buffer_size, 0, "size of log buffers in kilobytes");
+
+
+/*
+ * kern.hwpmc.nbuffer -- number of global log buffers
+ */
+
+static int pmc_nlogbuffers = PMC_NLOGBUFFERS;
+TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nbuffers", &pmc_nlogbuffers);
+SYSCTL_INT(_kern_hwpmc, OID_AUTO, nbuffers, CTLFLAG_TUN|CTLFLAG_RD,
+ &pmc_nlogbuffers, 0, "number of global log buffers");
+
+/*
+ * Global log buffer list and associated spin lock.
+ */
+
+TAILQ_HEAD(, pmclog_buffer) pmc_bufferlist =
+ TAILQ_HEAD_INITIALIZER(pmc_bufferlist);
+static struct mtx pmc_bufferlist_mtx; /* spin lock */
+static struct mtx pmc_kthread_mtx; /* sleep lock */
+
+#define PMCLOG_INIT_BUFFER_DESCRIPTOR(D) do { \
+ const int __roundup = roundup(sizeof(*D), \
+ sizeof(uint32_t)); \
+ (D)->plb_fence = ((char *) (D)) + \
+ 1024*pmclog_buffer_size; \
+ (D)->plb_base = (D)->plb_ptr = ((char *) (D)) + \
+ __roundup; \
+ } while (0)
+
+
+/*
+ * Log file record constructors.
+ */
+
+/* reserve LEN bytes of space and initialize the entry header */
+#define _PMCLOG_RESERVE(PO,TYPE,LEN,ACTION) do { \
+ uint32_t *_le; \
+ int _len = roundup((LEN), sizeof(uint32_t)); \
+ if ((_le = pmclog_reserve((PO), _len)) == NULL) { \
+ ACTION; \
+ } \
+ *_le = (PMCLOG_HEADER_MAGIC << 24) | \
+ (PMCLOG_TYPE_ ## TYPE << 16) | \
+ (_len & 0xFFFF); \
+ _le += 3 /* skip over timestamp */
+
+#define PMCLOG_RESERVE(P,T,L) _PMCLOG_RESERVE(P,T,L,return)
+#define PMCLOG_RESERVE_WITH_ERROR(P,T,L) _PMCLOG_RESERVE(P,T,L, \
+ error=ENOMEM;goto error)
+
+#define PMCLOG_EMIT32(V) do { *_le++ = (V); } while (0)
+#define PMCLOG_EMIT64(V) do { \
+ *_le++ = (uint32_t) ((V) & 0xFFFFFFFF); \
+ *_le++ = (uint32_t) (((V) >> 32) & 0xFFFFFFFF); \
+ } while (0)
+
+
+/* Emit a string. Caution: does NOT update _le, so needs to be last */
+#define PMCLOG_EMITSTRING(S,L) do { bcopy((S), _le, (L)); } while (0)
+
+#define PMCLOG_DESPATCH(PO) \
+ pmclog_release((PO)); \
+ } while (0)
+
+
+/*
+ * Assertions about the log file format.
+ */
+
+CTASSERT(sizeof(struct pmclog_closelog) == 3*4);
+CTASSERT(sizeof(struct pmclog_dropnotify) == 3*4);
+CTASSERT(sizeof(struct pmclog_mappingchange) == PATH_MAX +
+ 5*4 + 2*sizeof(uintfptr_t));
+CTASSERT(offsetof(struct pmclog_mappingchange,pl_pathname) ==
+ 5*4 + 2*sizeof(uintfptr_t));
+CTASSERT(sizeof(struct pmclog_pcsample) == 5*4 + sizeof(uintfptr_t));
+CTASSERT(sizeof(struct pmclog_pmcallocate) == 6*4);
+CTASSERT(sizeof(struct pmclog_pmcattach) == 5*4 + PATH_MAX);
+CTASSERT(offsetof(struct pmclog_pmcattach,pl_pathname) == 5*4);
+CTASSERT(sizeof(struct pmclog_pmcdetach) == 5*4);
+CTASSERT(sizeof(struct pmclog_proccsw) == 5*4 + 8);
+CTASSERT(sizeof(struct pmclog_procexec) == 4*4 + PATH_MAX);
+CTASSERT(offsetof(struct pmclog_procexec,pl_pathname) == 4*4);
+CTASSERT(sizeof(struct pmclog_procexit) == 5*4 + 8);
+CTASSERT(sizeof(struct pmclog_procfork) == 5*4);
+CTASSERT(sizeof(struct pmclog_sysexit) == 4*4);
+CTASSERT(sizeof(struct pmclog_userdata) == 4*4);
+
+/*
+ * Log buffer structure
+ */
+
+struct pmclog_buffer {
+ TAILQ_ENTRY(pmclog_buffer) plb_next;
+ char *plb_base;
+ char *plb_ptr;
+ char *plb_fence;
+};
+
+/*
+ * Prototypes
+ */
+
+static int pmclog_get_buffer(struct pmc_owner *po);
+static void pmclog_loop(void *arg);
+static void pmclog_release(struct pmc_owner *po);
+static uint32_t *pmclog_reserve(struct pmc_owner *po, int length);
+static void pmclog_schedule_io(struct pmc_owner *po);
+static void pmclog_stop_kthread(struct pmc_owner *po);
+
+/*
+ * Helper functions
+ */
+
+/*
+ * Get a log buffer
+ */
+
+static int
+pmclog_get_buffer(struct pmc_owner *po)
+{
+ struct pmclog_buffer *plb;
+
+ mtx_assert(&po->po_mtx, MA_OWNED);
+
+ KASSERT(po->po_curbuf == NULL,
+ ("[pmc,%d] po=%p current buffer still valid", __LINE__, po));
+
+ mtx_lock_spin(&pmc_bufferlist_mtx);
+ if ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL)
+ TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
+ mtx_unlock_spin(&pmc_bufferlist_mtx);
+
+ PMCDBG(LOG,GTB,1, "po=%p plb=%p", po, plb);
+
+#if DEBUG
+ if (plb)
+ KASSERT(plb->plb_ptr == plb->plb_base &&
+ plb->plb_base < plb->plb_fence,
+ ("[pmc,%d] po=%p buffer invariants: ptr=%p "
+ "base=%p fence=%p", __LINE__, po, plb->plb_ptr,
+ plb->plb_base, plb->plb_fence));
+#endif
+
+ po->po_curbuf = plb;
+
+ /* update stats */
+ atomic_add_int(&pmc_stats.pm_buffer_requests, 1);
+ if (plb == NULL)
+ atomic_add_int(&pmc_stats.pm_buffer_requests_failed, 1);
+
+ return plb ? 0 : ENOMEM;
+}
+
+/*
+ * Log handler loop.
+ *
+ * This function is executed by each pmc owner's helper thread.
+ */
+
+static void
+pmclog_loop(void *arg)
+{
+ int error;
+ struct pmc_owner *po;
+ struct pmclog_buffer *lb;
+ struct ucred *ownercred;
+ struct ucred *mycred;
+ struct thread *td;
+ struct uio auio;
+ struct iovec aiov;
+ size_t nbytes;
+
+ po = (struct pmc_owner *) arg;
+ td = curthread;
+ mycred = td->td_ucred;
+
+ PROC_LOCK(po->po_owner);
+ ownercred = crhold(po->po_owner->p_ucred);
+ PROC_UNLOCK(po->po_owner);
+
+ PMCDBG(LOG,INI,1, "po=%p kt=%p", po, po->po_kthread);
+ KASSERT(po->po_kthread == curthread->td_proc,
+ ("[pmc,%d] proc mismatch po=%p po/kt=%p curproc=%p", __LINE__,
+ po, po->po_kthread, curthread->td_proc));
+
+ lb = NULL;
+
+
+ /*
+ * Loop waiting for I/O requests to be added to the owner
+ * struct's queue. The loop is exited when the log file
+ * is deconfigured.
+ */
+
+ mtx_lock(&pmc_kthread_mtx);
+
+ for (;;) {
+
+ /* check if we've been asked to exit */
+ if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
+ break;
+
+ if (lb == NULL) { /* look for a fresh buffer to write */
+ mtx_lock_spin(&po->po_mtx);
+ if ((lb = TAILQ_FIRST(&po->po_logbuffers)) == NULL) {
+ mtx_unlock_spin(&po->po_mtx);
+
+ /* wakeup any processes waiting for a FLUSH */
+ if (po->po_flags & PMC_PO_IN_FLUSH) {
+ po->po_flags &= ~PMC_PO_IN_FLUSH;
+ wakeup_one(po->po_kthread);
+ }
+
+ (void) msleep(po, &pmc_kthread_mtx, PWAIT,
+ "pmcloop", 0);
+ continue;
+ }
+
+ TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next);
+ mtx_unlock_spin(&po->po_mtx);
+ }
+
+ mtx_unlock(&pmc_kthread_mtx);
+
+ /* process the request */
+ PMCDBG(LOG,WRI,2, "po=%p base=%p ptr=%p", po,
+ lb->plb_base, lb->plb_ptr);
+ /* change our thread's credentials before issuing the I/O */
+
+ aiov.iov_base = lb->plb_base;
+ aiov.iov_len = nbytes = lb->plb_ptr - lb->plb_base;
+
+ auio.uio_iov = &aiov;
+ auio.uio_iovcnt = 1;
+ auio.uio_offset = -1;
+ auio.uio_resid = nbytes;
+ auio.uio_rw = UIO_WRITE;
+ auio.uio_segflg = UIO_SYSSPACE;
+ auio.uio_td = td;
+
+ /* switch thread credentials -- see kern_ktrace.c */
+ td->td_ucred = ownercred;
+ error = fo_write(po->po_file, &auio, ownercred, 0, td);
+ td->td_ucred = mycred;
+
+ mtx_lock(&pmc_kthread_mtx);
+
+ if (error) {
+ /* XXX some errors are recoverable */
+ /* XXX also check for SIGPIPE if a socket */
+
+ /* send a SIGIO to the owner and exit */
+ PROC_LOCK(po->po_owner);
+ psignal(po->po_owner, SIGIO);
+ PROC_UNLOCK(po->po_owner);
+
+ po->po_error = error; /* save for flush log */
+
+ PMCDBG(LOG,WRI,2, "po=%p error=%d", po, error);
+
+ break;
+ }
+
+ /* put the used buffer back into the global pool */
+ PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
+
+ mtx_lock_spin(&pmc_bufferlist_mtx);
+ TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
+ mtx_unlock_spin(&pmc_bufferlist_mtx);
+
+ lb = NULL;
+ }
+
+ po->po_kthread = NULL;
+
+ mtx_unlock(&pmc_kthread_mtx);
+
+ /* return the current I/O buffer to the global pool */
+ if (lb) {
+ PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
+
+ mtx_lock_spin(&pmc_bufferlist_mtx);
+ TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
+ mtx_unlock_spin(&pmc_bufferlist_mtx);
+ }
+
+ /*
+ * Exit this thread, signalling the waiter
+ */
+
+ crfree(ownercred);
+
+ kthread_exit(0);
+}
+
+/*
+ * Release a log entry and schedule an I/O if needed.
+ */
+
+static void
+pmclog_release(struct pmc_owner *po)
+{
+ KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
+ ("[pmc,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
+ po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
+ KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+ ("[pmc,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__,
+ po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));
+
+ /* schedule an I/O if we've filled a buffer */
+ if (po->po_curbuf->plb_ptr >= po->po_curbuf->plb_fence)
+ pmclog_schedule_io(po);
+
+ mtx_unlock_spin(&po->po_mtx);
+
+ PMCDBG(LOG,REL,1, "po=%p", po);
+}
+
+
+/*
+ * Attempt to reserve 'length' bytes of space in an owner's log
+ * buffer. The function returns a pointer to 'length' bytes of space
+ * if there was enough space or returns NULL if no space was
+ * available. Non-null returns do so with the po mutex locked. The
+ * caller must invoke pmclog_release() on the pmc owner structure
+ * when done.
+ */
+
+static uint32_t *
+pmclog_reserve(struct pmc_owner *po, int length)
+{
+ char *newptr, *oldptr;
+ uint32_t *lh;
+ struct timespec ts;
+
+ PMCDBG(LOG,ALL,1, "po=%p len=%d", po, length);
+
+ KASSERT(length % sizeof(uint32_t) == 0,
+ ("[pmclog,%d] length not a multiple of word size", __LINE__));
+
+ mtx_lock_spin(&po->po_mtx);
+
+ if (po->po_curbuf == NULL)
+ if (pmclog_get_buffer(po) != 0) {
+ mtx_unlock_spin(&po->po_mtx);
+ return NULL;
+ }
+
+ KASSERT(po->po_curbuf != NULL,
+ ("[pmc,%d] po=%p no current buffer", __LINE__, po));
+
+ KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base &&
+ po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+ ("[pmc,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
+ __LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
+ po->po_curbuf->plb_fence));
+
+ oldptr = po->po_curbuf->plb_ptr;
+ newptr = oldptr + length;
+
+ KASSERT(oldptr != NULL,
+ ("[pmc,%d] po=%p Null log buffer pointer", __LINE__, po));
+
+ /*
+ * If we have space in the current buffer, return a pointer to
+ * available space with the PO structure locked.
+ */
+ if (newptr <= po->po_curbuf->plb_fence) {
+ po->po_curbuf->plb_ptr = newptr;
+ goto done;
+ }
+
+ /* otherwise, schedule the current buffer and get a fresh buffer */
+ pmclog_schedule_io(po);
+
+ if (pmclog_get_buffer(po) != 0) {
+ mtx_unlock_spin(&po->po_mtx);
+ return NULL;
+ }
+
+ KASSERT(po->po_curbuf != NULL,
+ ("[pmc,%d] po=%p no current buffer", __LINE__, po));
+
+ KASSERT(po->po_curbuf->plb_ptr != NULL,
+ ("[pmc,%d] null return from pmc_get_log_buffer", __LINE__));
+
+ KASSERT(po->po_curbuf->plb_ptr == po->po_curbuf->plb_base &&
+ po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+ ("[pmc,%d] po=%p buffer invariants: ptr=%p base=%p fence=%p",
+ __LINE__, po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base,
+ po->po_curbuf->plb_fence));
+
+ oldptr = po->po_curbuf->plb_ptr;
+
+ done:
+ lh = (uint32_t *) oldptr; lh++;
+ /* fill in the timestamp */
+ getnanotime(&ts);
+ *lh++ = ts.tv_sec & 0xFFFFFFFF;
+	*lh++ = ts.tv_nsec & 0xFFFFFFFF;
+ return (uint32_t *) oldptr;
+}
+
+/*
+ * Schedule an I/O.
+ *
+ * Transfer the current buffer to the helper kthread.
+ */
+
+static void
+pmclog_schedule_io(struct pmc_owner *po)
+{
+ KASSERT(po->po_curbuf != NULL,
+ ("[pmc,%d] schedule_io with null buffer po=%p", __LINE__, po));
+
+ KASSERT(po->po_curbuf->plb_ptr >= po->po_curbuf->plb_base,
+ ("[pmc,%d] buffer invariants po=%p ptr=%p base=%p", __LINE__,
+ po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_base));
+ KASSERT(po->po_curbuf->plb_ptr <= po->po_curbuf->plb_fence,
+ ("[pmc,%d] buffer invariants po=%p ptr=%p fenc=%p", __LINE__,
+ po, po->po_curbuf->plb_ptr, po->po_curbuf->plb_fence));
+
+ PMCDBG(LOG,SIO, 1, "po=%p", po);
+
+ mtx_assert(&po->po_mtx, MA_OWNED);
+
+ /*
+ * Add the current buffer to the tail of the buffer list and
+ * wakeup the helper.
+ */
+ TAILQ_INSERT_TAIL(&po->po_logbuffers, po->po_curbuf, plb_next);
+ po->po_curbuf = NULL;
+ wakeup_one(po);
+}
+
+/*
+ * Stop the helper kthread.
+ */
+
+static void
+pmclog_stop_kthread(struct pmc_owner *po)
+{
+ /*
+ * Unset flag, wakeup the helper thread,
+ * wait for it to exit
+ */
+
+ mtx_assert(&pmc_kthread_mtx, MA_OWNED);
+ po->po_flags &= ~PMC_PO_OWNS_LOGFILE;
+ wakeup_one(po);
+ if (po->po_kthread)
+ msleep(po->po_kthread, &pmc_kthread_mtx, PPAUSE, "pmcdcl", 0);
+}
+
+/*
+ * Public functions
+ */
+
+/*
+ * Configure a log file for pmc owner 'po'.
+ *
+ * Parameter 'logfd' is a file handle referencing an open file in the
+ * owner process. This file needs to have been opened for writing.
+ */
+
+int
+pmclog_configure_log(struct pmc_owner *po, int logfd)
+{
+ int error;
+ struct proc *p;
+
+ PMCDBG(LOG,CFG,1, "config po=%p logfd=%d", po, logfd);
+
+ p = po->po_owner;
+
+ /* return EBUSY if a log file was already present */
+ if (po->po_flags & PMC_PO_OWNS_LOGFILE)
+ return EBUSY;
+
+ KASSERT(po->po_kthread == NULL,
+ ("[pmc,%d] po=%p kthread (%p) already present", __LINE__, po,
+ po->po_kthread));
+ KASSERT(po->po_file == NULL,
+ ("[pmc,%d] po=%p file (%p) already present", __LINE__, po,
+ po->po_file));
+
+ /* get a reference to the file state */
+ error = fget_write(curthread, logfd, &po->po_file);
+ if (error)
+ goto error;
+
+ /* mark process as owning a log file */
+ po->po_flags |= PMC_PO_OWNS_LOGFILE;
+ error = kthread_create(pmclog_loop, po, &po->po_kthread,
+ RFHIGHPID, 0, "hwpmc: proc(%d)", p->p_pid);
+ if (error)
+ goto error;
+
+ /* mark process as using HWPMCs */
+ PROC_LOCK(p);
+ p->p_flag |= P_HWPMC;
+ PROC_UNLOCK(p);
+
+ /* create a log initialization entry */
+ PMCLOG_RESERVE_WITH_ERROR(po, INITIALIZE,
+ sizeof(struct pmclog_initialize));
+ PMCLOG_EMIT32(PMC_VERSION);
+ PMCLOG_EMIT32(md->pmd_cputype);
+ PMCLOG_DESPATCH(po);
+
+ return 0;
+
+ error:
+ /* shutdown the thread */
+ mtx_lock(&pmc_kthread_mtx);
+ if (po->po_kthread)
+ pmclog_stop_kthread(po);
+ mtx_unlock(&pmc_kthread_mtx);
+
+ KASSERT(po->po_kthread == NULL, ("[pmc,%d] po=%p kthread not stopped",
+ __LINE__, po));
+
+ if (po->po_file)
+ (void) fdrop(po->po_file, curthread);
+ po->po_file = NULL; /* clear file and error state */
+ po->po_error = 0;
+
+ return error;
+}
+
+
+/*
+ * De-configure a log file. This will throw away any buffers queued
+ * for this owner process.
+ */
+
+int
+pmclog_deconfigure_log(struct pmc_owner *po)
+{
+ int error;
+ struct pmclog_buffer *lb;
+
+ PMCDBG(LOG,CFG,1, "de-config po=%p", po);
+
+ if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
+ return EINVAL;
+
+ /* remove this owner from the global SS pmc owner list */
+ if (po->po_sscount)
+ LIST_REMOVE(po, po_ssnext);
+
+ KASSERT(po->po_file != NULL,
+ ("[pmc,%d] po=%p no log file", __LINE__, po));
+
+ /* stop the kthread, this will reset the 'OWNS_LOGFILE' flag */
+ mtx_lock(&pmc_kthread_mtx);
+ if (po->po_kthread)
+ pmclog_stop_kthread(po);
+ mtx_unlock(&pmc_kthread_mtx);
+
+ KASSERT(po->po_kthread == NULL,
+ ("[pmc,%d] po=%p kthread not stopped", __LINE__, po));
+
+ /* return all queued log buffers to the global pool */
+ while ((lb = TAILQ_FIRST(&po->po_logbuffers)) != NULL) {
+ TAILQ_REMOVE(&po->po_logbuffers, lb, plb_next);
+ PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
+ mtx_lock_spin(&pmc_bufferlist_mtx);
+ TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
+ mtx_unlock_spin(&pmc_bufferlist_mtx);
+ }
+
+ /* return the 'current' buffer to the global pool */
+ if ((lb = po->po_curbuf) != NULL) {
+ PMCLOG_INIT_BUFFER_DESCRIPTOR(lb);
+ mtx_lock_spin(&pmc_bufferlist_mtx);
+ TAILQ_INSERT_HEAD(&pmc_bufferlist, lb, plb_next);
+ mtx_unlock_spin(&pmc_bufferlist_mtx);
+ }
+
+ /* drop a reference to the fd */
+ error = fdrop(po->po_file, curthread);
+ po->po_file = NULL;
+ po->po_error = 0;
+
+ return error;
+}
+
+/*
+ * Flush a process' log buffer.
+ */
+
+int
+pmclog_flush(struct pmc_owner *po)
+{
+ int error, has_pending_buffers;
+
+ PMCDBG(LOG,FLS,1, "po=%p", po);
+
+ /*
+ * If there is a pending error recorded by the logger thread,
+ * return that.
+ */
+ if (po->po_error)
+ return po->po_error;
+
+ error = 0;
+
+ /*
+ * Check that we do have an active log file.
+ */
+ mtx_lock(&pmc_kthread_mtx);
+ if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) {
+ error = EINVAL;
+ goto error;
+ }
+
+ /*
+ * Schedule the current buffer if any.
+ */
+ mtx_lock_spin(&po->po_mtx);
+ if (po->po_curbuf)
+ pmclog_schedule_io(po);
+ has_pending_buffers = !TAILQ_EMPTY(&po->po_logbuffers);
+ mtx_unlock_spin(&po->po_mtx);
+
+ if (has_pending_buffers) {
+ po->po_flags |= PMC_PO_IN_FLUSH; /* ask for a wakeup */
+ error = msleep(po->po_kthread, &pmc_kthread_mtx, PWAIT,
+ "pmcflush", 0);
+ }
+
+ error:
+ mtx_unlock(&pmc_kthread_mtx);
+
+ return error;
+}
+
+
+/*
+ * Send a 'close log' event to the log file.
+ */
+
+void
+pmclog_process_closelog(struct pmc_owner *po)
+{
+ PMCLOG_RESERVE(po,CLOSELOG,sizeof(struct pmclog_closelog));
+ PMCLOG_DESPATCH(po);
+}
+
+void
+pmclog_process_dropnotify(struct pmc_owner *po)
+{
+ PMCLOG_RESERVE(po,DROPNOTIFY,sizeof(struct pmclog_dropnotify));
+ PMCLOG_DESPATCH(po);
+}
+
+void
+pmclog_process_mappingchange(struct pmc_owner *po, pid_t pid, int type,
+ uintfptr_t start, uintfptr_t end, char *path)
+{
+ int pathlen, recordlen;
+
+ pathlen = strlen(path) + 1; /* #bytes for path name */
+ recordlen = offsetof(struct pmclog_mappingchange, pl_pathname) +
+ pathlen;
+
+ PMCLOG_RESERVE(po,MAPPINGCHANGE,recordlen);
+ PMCLOG_EMIT32(type);
+ PMCLOG_EMITADDR(start);
+ PMCLOG_EMITADDR(end);
+ PMCLOG_EMIT32(pid);
+ PMCLOG_EMITSTRING(path,pathlen);
+ PMCLOG_DESPATCH(po);
+}
+
+
+void
+pmclog_process_pcsample(struct pmc *pm, struct pmc_sample *ps)
+{
+ struct pmc_owner *po;
+
+ PMCDBG(LOG,SAM,1,"pm=%p pid=%d pc=%p", pm, ps->ps_pid,
+ (void *) ps->ps_pc);
+
+ po = pm->pm_owner;
+
+ PMCLOG_RESERVE(po, PCSAMPLE, sizeof(struct pmclog_pcsample));
+ PMCLOG_EMIT32(ps->ps_pid);
+ PMCLOG_EMITADDR(ps->ps_pc);
+ PMCLOG_EMIT32(pm->pm_id);
+ PMCLOG_DESPATCH(po);
+}
+
+void
+pmclog_process_pmcallocate(struct pmc *pm)
+{
+ struct pmc_owner *po;
+
+ po = pm->pm_owner;
+
+ PMCDBG(LOG,ALL,1, "pm=%p", pm);
+
+ PMCLOG_RESERVE(po, PMCALLOCATE, sizeof(struct pmclog_pmcallocate));
+ PMCLOG_EMIT32(pm->pm_id);
+ PMCLOG_EMIT32(pm->pm_event);
+ PMCLOG_EMIT32(pm->pm_flags);
+ PMCLOG_DESPATCH(po);
+}
+
+void
+pmclog_process_pmcattach(struct pmc *pm, pid_t pid, char *path)
+{
+ int pathlen, recordlen;
+ struct pmc_owner *po;
+
+ PMCDBG(LOG,ATT,1,"pm=%p pid=%d", pm, pid);
+
+ po = pm->pm_owner;
+
+ pathlen = strlen(path) + 1; /* #bytes for the string */
+ recordlen = offsetof(struct pmclog_pmcattach, pl_pathname) + pathlen;
+
+ PMCLOG_RESERVE(po, PMCATTACH, recordlen);
+ PMCLOG_EMIT32(pm->pm_id);
+ PMCLOG_EMIT32(pid);
+ PMCLOG_EMITSTRING(path, pathlen);
+ PMCLOG_DESPATCH(po);
+}
+
+void
+pmclog_process_pmcdetach(struct pmc *pm, pid_t pid)
+{
+ struct pmc_owner *po;
+
+ PMCDBG(LOG,ATT,1,"!pm=%p pid=%d", pm, pid);
+
+ po = pm->pm_owner;
+
+ PMCLOG_RESERVE(po, PMCDETACH, sizeof(struct pmclog_pmcdetach));
+ PMCLOG_EMIT32(pm->pm_id);
+ PMCLOG_EMIT32(pid);
+ PMCLOG_DESPATCH(po);
+}
+
+/*
+ * Log a context switch event to the log file.
+ */
+
+void
+pmclog_process_proccsw(struct pmc *pm, struct pmc_process *pp, pmc_value_t v)
+{
+ struct pmc_owner *po;
+
+ KASSERT(pm->pm_flags & PMC_F_LOG_PROCCSW,
+ ("[pmclog,%d] log-process-csw called gratuitously", __LINE__));
+
+ PMCDBG(LOG,SWO,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid,
+ v);
+
+ po = pm->pm_owner;
+
+ PMCLOG_RESERVE(po, PROCCSW, sizeof(struct pmclog_proccsw));
+ PMCLOG_EMIT32(pm->pm_id);
+ PMCLOG_EMIT64(v);
+ PMCLOG_EMIT32(pp->pp_proc->p_pid);
+ PMCLOG_DESPATCH(po);
+}
+
+void
+pmclog_process_procexec(struct pmc_owner *po, pid_t pid, char *path)
+{
+ int pathlen, recordlen;
+
+ PMCDBG(LOG,EXC,1,"po=%p pid=%d path=\"%s\"", po, pid, path);
+
+ pathlen = strlen(path) + 1; /* #bytes for the path */
+ recordlen = offsetof(struct pmclog_procexec, pl_pathname) + pathlen;
+
+ PMCLOG_RESERVE(po, PROCEXEC, recordlen);
+ PMCLOG_EMIT32(pid);
+ PMCLOG_EMITSTRING(path,pathlen);
+ PMCLOG_DESPATCH(po);
+}
+
+/*
+ * Log a process exit event (and accumulated pmc value) to the log file.
+ */
+
+void
+pmclog_process_procexit(struct pmc *pm, struct pmc_process *pp)
+{
+ int ri;
+ struct pmc_owner *po;
+
+ KASSERT(pm->pm_flags & PMC_F_LOG_PROCEXIT,
+ ("[pmc,%d] log-process-exit called gratuitously", __LINE__));
+
+ ri = PMC_TO_ROWINDEX(pm);
+ PMCDBG(LOG,EXT,1,"pm=%p pid=%d v=%jx", pm, pp->pp_proc->p_pid,
+ pp->pp_pmcs[ri].pp_pmcval);
+
+ po = pm->pm_owner;
+
+ PMCLOG_RESERVE(po, PROCEXIT, sizeof(struct pmclog_procexit));
+ PMCLOG_EMIT32(pm->pm_id);
+ PMCLOG_EMIT64(pp->pp_pmcs[ri].pp_pmcval);
+ PMCLOG_EMIT32(pp->pp_proc->p_pid);
+ PMCLOG_DESPATCH(po);
+}
+
+/*
+ * Log a fork event.
+ */
+
+void
+pmclog_process_procfork(struct pmc_owner *po, pid_t oldpid, pid_t newpid)
+{
+ PMCLOG_RESERVE(po, PROCFORK, sizeof(struct pmclog_procfork));
+ PMCLOG_EMIT32(oldpid);
+ PMCLOG_EMIT32(newpid);
+ PMCLOG_DESPATCH(po);
+}
+
+/*
+ * Log a process exit event of the form suitable for system-wide PMCs.
+ */
+
+void
+pmclog_process_sysexit(struct pmc_owner *po, pid_t pid)
+{
+ PMCLOG_RESERVE(po, SYSEXIT, sizeof(struct pmclog_sysexit));
+ PMCLOG_EMIT32(pid);
+ PMCLOG_DESPATCH(po);
+}
+
+/*
+ * Write a user log entry.
+ */
+
+int
+pmclog_process_userlog(struct pmc_owner *po, struct pmc_op_writelog *wl)
+{
+ int error;
+
+ PMCDBG(LOG,WRI,1, "writelog po=%p ud=0x%x", po, wl->pm_userdata);
+
+ error = 0;
+
+ PMCLOG_RESERVE_WITH_ERROR(po, USERDATA,
+ sizeof(struct pmclog_userdata));
+ PMCLOG_EMIT32(wl->pm_userdata);
+ PMCLOG_DESPATCH(po);
+
+ error:
+ return error;
+}
+
+/*
+ * Initialization.
+ *
+ * Create a pool of log buffers and initialize mutexes.
+ */
+
+void
+pmclog_initialize()
+{
+ int n;
+ struct pmclog_buffer *plb;
+
+ if (pmclog_buffer_size <= 0) {
+ (void) printf("hwpmc: tunable logbuffersize=%d must be greater "
+ "than zero.\n", pmclog_buffer_size);
+ pmclog_buffer_size = PMC_LOG_BUFFER_SIZE;
+ }
+
+ if (pmc_nlogbuffers <= 0) {
+ (void) printf("hwpmc: tunable nlogbuffers=%d must be greater "
+ "than zero.\n", pmc_nlogbuffers);
+ pmc_nlogbuffers = PMC_NLOGBUFFERS;
+ }
+
+ /* create global pool of log buffers */
+ for (n = 0; n < pmc_nlogbuffers; n++) {
+ MALLOC(plb, struct pmclog_buffer *, 1024 * pmclog_buffer_size,
+ M_PMC, M_ZERO|M_WAITOK);
+ PMCLOG_INIT_BUFFER_DESCRIPTOR(plb);
+ TAILQ_INSERT_HEAD(&pmc_bufferlist, plb, plb_next);
+ }
+ mtx_init(&pmc_bufferlist_mtx, "pmc-buffer-list", "pmc", MTX_SPIN);
+ mtx_init(&pmc_kthread_mtx, "pmc-kthread", "pmc", MTX_DEF);
+}
+
+/*
+ * Shutdown logging.
+ *
+ * Destroy mutexes and release memory back the to free pool.
+ */
+
+void
+pmclog_shutdown()
+{
+ struct pmclog_buffer *plb;
+
+ mtx_destroy(&pmc_kthread_mtx);
+ mtx_destroy(&pmc_bufferlist_mtx);
+
+ while ((plb = TAILQ_FIRST(&pmc_bufferlist)) != NULL) {
+ TAILQ_REMOVE(&pmc_bufferlist, plb, plb_next);
+ FREE(plb, M_PMC);
+ }
+}
diff --git a/sys/dev/hwpmc/hwpmc_mod.c b/sys/dev/hwpmc/hwpmc_mod.c
index 36e4761..962c15b 100644
--- a/sys/dev/hwpmc/hwpmc_mod.c
+++ b/sys/dev/hwpmc/hwpmc_mod.c
@@ -32,6 +32,7 @@ __FBSDID("$FreeBSD$");
#include <sys/eventhandler.h>
#include <sys/jail.h>
#include <sys/kernel.h>
+#include <sys/kthread.h>
#include <sys/limits.h>
#include <sys/lock.h>
#include <sys/malloc.h>
@@ -39,8 +40,10 @@ __FBSDID("$FreeBSD$");
#include <sys/mutex.h>
#include <sys/pmc.h>
#include <sys/pmckern.h>
+#include <sys/pmclog.h>
#include <sys/proc.h>
#include <sys/queue.h>
+#include <sys/resourcevar.h>
#include <sys/sched.h>
#include <sys/signalvar.h>
#include <sys/smp.h>
@@ -48,7 +51,9 @@ __FBSDID("$FreeBSD$");
#include <sys/sysctl.h>
#include <sys/sysent.h>
#include <sys/systm.h>
+#include <sys/vnode.h>
+#include <machine/atomic.h>
#include <machine/md_var.h>
/*
@@ -135,6 +140,13 @@ static u_long pmc_ownerhashmask;
static LIST_HEAD(pmc_ownerhash, pmc_owner) *pmc_ownerhash;
/*
+ * List of PMC owners with system-wide sampling PMCs.
+ */
+
+static LIST_HEAD(, pmc_owner) pmc_ss_owners;
+
+
+/*
* Prototypes
*/
@@ -144,54 +156,54 @@ static int pmc_debugflags_parse(char *newstr, char *fence);
#endif
static int load(struct module *module, int cmd, void *arg);
-static int pmc_syscall_handler(struct thread *td, void *syscall_args);
-static int pmc_configure_log(struct pmc_owner *po, int logfd);
-static void pmc_log_process_exit(struct pmc *pm, struct pmc_process *pp);
+static int pmc_attach_process(struct proc *p, struct pmc *pm);
static struct pmc *pmc_allocate_pmc_descriptor(void);
-static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po,
- pmc_id_t pmc);
-static void pmc_release_pmc_descriptor(struct pmc *pmc);
+static struct pmc_owner *pmc_allocate_owner_descriptor(struct proc *p);
+static int pmc_attach_one_process(struct proc *p, struct pmc *pm);
static int pmc_can_allocate_rowindex(struct proc *p, unsigned int ri,
int cpu);
-static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
- uint32_t mode);
-static void pmc_remove_process_descriptor(struct pmc_process *pp);
+static int pmc_can_attach(struct pmc *pm, struct proc *p);
+static void pmc_cleanup(void);
+static int pmc_detach_process(struct proc *p, struct pmc *pm);
+static int pmc_detach_one_process(struct proc *p, struct pmc *pm,
+ int flags);
+static void pmc_destroy_owner_descriptor(struct pmc_owner *po);
static struct pmc_owner *pmc_find_owner_descriptor(struct proc *p);
static int pmc_find_pmc(pmc_id_t pmcid, struct pmc **pm);
+static struct pmc *pmc_find_pmc_descriptor_in_process(struct pmc_owner *po,
+ pmc_id_t pmc);
+static struct pmc_process *pmc_find_process_descriptor(struct proc *p,
+ uint32_t mode);
static void pmc_force_context_switch(void);
-static void pmc_remove_owner(struct pmc_owner *po);
-static void pmc_maybe_remove_owner(struct pmc_owner *po);
-static void pmc_unlink_target_process(struct pmc *pmc,
- struct pmc_process *pp);
static void pmc_link_target_process(struct pmc *pm,
struct pmc_process *pp);
-static void pmc_unlink_owner(struct pmc *pmc);
-static void pmc_cleanup(void);
-static void pmc_save_cpu_binding(struct pmc_binding *pb);
-static void pmc_restore_cpu_binding(struct pmc_binding *pb);
-static void pmc_select_cpu(int cpu);
+static void pmc_maybe_remove_owner(struct pmc_owner *po);
+static void pmc_process_csw_in(struct thread *td);
+static void pmc_process_csw_out(struct thread *td);
static void pmc_process_exit(void *arg, struct proc *p);
static void pmc_process_fork(void *arg, struct proc *p1,
struct proc *p2, int n);
-static int pmc_attach_one_process(struct proc *p, struct pmc *pm);
-static int pmc_attach_process(struct proc *p, struct pmc *pm);
-static int pmc_detach_one_process(struct proc *p, struct pmc *pm,
- int flags);
-static int pmc_detach_process(struct proc *p, struct pmc *pm);
+static void pmc_process_samples(int cpu);
+static void pmc_release_pmc_descriptor(struct pmc *pmc);
+static void pmc_remove_owner(struct pmc_owner *po);
+static void pmc_remove_process_descriptor(struct pmc_process *pp);
+static void pmc_restore_cpu_binding(struct pmc_binding *pb);
+static void pmc_save_cpu_binding(struct pmc_binding *pb);
+static void pmc_select_cpu(int cpu);
static int pmc_start(struct pmc *pm);
static int pmc_stop(struct pmc *pm);
-static int pmc_can_attach(struct pmc *pm, struct proc *p);
+static int pmc_syscall_handler(struct thread *td, void *syscall_args);
+static void pmc_unlink_target_process(struct pmc *pmc,
+ struct pmc_process *pp);
/*
* Kernel tunables and sysctl(8) interface.
*/
-#define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "."
-
SYSCTL_NODE(_kern, OID_AUTO, hwpmc, CTLFLAG_RW, 0, "HWPMC parameters");
#if DEBUG
-unsigned int pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
+struct pmc_debugflags pmc_debugflags = PMC_DEBUG_DEFAULT_FLAGS;
char pmc_debugstr[PMC_DEBUG_STRSIZE];
TUNABLE_STR(PMC_SYSCTL_NAME_PREFIX "debugflags", pmc_debugstr,
sizeof(pmc_debugstr));
@@ -201,7 +213,7 @@ SYSCTL_PROC(_kern_hwpmc, OID_AUTO, debugflags,
#endif
/*
- * kern.pmc.hashrows -- determines the number of rows in the
+ * kern.hwpmc.hashrows -- determines the number of rows in the
* of the hash table used to look up threads
*/
@@ -211,17 +223,16 @@ SYSCTL_INT(_kern_hwpmc, OID_AUTO, hashsize, CTLFLAG_TUN|CTLFLAG_RD,
&pmc_hashsize, 0, "rows in hash tables");
/*
- * kern.pmc.pcpusize -- the size of each per-cpu
- * area for collection PC samples.
+ * kern.hwpmc.nsamples --- number of PC samples per CPU
*/
-static int pmc_pcpu_buffer_size = PMC_PCPU_BUFFER_SIZE;
-TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "pcpubuffersize", &pmc_pcpu_buffer_size);
-SYSCTL_INT(_kern_hwpmc, OID_AUTO, pcpubuffersize, CTLFLAG_TUN|CTLFLAG_RD,
- &pmc_pcpu_buffer_size, 0, "size of per-cpu buffer in 4K pages");
+static int pmc_nsamples = PMC_NSAMPLES;
+TUNABLE_INT(PMC_SYSCTL_NAME_PREFIX "nsamples", &pmc_nsamples);
+SYSCTL_INT(_kern_hwpmc, OID_AUTO, nsamples, CTLFLAG_TUN|CTLFLAG_RD,
+ &pmc_nsamples, 0, "number of PC samples per CPU");
/*
- * kern.pmc.mtxpoolsize -- number of mutexes in the mutex pool.
+ * kern.hwpmc.mtxpoolsize -- number of mutexes in the mutex pool.
*/
static int pmc_mtxpool_size = PMC_MTXPOOL_SIZE;
@@ -230,7 +241,6 @@ SYSCTL_INT(_kern_hwpmc, OID_AUTO, mtxpoolsize, CTLFLAG_TUN|CTLFLAG_RD,
&pmc_mtxpool_size, 0, "size of spin mutex pool");
-
/*
* security.bsd.unprivileged_syspmcs -- allow non-root processes to
* allocate system-wide PMCs.
@@ -248,11 +258,11 @@ SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RW,
&pmc_unprivileged_syspmcs, 0,
"allow unprivileged process to allocate system PMCs");
-#if PMC_HASH_USE_CRC32
-
-#define PMC_HASH_PTR(P,M) (crc32(&(P), sizeof((P))) & (M))
-
-#else /* integer multiplication */
+/*
+ * Hash function. Discard the lower 2 bits of the pointer since
+ * these are always zero for our uses. The hash multiplier is
+ * round((2^LONG_BIT) * ((sqrt(5)-1)/2)).
+ */
#if LONG_BIT == 64
#define _PMC_HM 11400714819323198486u
@@ -262,16 +272,8 @@ SYSCTL_INT(_security_bsd, OID_AUTO, unprivileged_syspmcs, CTLFLAG_RW,
#error Must know the size of 'long' to compile
#endif
-/*
- * Hash function. Discard the lower 2 bits of the pointer since
- * these are always zero for our uses. The hash multiplier is
- * round((2^LONG_BIT) * ((sqrt(5)-1)/2)).
- */
-
#define PMC_HASH_PTR(P,M) ((((unsigned long) (P) >> 2) * _PMC_HM) & (M))
-#endif
-
/*
* Syscall structures
*/
@@ -300,84 +302,141 @@ DECLARE_MODULE(pmc, pmc_mod, SI_SUB_SMP, SI_ORDER_ANY);
MODULE_VERSION(pmc, PMC_VERSION);
#if DEBUG
+enum pmc_dbgparse_state {
+ PMCDS_WS, /* in whitespace */
+ PMCDS_MAJOR, /* seen a major keyword */
+ PMCDS_MINOR
+};
+
static int
pmc_debugflags_parse(char *newstr, char *fence)
{
char c, *p, *q;
- unsigned int tmpflags;
- int level;
- char tmpbuf[4]; /* 3 character keyword + '\0' */
+ struct pmc_debugflags *tmpflags;
+ int error, found, *newbits, tmp;
+ size_t kwlen;
- tmpflags = 0;
- level = 0xF; /* max verbosity */
+ MALLOC(tmpflags, struct pmc_debugflags *, sizeof(*tmpflags),
+ M_PMC, M_WAITOK|M_ZERO);
p = newstr;
+ error = 0;
- for (; p < fence && (c = *p);) {
+ for (; p < fence && (c = *p); p++) {
- /* skip separators */
- if (c == ' ' || c == '\t' || c == ',') {
- p++; continue;
+ /* skip white space */
+ if (c == ' ' || c == '\t')
+ continue;
+
+ /* look for a keyword followed by "=" */
+ for (q = p; p < fence && (c = *p) && c != '='; p++)
+ ;
+ if (c != '=') {
+ error = EINVAL;
+ goto done;
}
- (void) strlcpy(tmpbuf, p, sizeof(tmpbuf));
+ kwlen = p - q;
+ newbits = NULL;
+
+ /* lookup flag group name */
+#define DBG_SET_FLAG_MAJ(S,F) \
+ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \
+ newbits = &tmpflags->pdb_ ## F;
+
+ DBG_SET_FLAG_MAJ("cpu", CPU);
+ DBG_SET_FLAG_MAJ("csw", CSW);
+ DBG_SET_FLAG_MAJ("logging", LOG);
+ DBG_SET_FLAG_MAJ("module", MOD);
+ DBG_SET_FLAG_MAJ("md", MDP);
+ DBG_SET_FLAG_MAJ("owner", OWN);
+ DBG_SET_FLAG_MAJ("pmc", PMC);
+ DBG_SET_FLAG_MAJ("process", PRC);
+ DBG_SET_FLAG_MAJ("sampling", SAM);
+
+ if (newbits == NULL) {
+ error = EINVAL;
+ goto done;
+ }
+
+ p++; /* skip the '=' */
+
+ /* Now parse the individual flags */
+ tmp = 0;
+ newflag:
+ for (q = p; p < fence && (c = *p); p++)
+ if (c == ' ' || c == '\t' || c == ',')
+ break;
+
+ /* p == fence or c == ws or c == "," or c == 0 */
-#define CMP_SET_FLAG_MAJ(S,F) \
- else if (strncmp(tmpbuf, S, 3) == 0) \
- tmpflags |= __PMCDFMAJ(F)
+ if ((kwlen = p - q) == 0) {
+ *newbits = tmp;
+ continue;
+ }
-#define CMP_SET_FLAG_MIN(S,F) \
- else if (strncmp(tmpbuf, S, 3) == 0) \
- tmpflags |= __PMCDFMIN(F)
+ found = 0;
+#define DBG_SET_FLAG_MIN(S,F) \
+ if (kwlen == sizeof(S)-1 && strncmp(q, S, kwlen) == 0) \
+ tmp |= found = (1 << PMC_DEBUG_MIN_ ## F)
+
+ /* a '*' denotes all possible flags in the group */
+ if (kwlen == 1 && *q == '*')
+ tmp = found = ~0;
+ /* look for individual flag names */
+ DBG_SET_FLAG_MIN("allocaterow", ALR);
+ DBG_SET_FLAG_MIN("allocate", ALL);
+ DBG_SET_FLAG_MIN("attach", ATT);
+ DBG_SET_FLAG_MIN("bind", BND);
+ DBG_SET_FLAG_MIN("config", CFG);
+ DBG_SET_FLAG_MIN("exec", EXC);
+ DBG_SET_FLAG_MIN("exit", EXT);
+ DBG_SET_FLAG_MIN("find", FND);
+ DBG_SET_FLAG_MIN("flush", FLS);
+ DBG_SET_FLAG_MIN("fork", FRK);
+ DBG_SET_FLAG_MIN("getbuf", GTB);
+ DBG_SET_FLAG_MIN("hook", PMH);
+ DBG_SET_FLAG_MIN("init", INI);
+ DBG_SET_FLAG_MIN("intr", INT);
+ DBG_SET_FLAG_MIN("linktarget", TLK);
+ DBG_SET_FLAG_MIN("mayberemove", OMR);
+ DBG_SET_FLAG_MIN("ops", OPS);
+ DBG_SET_FLAG_MIN("read", REA);
+ DBG_SET_FLAG_MIN("register", REG);
+ DBG_SET_FLAG_MIN("release", REL);
+ DBG_SET_FLAG_MIN("remove", ORM);
+ DBG_SET_FLAG_MIN("sample", SAM);
+ DBG_SET_FLAG_MIN("scheduleio", SIO);
+ DBG_SET_FLAG_MIN("select", SEL);
+ DBG_SET_FLAG_MIN("signal", SIG);
+ DBG_SET_FLAG_MIN("swi", SWI);
+ DBG_SET_FLAG_MIN("swo", SWO);
+ DBG_SET_FLAG_MIN("start", STA);
+ DBG_SET_FLAG_MIN("stop", STO);
+ DBG_SET_FLAG_MIN("syscall", PMS);
+ DBG_SET_FLAG_MIN("unlinktarget", TUL);
+ DBG_SET_FLAG_MIN("write", WRI);
+ if (found == 0) {
+ /* unrecognized flag name */
+ error = EINVAL;
+ goto done;
+ }
- if (fence - p > 6 && strncmp(p, "level=", 6) == 0) {
- p += 6; /* skip over keyword */
- level = strtoul(p, &q, 16);
+ if (c == 0 || c == ' ' || c == '\t') { /* end of flag group */
+ *newbits = tmp;
+ continue;
}
- CMP_SET_FLAG_MAJ("mod", MOD);
- CMP_SET_FLAG_MAJ("pmc", PMC);
- CMP_SET_FLAG_MAJ("ctx", CTX);
- CMP_SET_FLAG_MAJ("own", OWN);
- CMP_SET_FLAG_MAJ("prc", PRC);
- CMP_SET_FLAG_MAJ("mdp", MDP);
- CMP_SET_FLAG_MAJ("cpu", CPU);
-
- CMP_SET_FLAG_MIN("all", ALL);
- CMP_SET_FLAG_MIN("rel", REL);
- CMP_SET_FLAG_MIN("ops", OPS);
- CMP_SET_FLAG_MIN("ini", INI);
- CMP_SET_FLAG_MIN("fnd", FND);
- CMP_SET_FLAG_MIN("pmh", PMH);
- CMP_SET_FLAG_MIN("pms", PMS);
- CMP_SET_FLAG_MIN("orm", ORM);
- CMP_SET_FLAG_MIN("omr", OMR);
- CMP_SET_FLAG_MIN("tlk", TLK);
- CMP_SET_FLAG_MIN("tul", TUL);
- CMP_SET_FLAG_MIN("ext", EXT);
- CMP_SET_FLAG_MIN("exc", EXC);
- CMP_SET_FLAG_MIN("frk", FRK);
- CMP_SET_FLAG_MIN("att", ATT);
- CMP_SET_FLAG_MIN("swi", SWI);
- CMP_SET_FLAG_MIN("swo", SWO);
- CMP_SET_FLAG_MIN("reg", REG);
- CMP_SET_FLAG_MIN("alr", ALR);
- CMP_SET_FLAG_MIN("rea", REA);
- CMP_SET_FLAG_MIN("wri", WRI);
- CMP_SET_FLAG_MIN("cfg", CFG);
- CMP_SET_FLAG_MIN("sta", STA);
- CMP_SET_FLAG_MIN("sto", STO);
- CMP_SET_FLAG_MIN("int", INT);
- CMP_SET_FLAG_MIN("bnd", BND);
- CMP_SET_FLAG_MIN("sel", SEL);
- else /* unrecognized keyword */
- return EINVAL;
-
- p += 4; /* skip keyword and separator */
+
+ p++;
+ goto newflag;
}
- pmc_debugflags = (tmpflags|level);
+ /* save the new flag set */
+ bcopy(tmpflags, &pmc_debugflags, sizeof(pmc_debugflags));
- return 0;
+ done:
+ FREE(tmpflags, M_PMC);
+ return error;
}
static int
@@ -391,13 +450,13 @@ pmc_debugflags_sysctl_handler(SYSCTL_HANDLER_ARGS)
n = sizeof(pmc_debugstr);
MALLOC(newstr, char *, n, M_PMC, M_ZERO|M_WAITOK);
- (void) strlcpy(newstr, pmc_debugstr, sizeof(pmc_debugstr));
+ (void) strlcpy(newstr, pmc_debugstr, n);
error = sysctl_handle_string(oidp, newstr, n, req);
/* if there is a new string, parse and copy it */
if (error == 0 && req->newptr != NULL) {
- fence = newstr + (n < req->newlen ? n : req->newlen);
+ fence = newstr + (n < req->newlen ? n : req->newlen + 1);
if ((error = pmc_debugflags_parse(newstr, fence)) == 0)
(void) strlcpy(pmc_debugstr, newstr,
sizeof(pmc_debugstr));
@@ -597,53 +656,21 @@ pmc_force_context_switch(void)
}
/*
- * Update the per-pmc histogram
+ * Get the file name for an executable. This is a simple wrapper
+ * around vn_fullpath(9).
*/
-void
-pmc_update_histogram(struct pmc_hw *phw, uintptr_t pc)
-{
- (void) phw;
- (void) pc;
-}
-
-/*
- * Send a signal to a process. This is meant to be invoked from an
- * interrupt handler.
- */
-
-void
-pmc_send_signal(struct pmc *pmc)
+static void
+pmc_getprocname(struct proc *p, char **fullpath, char **freepath)
{
- (void) pmc; /* shutup gcc */
-
-#if 0
- struct proc *proc;
struct thread *td;
- KASSERT(pmc->pm_owner != NULL,
- ("[pmc,%d] No owner for PMC", __LINE__));
-
- KASSERT((pmc->pm_owner->po_flags & PMC_FLAG_IS_OWNER) &&
- (pmc->pm_owner->po_flags & PMC_FLAG_HAS_TS_PMC),
- ("[pmc,%d] interrupting PMC owner has wrong flags 0x%x",
- __LINE__, pmc->pm_owner->po_flags));
-
- proc = pmc->pm_owner->po_owner;
-
- KASSERT(curthread->td_proc == proc,
- ("[pmc,%d] interruping the wrong thread (owner %p, "
- "cur %p)", __LINE__, (void *) proc, curthread->td_proc));
-
- mtx_lock_spin(&sched_lock);
- td = TAILQ_FIRST(&proc->p_threads);
- mtx_unlock_spin(&sched_lock);
- /* XXX RACE HERE: can 'td' disappear now? */
- trapsignal(td, SIGPROF, 0);
- /* XXX rework this to use the regular 'psignal' interface from a
- helper thread */
-#endif
-
+ td = curthread;
+ *fullpath = "unknown";
+ *freepath = NULL;
+ vn_lock(p->p_textvp, LK_EXCLUSIVE | LK_RETRY, td);
+ vn_fullpath(td, p->p_textvp, fullpath, freepath);
+ VOP_UNLOCK(p->p_textvp, 0, td);
}
/*
@@ -653,7 +680,7 @@ pmc_send_signal(struct pmc *pmc)
void
pmc_remove_owner(struct pmc_owner *po)
{
- struct pmc_list *pl, *tmp;
+ struct pmc *pm, *tmp;
sx_assert(&pmc_sx, SX_XLOCKED);
@@ -662,42 +689,23 @@ pmc_remove_owner(struct pmc_owner *po)
/* Remove descriptor from the owner hash table */
LIST_REMOVE(po, po_next);
- /* pass 1: release all owned PMC descriptors */
- LIST_FOREACH_SAFE(pl, &po->po_pmcs, pl_next, tmp) {
-
- PMCDBG(OWN,ORM,2, "pl=%p pmc=%p", pl, pl->pl_pmc);
+ /* release all owned PMC descriptors */
+ LIST_FOREACH_SAFE(pm, &po->po_pmcs, pm_next, tmp) {
+ PMCDBG(OWN,ORM,2, "pmc=%p", pm);
+ KASSERT(pm->pm_owner == po,
+ ("[pmc,%d] owner %p != po %p", __LINE__, pm->pm_owner, po));
- /* remove the associated PMC descriptor, if present */
- if (pl->pl_pmc)
- pmc_release_pmc_descriptor(pl->pl_pmc);
-
- /* remove the linked list entry */
- LIST_REMOVE(pl, pl_next);
- FREE(pl, M_PMC);
- }
-
- /* pass 2: delete the pmc_list chain */
- LIST_FOREACH_SAFE(pl, &po->po_pmcs, pl_next, tmp) {
- KASSERT(pl->pl_pmc == NULL,
- ("[pmc,%d] non-null pmc pointer", __LINE__));
- LIST_REMOVE(pl, pl_next);
- FREE(pl, M_PMC);
+ pmc_release_pmc_descriptor(pm); /* will unlink from the list */
}
+ KASSERT(po->po_sscount == 0,
+ ("[pmc,%d] SS count not zero", __LINE__));
KASSERT(LIST_EMPTY(&po->po_pmcs),
- ("[pmc,%d] PMC list not empty", __LINE__));
-
-
- /*
- * If this process owns a log file used for system wide logging,
- * remove the log file.
- *
- * XXX rework needed.
- */
+ ("[pmc,%d] PMC list not empty", __LINE__));
+ /* de-configure the log file if present */
if (po->po_flags & PMC_PO_OWNS_LOGFILE)
- pmc_configure_log(po, -1);
-
+ pmclog_deconfigure_log(po);
}
/*
@@ -719,7 +727,7 @@ pmc_maybe_remove_owner(struct pmc_owner *po)
if (LIST_EMPTY(&po->po_pmcs) &&
((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)) {
pmc_remove_owner(po);
- FREE(po, M_PMC);
+ pmc_destroy_owner_descriptor(po);
}
}
@@ -737,7 +745,9 @@ pmc_link_target_process(struct pmc *pm, struct pmc_process *pp)
KASSERT(pm != NULL && pp != NULL,
("[pmc,%d] Null pm %p or pp %p", __LINE__, pm, pp));
-
+ KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
+ ("[pmc,%d] Attaching a non-process-virtual pmc=%p to pid=%d",
+ __LINE__, pm, pp->pp_proc->p_pid));
KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt < ((int) md->pmd_npmc - 1),
("[pmc,%d] Illegal reference count %d for process record %p",
__LINE__, pp->pp_refcnt, (void *) pp));
@@ -766,6 +776,12 @@ pmc_link_target_process(struct pmc *pm, struct pmc_process *pp)
if (pm->pm_owner->po_owner == pp->pp_proc)
pm->pm_flags |= PMC_F_ATTACHED_TO_OWNER;
+ /*
+ * Initialize the per-process values at this row index.
+ */
+ pp->pp_pmcs[ri].pp_pmcval = PMC_TO_MODE(pm) == PMC_MODE_TS ?
+ pm->pm_sc.pm_reloadcount : 0;
+
pp->pp_refcnt++;
}
@@ -778,6 +794,7 @@ static void
pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
{
int ri;
+ struct proc *p;
struct pmc_target *ptgt;
sx_assert(&pmc_sx, SX_XLOCKED);
@@ -819,36 +836,17 @@ pmc_unlink_target_process(struct pmc *pm, struct pmc_process *pp)
LIST_REMOVE(ptgt, pt_next);
FREE(ptgt, M_PMC);
-}
-
-/*
- * Remove PMC descriptor 'pmc' from the owner descriptor.
- */
-
-void
-pmc_unlink_owner(struct pmc *pm)
-{
- struct pmc_list *pl, *tmp;
- struct pmc_owner *po;
-#if DEBUG
- KASSERT(LIST_EMPTY(&pm->pm_targets),
- ("[pmc,%d] unlinking PMC with targets", __LINE__));
-#endif
-
- po = pm->pm_owner;
-
- KASSERT(po != NULL, ("[pmc,%d] No owner for PMC", __LINE__));
+ /* if the PMC now lacks targets, send the owner a SIGIO */
+ if (LIST_EMPTY(&pm->pm_targets)) {
+ p = pm->pm_owner->po_owner;
+ PROC_LOCK(p);
+ psignal(p, SIGIO);
+ PROC_UNLOCK(p);
- LIST_FOREACH_SAFE(pl, &po->po_pmcs, pl_next, tmp) {
- if (pl->pl_pmc == pm) {
- pl->pl_pmc = NULL;
- pm->pm_owner = NULL;
- return;
- }
+ PMCDBG(PRC,SIG,2, "signalling proc=%p signal=%d", p,
+ SIGIO);
}
-
- KASSERT(0, ("[pmc,%d] couldn't find pmc in owner list", __LINE__));
}
/*
@@ -914,6 +912,7 @@ static int
pmc_attach_one_process(struct proc *p, struct pmc *pm)
{
int ri;
+ char *fullpath, *freepath;
struct pmc_process *pp;
sx_assert(&pmc_sx, SX_XLOCKED);
@@ -931,7 +930,6 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
* If not, allocate space for a descriptor and link the
* process descriptor and PMC.
*/
-
ri = PMC_TO_ROWINDEX(pm);
if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_ALLOCATE)) == NULL)
@@ -945,6 +943,19 @@ pmc_attach_one_process(struct proc *p, struct pmc *pm)
pmc_link_target_process(pm, pp);
+ if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) &&
+ (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) == 0)
+ pm->pm_flags |= PMC_F_NEEDS_LOGFILE;
+
+ pm->pm_flags |= PMC_F_ATTACH_DONE; /* mark as attached */
+
+ /* issue an attach event to a configured log file */
+ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE) {
+ pmc_getprocname(p, &fullpath, &freepath);
+ pmclog_process_pmcattach(pm, p->p_pid, fullpath);
+ if (freepath)
+ FREE(freepath, M_TEMP);
+ }
/* mark process as using HWPMCs */
PROC_LOCK(p);
p->p_flag |= P_HWPMC;
@@ -1043,12 +1054,15 @@ pmc_detach_one_process(struct proc *p, struct pmc *pm, int flags)
pmc_unlink_target_process(pm, pp);
+ /* Issue a detach entry if a log file is configured */
+ if (pm->pm_owner->po_flags & PMC_PO_OWNS_LOGFILE)
+ pmclog_process_pmcdetach(pm, p->p_pid);
+
/*
* If there are no PMCs targetting this process, we remove its
* descriptor from the target hash table and unset the P_HWPMC
* flag in the struct proc.
*/
-
KASSERT(pp->pp_refcnt >= 0 && pp->pp_refcnt < (int) md->pmd_npmc,
("[pmc,%d] Illegal refcnt %d for process struct %p",
__LINE__, pp->pp_refcnt, pp));
@@ -1113,194 +1127,314 @@ pmc_detach_process(struct proc *p, struct pmc *pm)
done:
sx_sunlock(&proctree_lock);
+
+ if (LIST_EMPTY(&pm->pm_targets))
+ pm->pm_flags &= ~PMC_F_ATTACH_DONE;
+
return 0;
}
+
/*
- * The 'hook' invoked from the kernel proper
+ * Thread context switch IN
*/
+static void
+pmc_process_csw_in(struct thread *td)
+{
+ int cpu;
+ unsigned int ri;
+ struct pmc *pm;
+ struct proc *p;
+ struct pmc_cpu *pc;
+ struct pmc_hw *phw;
+ struct pmc_process *pp;
+ pmc_value_t newvalue;
-#if DEBUG
-const char *pmc_hooknames[] = {
- "",
- "EXIT",
- "EXEC",
- "FORK",
- "CSW-IN",
- "CSW-OUT"
-};
-#endif
+ p = td->td_proc;
-static int
-pmc_hook_handler(struct thread *td, int function, void *arg)
-{
+ if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL)
+ return;
- KASSERT(td->td_proc->p_flag & P_HWPMC,
- ("[pmc,%d] unregistered thread called pmc_hook()", __LINE__));
+ KASSERT(pp->pp_proc == td->td_proc,
+ ("[pmc,%d] not my thread state", __LINE__));
- PMCDBG(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function,
- pmc_hooknames[function], arg);
+ critical_enter(); /* no preemption from this point */
- switch (function)
- {
+ cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */
- /*
- * Process exit.
- *
- * Remove this process from all hash tables. If this process
- * owned any PMCs, turn off those PMCs and deallocate them,
- * removing any associations with target processes.
- *
- * This function will be called by the last 'thread' of a
- * process.
- *
- */
+ PMCDBG(CSW,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
+ p->p_pid, p->p_comm, pp);
- case PMC_FN_PROCESS_EXIT: /* release PMCs */
- {
- int cpu;
- unsigned int ri;
- struct pmc *pm;
- struct pmc_process *pp;
- struct pmc_owner *po;
- struct proc *p;
- pmc_value_t newvalue, tmp;
+ KASSERT(cpu >= 0 && cpu < mp_ncpus,
+ ("[pmc,%d] wierd CPU id %d", __LINE__, cpu));
- sx_assert(&pmc_sx, SX_XLOCKED);
+ pc = pmc_pcpu[cpu];
- p = (struct proc *) arg;
+ for (ri = 0; ri < md->pmd_npmc; ri++) {
+
+ if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
+ continue;
+
+ KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
+ ("[pmc,%d] Target PMC in non-virtual mode (%d)",
+ __LINE__, PMC_TO_MODE(pm)));
+
+ KASSERT(PMC_TO_ROWINDEX(pm) == ri,
+ ("[pmc,%d] Row index mismatch pmc %d != ri %d",
+ __LINE__, PMC_TO_ROWINDEX(pm), ri));
/*
- * Since this code is invoked by the last thread in an
- * exiting process, we would have context switched IN
- * at some prior point. Kernel mode context switches
- * may happen any time, so we want to disable a context
- * switch OUT till we get any PMCs targetting this
- * process off the hardware.
- *
- * We also need to atomically remove this process'
- * entry from our target process hash table, using
- * PMC_FLAG_REMOVE.
+ * Only PMCs that are marked as 'RUNNING' need
+ * be placed on hardware.
*/
- PMCDBG(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid,
- p->p_comm);
+ if (pm->pm_state != PMC_STATE_RUNNING)
+ continue;
- critical_enter(); /* no preemption */
+ /* increment PMC runcount */
+ atomic_add_rel_32(&pm->pm_runcount, 1);
- cpu = curthread->td_oncpu;
+ /* configure the HWPMC we are going to use. */
+ md->pmd_config_pmc(cpu, ri, pm);
- if ((pp = pmc_find_process_descriptor(p,
- PMC_FLAG_REMOVE)) != NULL) {
+ phw = pc->pc_hwpmcs[ri];
- PMCDBG(PRC,EXT,2,
- "process-exit proc=%p pmc-process=%p", p, pp);
+ KASSERT(phw != NULL,
+ ("[pmc,%d] null hw pointer", __LINE__));
- /*
- * The exiting process could the target of
- * some PMCs which will be running on
- * currently executing CPU.
- *
- * We need to turn these PMCs off like we
- * would do at context switch OUT time.
- */
+ KASSERT(phw->phw_pmc == pm,
+ ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__,
+ phw->phw_pmc, pm));
- for (ri = 0; ri < md->pmd_npmc; ri++) {
+ /*
+ * Write out saved value and start the PMC.
+ *
+ * Sampling PMCs use a per-process value, while
+ * counting mode PMCs use a per-pmc value that is
+ * inherited across descendants.
+ */
+ if (PMC_TO_MODE(pm) == PMC_MODE_TS) {
+ mtx_pool_lock_spin(pmc_mtxpool, pm);
+ newvalue = PMC_PCPU_SAVED(cpu,ri) =
+ pp->pp_pmcs[ri].pp_pmcval;
+ mtx_pool_unlock_spin(pmc_mtxpool, pm);
+ } else {
+ KASSERT(PMC_TO_MODE(pm) == PMC_MODE_TC,
+ ("[pmc,%d] illegal mode=%d", __LINE__,
+ PMC_TO_MODE(pm)));
+ mtx_pool_lock_spin(pmc_mtxpool, pm);
+ newvalue = PMC_PCPU_SAVED(cpu, ri) =
+ pm->pm_gv.pm_savedvalue;
+ mtx_pool_unlock_spin(pmc_mtxpool, pm);
+ }
- /*
- * Pick up the pmc pointer from hardware
- * state similar to the CSW_OUT code.
- */
+ PMCDBG(CSW,SWI,1,"cpu=%d ri=%d new=%jd", cpu, ri, newvalue);
- pm = NULL;
- (void) (*md->pmd_get_config)(cpu, ri, &pm);
+ md->pmd_write_pmc(cpu, ri, newvalue);
+ md->pmd_start_pmc(cpu, ri);
+ }
- PMCDBG(PRC,EXT,2, "ri=%d pm=%p", ri, pm);
+ /*
+ * perform any other architecture/cpu dependent thread
+ * switch-in actions.
+ */
- if (pm == NULL ||
- !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
- continue;
+ (void) (*md->pmd_switch_in)(pc, pp);
- PMCDBG(PRC,EXT,2, "ppmcs[%d]=%p pm=%p "
- "state=%d", ri, pp->pp_pmcs[ri].pp_pmc,
- pm, pm->pm_state);
+ critical_exit();
- KASSERT(PMC_TO_ROWINDEX(pm) == ri,
- ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
- __LINE__, PMC_TO_ROWINDEX(pm), ri));
+}
- KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
- ("[pmc,%d] pm %p != pp_pmcs[%d] %p",
- __LINE__, pm, ri,
- pp->pp_pmcs[ri].pp_pmc));
+/*
+ * Thread context switch OUT.
+ */
- (void) md->pmd_stop_pmc(cpu, ri);
+static void
+pmc_process_csw_out(struct thread *td)
+{
+ int cpu;
+ enum pmc_mode mode;
+ unsigned int ri;
+ struct pmc *pm;
+ struct proc *p;
+ struct pmc_cpu *pc;
+ struct pmc_process *pp;
+ int64_t tmp;
+ pmc_value_t newvalue;
- KASSERT(pm->pm_runcount > 0,
- ("[pmc,%d] bad runcount ri %d rc %d",
- __LINE__, ri, pm->pm_runcount));
+ /*
+ * Locate our process descriptor; this may be NULL if
+ * this process is exiting and we have already removed
+ * the process from the target process table.
+ *
+ * Note that due to kernel preemption, multiple
+ * context switches may happen while the process is
+ * exiting.
+ *
+ * Note also that if the target process cannot be
+ * found we still need to deconfigure any PMCs that
+ * are currently running on hardware.
+ */
- if (pm->pm_state == PMC_STATE_RUNNING) {
- md->pmd_read_pmc(cpu, ri, &newvalue);
- tmp = newvalue -
- PMC_PCPU_SAVED(cpu,ri);
+ p = td->td_proc;
+ pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE);
- mtx_pool_lock_spin(pmc_mtxpool, pm);
- pm->pm_gv.pm_savedvalue += tmp;
- pp->pp_pmcs[ri].pp_pmcval += tmp;
- mtx_pool_unlock_spin(pmc_mtxpool, pm);
- }
+ /*
+ * save PMCs
+ */
- atomic_subtract_rel_32(&pm->pm_runcount,1);
+ critical_enter();
- KASSERT((int) pm->pm_runcount >= 0,
- ("[pmc,%d] runcount is %d", __LINE__, ri));
+ cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */
- (void) md->pmd_config_pmc(cpu, ri, NULL);
- }
+ PMCDBG(CSW,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
+ p->p_pid, p->p_comm, pp);
- /*
- * Inform the MD layer of this pseudo "context switch
- * out"
- */
+ KASSERT(cpu >= 0 && cpu < mp_ncpus,
+ ("[pmc,%d wierd CPU id %d", __LINE__, cpu));
- (void) md->pmd_switch_out(pmc_pcpu[cpu], pp);
+ pc = pmc_pcpu[cpu];
- critical_exit(); /* ok to be pre-empted now */
+ /*
+ * When a PMC gets unlinked from a target PMC, it will
+ * be removed from the target's pp_pmc[] array.
+ *
+ * However, on a MP system, the target could have been
+ * executing on another CPU at the time of the unlink.
+ * So, at context switch OUT time, we need to look at
+ * the hardware to determine if a PMC is scheduled on
+ * it.
+ */
- /*
- * Unlink this process from the PMCs that are
- * targetting it. Log value at exit() time if
- * requested.
- */
+ for (ri = 0; ri < md->pmd_npmc; ri++) {
- for (ri = 0; ri < md->pmd_npmc; ri++)
- if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
- if (pm->pm_flags &
- PMC_F_LOG_TC_PROCEXIT)
- pmc_log_process_exit(pm, pp);
- pmc_unlink_target_process(pm, pp);
- }
+ pm = NULL;
+ (void) (*md->pmd_get_config)(cpu, ri, &pm);
- FREE(pp, M_PMC);
+ if (pm == NULL) /* nothing at this row index */
+ continue;
+ mode = PMC_TO_MODE(pm);
+ if (!PMC_IS_VIRTUAL_MODE(mode))
+ continue; /* not a process virtual PMC */
- } else
- critical_exit(); /* pp == NULL */
+ KASSERT(PMC_TO_ROWINDEX(pm) == ri,
+ ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
+ __LINE__, PMC_TO_ROWINDEX(pm), ri));
+
+ /* Stop hardware if not already stopped */
+ if ((pm->pm_flags & PMC_F_IS_STALLED) == 0)
+ md->pmd_stop_pmc(cpu, ri);
+
+ /* reduce this PMC's runcount */
+ atomic_subtract_rel_32(&pm->pm_runcount, 1);
/*
- * If the process owned PMCs, free them up and free up
- * memory.
+ * If this PMC is associated with this process,
+ * save the reading.
*/
- if ((po = pmc_find_owner_descriptor(p)) != NULL) {
- pmc_remove_owner(po);
- FREE(po, M_PMC);
+ if (pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) {
+
+ KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
+ ("[pmc,%d] pm %p != pp_pmcs[%d] %p", __LINE__,
+ pm, ri, pp->pp_pmcs[ri].pp_pmc));
+
+ KASSERT(pp->pp_refcnt > 0,
+ ("[pmc,%d] pp refcnt = %d", __LINE__,
+ pp->pp_refcnt));
+
+ md->pmd_read_pmc(cpu, ri, &newvalue);
+
+ tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);
+
+ PMCDBG(CSW,SWI,1,"cpu=%d ri=%d tmp=%jd", cpu, ri,
+ tmp);
+
+ if (mode == PMC_MODE_TS) {
+
+ /*
+ * For sampling process-virtual PMCs,
+ * we expect the count to be
+ * decreasing as the 'value'
+ * programmed into the PMC is the
+ * number of events to be seen till
+ * the next sampling interrupt.
+ */
+ if (tmp < 0)
+ tmp += pm->pm_sc.pm_reloadcount;
+ mtx_pool_lock_spin(pmc_mtxpool, pm);
+ pp->pp_pmcs[ri].pp_pmcval -= tmp;
+ if ((int64_t) pp->pp_pmcs[ri].pp_pmcval < 0)
+ pp->pp_pmcs[ri].pp_pmcval +=
+ pm->pm_sc.pm_reloadcount;
+ mtx_pool_unlock_spin(pmc_mtxpool, pm);
+
+ } else {
+
+ /*
+ * For counting process-virtual PMCs,
+ * we expect the count to be
+ * increasing monotonically, modulo a 64
+ * bit wraparound.
+ */
+ KASSERT((int64_t) tmp >= 0,
+ ("[pmc,%d] negative increment cpu=%d "
+ "ri=%d newvalue=%jx saved=%jx "
+ "incr=%jx", __LINE__, cpu, ri,
+ newvalue, PMC_PCPU_SAVED(cpu,ri), tmp));
+
+ mtx_pool_lock_spin(pmc_mtxpool, pm);
+ pm->pm_gv.pm_savedvalue += tmp;
+ pp->pp_pmcs[ri].pp_pmcval += tmp;
+ mtx_pool_unlock_spin(pmc_mtxpool, pm);
+
+ if (pm->pm_flags & PMC_F_LOG_PROCCSW)
+ pmclog_process_proccsw(pm, pp, tmp);
+ }
}
+ /* mark hardware as free */
+ md->pmd_config_pmc(cpu, ri, NULL);
}
- break;
+
+ /*
+ * perform any other architecture/cpu dependent thread
+ * switch out functions.
+ */
+
+ (void) (*md->pmd_switch_out)(pc, pp);
+
+ critical_exit();
+}
+
+/*
+ * The 'hook' invoked from the kernel proper
+ */
+
+
+#if DEBUG
+const char *pmc_hooknames[] = {
+ "",
+ "EXIT",
+ "EXEC",
+ "FORK",
+ "CSW-IN",
+ "CSW-OUT",
+ "SAMPLE"
+};
+#endif
+
+static int
+pmc_hook_handler(struct thread *td, int function, void *arg)
+{
+
+ PMCDBG(MOD,PMH,1, "hook td=%p func=%d \"%s\" arg=%p", td, function,
+ pmc_hooknames[function], arg);
+
+ switch (function)
+ {
/*
* Process exec()
@@ -1309,7 +1443,9 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
case PMC_FN_PROCESS_EXEC:
{
int *credentials_changed;
+ char *fullpath, *freepath;
unsigned int ri;
+ int is_using_hwpmcs;
struct pmc *pm;
struct proc *p;
struct pmc_owner *po;
@@ -1317,16 +1453,32 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
sx_assert(&pmc_sx, SX_XLOCKED);
+ p = td->td_proc;
+ pmc_getprocname(p, &fullpath, &freepath);
+
+ /* Inform owners of SS mode PMCs of the exec event. */
+ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+ if (po->po_flags & PMC_PO_OWNS_LOGFILE)
+ pmclog_process_procexec(po, p->p_pid, fullpath);
+
+ PROC_LOCK(p);
+ is_using_hwpmcs = p->p_flag & P_HWPMC;
+ PROC_UNLOCK(p);
+
+ if (!is_using_hwpmcs) {
+ if (freepath)
+ FREE(freepath, M_TEMP);
+ break;
+ }
+
/*
* PMCs are not inherited across an exec(): remove any
* PMCs that this process is the owner of.
*/
- p = td->td_proc;
-
if ((po = pmc_find_owner_descriptor(p)) != NULL) {
pmc_remove_owner(po);
- FREE(po, M_PMC);
+ pmc_destroy_owner_descriptor(po);
}
/*
@@ -1337,6 +1489,23 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
if ((pp = pmc_find_process_descriptor(p, 0)) == NULL)
break;
+ /*
+ * Log the exec event to all monitoring owners. Skip
+ * owners who have already recieved the event because
+ * the have system sampling PMCs active.
+ */
+ for (ri = 0; ri < md->pmd_npmc; ri++)
+ if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
+ po = pm->pm_owner;
+ if (po->po_sscount == 0 &&
+ po->po_flags & PMC_PO_OWNS_LOGFILE)
+ pmclog_process_procexec(po, p->p_pid,
+ fullpath);
+ }
+
+ if (freepath)
+ FREE(freepath, M_TEMP);
+
credentials_changed = arg;
PMCDBG(PRC,EXC,1, "exec proc=%p (%d, %s) cred-changed=%d",
@@ -1370,304 +1539,45 @@ pmc_hook_handler(struct thread *td, int function, void *arg)
if (pp->pp_refcnt == 0) {
pmc_remove_process_descriptor(pp);
FREE(pp, M_PMC);
- }
- }
- break;
-
- /*
- * Process fork()
- */
-
- case PMC_FN_PROCESS_FORK:
- {
- unsigned int ri;
- uint32_t do_descendants;
- struct pmc *pm;
- struct pmc_process *ppnew, *ppold;
- struct proc *newproc;
-
- sx_assert(&pmc_sx, SX_XLOCKED);
-
- newproc = (struct proc *) arg;
-
- PMCDBG(PMC,FRK,2, "process-fork p1=%p p2=%p",
- curthread->td_proc, newproc);
- /*
- * If the parent process (curthread->td_proc) is a
- * target of any PMCs, look for PMCs that are to be
- * inherited, and link these into the new process
- * descriptor.
- */
-
- if ((ppold = pmc_find_process_descriptor(
- curthread->td_proc, PMC_FLAG_NONE)) == NULL)
break;
-
- do_descendants = 0;
- for (ri = 0; ri < md->pmd_npmc; ri++)
- if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL)
- do_descendants |=
- pm->pm_flags & PMC_F_DESCENDANTS;
- if (do_descendants == 0) /* nothing to do */
- break;
-
- if ((ppnew = pmc_find_process_descriptor(newproc,
- PMC_FLAG_ALLOCATE)) == NULL)
- return ENOMEM;
-
- /*
- * Run through all PMCs targeting the old process and
- * attach them to the new process.
- */
-
- for (ri = 0; ri < md->pmd_npmc; ri++)
- if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL &&
- pm->pm_flags & PMC_F_DESCENDANTS)
- pmc_link_target_process(pm, ppnew);
-
- /*
- * Now mark the new process as being tracked by this
- * driver.
- */
-
- PROC_LOCK(newproc);
- newproc->p_flag |= P_HWPMC;
- PROC_UNLOCK(newproc);
+ }
}
break;
- /*
- * Thread context switch IN
- */
-
case PMC_FN_CSW_IN:
- {
- int cpu;
- unsigned int ri;
- struct pmc *pm;
- struct proc *p;
- struct pmc_cpu *pc;
- struct pmc_hw *phw;
- struct pmc_process *pp;
- pmc_value_t newvalue;
-
- p = td->td_proc;
-
- if ((pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE)) == NULL)
- break;
-
- KASSERT(pp->pp_proc == td->td_proc,
- ("[pmc,%d] not my thread state", __LINE__));
-
- critical_enter(); /* no preemption on this CPU */
-
- cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */
-
- PMCDBG(CTX,SWI,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
- p->p_pid, p->p_comm, pp);
-
- KASSERT(cpu >= 0 && cpu < mp_ncpus,
- ("[pmc,%d] wierd CPU id %d", __LINE__, cpu));
-
- pc = pmc_pcpu[cpu];
-
- for (ri = 0; ri < md->pmd_npmc; ri++) {
-
- if ((pm = pp->pp_pmcs[ri].pp_pmc) == NULL)
- continue;
-
- KASSERT(PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)),
- ("[pmc,%d] Target PMC in non-virtual mode (%d)",
- __LINE__, PMC_TO_MODE(pm)));
-
- KASSERT(PMC_TO_ROWINDEX(pm) == ri,
- ("[pmc,%d] Row index mismatch pmc %d != ri %d",
- __LINE__, PMC_TO_ROWINDEX(pm), ri));
-
- /*
- * Only PMCs that are marked as 'RUNNING' need
- * be placed on hardware.
- */
-
- if (pm->pm_state != PMC_STATE_RUNNING)
- continue;
-
- /* increment PMC runcount */
- atomic_add_rel_32(&pm->pm_runcount, 1);
-
- /* configure the HWPMC we are going to use. */
- md->pmd_config_pmc(cpu, ri, pm);
-
- phw = pc->pc_hwpmcs[ri];
-
- KASSERT(phw != NULL,
- ("[pmc,%d] null hw pointer", __LINE__));
-
- KASSERT(phw->phw_pmc == pm,
- ("[pmc,%d] hw->pmc %p != pmc %p", __LINE__,
- phw->phw_pmc, pm));
-
- /* write out saved value and start the PMC */
- mtx_pool_lock_spin(pmc_mtxpool, pm);
- newvalue = PMC_PCPU_SAVED(cpu, ri) =
- pm->pm_gv.pm_savedvalue;
- mtx_pool_unlock_spin(pmc_mtxpool, pm);
-
- md->pmd_write_pmc(cpu, ri, newvalue);
- md->pmd_start_pmc(cpu, ri);
-
- }
-
- /*
- * perform any other architecture/cpu dependent thread
- * switch-in actions.
- */
-
- (void) (*md->pmd_switch_in)(pc, pp);
-
- critical_exit();
+ pmc_process_csw_in(td);
+ break;
- }
- break;
+ case PMC_FN_CSW_OUT:
+ pmc_process_csw_out(td);
+ break;
/*
- * Thread context switch OUT.
+ * Process accumulated PC samples.
+ *
+ * This function is expected to be called by hardclock() for
+ * each CPU that has accumulated PC samples.
+ *
+ * This function is to be executed on the CPU whose samples
+ * are being processed.
*/
-
- case PMC_FN_CSW_OUT:
- {
- int cpu;
- unsigned int ri;
- struct pmc *pm;
- struct proc *p;
- struct pmc_cpu *pc;
- struct pmc_process *pp;
- pmc_value_t newvalue, tmp;
-
- /*
- * Locate our process descriptor; this may be NULL if
- * this process is exiting and we have already removed
- * the process from the target process table.
- *
- * Note that due to kernel preemption, multiple
- * context switches may happen while the process is
- * exiting.
- *
- * Note also that if the target process cannot be
- * found we still need to deconfigure any PMCs that
- * are currently running on hardware.
- */
-
- p = td->td_proc;
- pp = pmc_find_process_descriptor(p, PMC_FLAG_NONE);
-
- /*
- * save PMCs
- */
-
- critical_enter();
-
- cpu = PCPU_GET(cpuid); /* td->td_oncpu is invalid */
-
- PMCDBG(CTX,SWO,1, "cpu=%d proc=%p (%d, %s) pp=%p", cpu, p,
- p->p_pid, p->p_comm, pp);
-
- KASSERT(cpu >= 0 && cpu < mp_ncpus,
- ("[pmc,%d wierd CPU id %d", __LINE__, cpu));
-
- pc = pmc_pcpu[cpu];
+ case PMC_FN_DO_SAMPLES:
/*
- * When a PMC gets unlinked from a target PMC, it will
- * be removed from the target's pp_pmc[] array.
- *
- * However, on a MP system, the target could have been
- * executing on another CPU at the time of the unlink.
- * So, at context switch OUT time, we need to look at
- * the hardware to determine if a PMC is scheduled on
- * it.
+ * Clear the cpu specific bit in the CPU mask before
+ * do the rest of the processing. If the NMI handler
+ * gets invoked after the "atomic_clear_int()" call
+ * below but before "pmc_process_samples()" gets
+ * around to processing the interrupt, then we will
+ * come back here at the next hardclock() tick (and
+ * may find nothing to do if "pmc_process_samples()"
+ * had already processed the interrupt). We don't
+ * lose the interrupt sample.
*/
-
- for (ri = 0; ri < md->pmd_npmc; ri++) {
-
- pm = NULL;
- (void) (*md->pmd_get_config)(cpu, ri, &pm);
-
- if (pm == NULL) /* nothing at this row index */
- continue;
-
- if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
- continue; /* not a process virtual PMC */
-
- KASSERT(PMC_TO_ROWINDEX(pm) == ri,
- ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
- __LINE__, PMC_TO_ROWINDEX(pm), ri));
-
- /* Stop hardware */
- md->pmd_stop_pmc(cpu, ri);
-
- /* reduce this PMC's runcount */
- atomic_subtract_rel_32(&pm->pm_runcount, 1);
-
- /*
- * If this PMC is associated with this process,
- * save the reading.
- */
-
- if (pp != NULL && pp->pp_pmcs[ri].pp_pmc != NULL) {
-
- KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
- ("[pmc,%d] pm %p != pp_pmcs[%d] %p",
- __LINE__, pm, ri,
- pp->pp_pmcs[ri].pp_pmc));
-
- KASSERT(pp->pp_refcnt > 0,
- ("[pmc,%d] pp refcnt = %d", __LINE__,
- pp->pp_refcnt));
-
- md->pmd_read_pmc(cpu, ri, &newvalue);
-
- tmp = newvalue - PMC_PCPU_SAVED(cpu,ri);
-
- KASSERT((int64_t) tmp >= 0,
- ("[pmc,%d] negative increment cpu=%d "
- "ri=%d newvalue=%jx saved=%jx "
- "incr=%jx", __LINE__, cpu, ri,
- newvalue, PMC_PCPU_SAVED(cpu,ri),
- tmp));
-
- /*
- * Increment the PMC's count and this
- * target process's count by the difference
- * between the current reading and the
- * saved value at context switch in time.
- */
-
- mtx_pool_lock_spin(pmc_mtxpool, pm);
-
- pm->pm_gv.pm_savedvalue += tmp;
- pp->pp_pmcs[ri].pp_pmcval += tmp;
-
- mtx_pool_unlock_spin(pmc_mtxpool, pm);
-
- }
-
- /* mark hardware as free */
- md->pmd_config_pmc(cpu, ri, NULL);
- }
-
- /*
- * perform any other architecture/cpu dependent thread
- * switch out functions.
- */
-
- (void) (*md->pmd_switch_out)(pc, pp);
-
- critical_exit();
-
- }
- break;
+ atomic_clear_int(&pmc_cpumask, (1 << PCPU_GET(cpuid)));
+ pmc_process_samples(PCPU_GET(cpuid));
+ break;
default:
#if DEBUG
@@ -1696,19 +1606,35 @@ pmc_allocate_owner_descriptor(struct proc *p)
/* allocate space for N pointers and one descriptor struct */
MALLOC(po, struct pmc_owner *, sizeof(struct pmc_owner),
- M_PMC, M_WAITOK);
+ M_PMC, M_ZERO|M_WAITOK);
- po->po_flags = 0;
+ po->po_sscount = po->po_error = po->po_flags = 0;
+ po->po_file = NULL;
po->po_owner = p;
+ po->po_kthread = NULL;
LIST_INIT(&po->po_pmcs);
LIST_INSERT_HEAD(poh, po, po_next); /* insert into hash table */
+ TAILQ_INIT(&po->po_logbuffers);
+ mtx_init(&po->po_mtx, "pmc-owner-mtx", "pmc", MTX_SPIN);
+
PMCDBG(OWN,ALL,1, "allocate-owner proc=%p (%d, %s) pmc-owner=%p",
p, p->p_pid, p->p_comm, po);
return po;
}
+static void
+pmc_destroy_owner_descriptor(struct pmc_owner *po)
+{
+
+ PMCDBG(OWN,REL,1, "destroy-owner po=%p proc=%p (%d, %s)",
+ po, po->po_owner, po->po_owner->p_pid, po->po_owner->p_comm);
+
+ mtx_destroy(&po->po_mtx);
+ FREE(po, M_PMC);
+}
+
/*
* find the descriptor corresponding to process 'p', adding or removing it
* as specified by 'mode'.
@@ -1850,6 +1776,31 @@ pmc_destroy_pmc_descriptor(struct pmc *pm)
#endif
}
+static void
+pmc_wait_for_pmc_idle(struct pmc *pm)
+{
+#if DEBUG
+ volatile int maxloop;
+
+ maxloop = 100 * mp_ncpus;
+#endif
+
+ /*
+ * Loop (with a forced context switch) till the PMC's runcount
+ * comes down to zero.
+ */
+ while (atomic_load_acq_32(&pm->pm_runcount) > 0) {
+#if DEBUG
+ maxloop--;
+ KASSERT(maxloop > 0,
+ ("[pmc,%d] (ri%d, rc%d) waiting too long for "
+ "pmc to be free", __LINE__,
+ PMC_TO_ROWINDEX(pm), pm->pm_runcount));
+#endif
+ pmc_force_context_switch();
+ }
+}
+
/*
* This function does the following things:
*
@@ -1865,12 +1816,10 @@ pmc_destroy_pmc_descriptor(struct pmc *pm)
static void
pmc_release_pmc_descriptor(struct pmc *pm)
{
-#if DEBUG
- volatile int maxloop;
-#endif
u_int ri, cpu;
enum pmc_mode mode;
struct pmc_hw *phw;
+ struct pmc_owner *po;
struct pmc_process *pp;
struct pmc_target *ptgt, *tmp;
struct pmc_binding pb;
@@ -1895,21 +1844,21 @@ pmc_release_pmc_descriptor(struct pmc *pm)
* A system mode PMC runs on a specific CPU. Switch
* to this CPU and turn hardware off.
*/
-
pmc_save_cpu_binding(&pb);
cpu = PMC_TO_CPU(pm);
- if (pm->pm_state == PMC_STATE_RUNNING) {
+ pmc_select_cpu(cpu);
- pmc_select_cpu(cpu);
+ /* switch off non-stalled CPUs */
+ if (pm->pm_state == PMC_STATE_RUNNING &&
+ (pm->pm_flags & PMC_F_IS_STALLED) == 0) {
phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
KASSERT(phw->phw_pmc == pm,
("[pmc, %d] pmc ptr ri(%d) hw(%p) pm(%p)",
__LINE__, ri, phw->phw_pmc, pm));
-
PMCDBG(PMC,REL,2, "stopping cpu=%d ri=%d", cpu, ri);
critical_enter();
@@ -1923,10 +1872,27 @@ pmc_release_pmc_descriptor(struct pmc *pm)
md->pmd_config_pmc(cpu, ri, NULL);
critical_exit();
+ /* adjust the global and process count of SS mode PMCs */
+ if (mode == PMC_MODE_SS && pm->pm_state == PMC_STATE_RUNNING) {
+ po = pm->pm_owner;
+ po->po_sscount--;
+ if (po->po_sscount == 0) {
+ atomic_subtract_rel_int(&pmc_ss_count, 1);
+ LIST_REMOVE(po, po_ssnext);
+ }
+ }
+
pm->pm_state = PMC_STATE_DELETED;
pmc_restore_cpu_binding(&pb);
+ /*
+ * We could have references to this PMC structure in
+ * the per-cpu sample queues. Wait for the queue to
+ * drain.
+ */
+ pmc_wait_for_pmc_idle(pm);
+
} else if (PMC_IS_VIRTUAL_MODE(mode)) {
/*
@@ -1938,30 +1904,11 @@ pmc_release_pmc_descriptor(struct pmc *pm)
*
* Then we wait till all CPUs are done with this PMC.
*/
-
pm->pm_state = PMC_STATE_DELETED;
- /*
- * Wait for the PMCs runcount to come to zero.
- */
-
-#if DEBUG
- maxloop = 100 * mp_ncpus;
-#endif
-
- while (atomic_load_acq_32(&pm->pm_runcount) > 0) {
-
-#if DEBUG
- maxloop--;
- KASSERT(maxloop > 0,
- ("[pmc,%d] (ri%d, rc%d) waiting too long for "
- "pmc to be free", __LINE__,
- PMC_TO_ROWINDEX(pm), pm->pm_runcount));
-#endif
-
- pmc_force_context_switch();
- }
+ /* Wait for the PMCs runcount to come to zero. */
+ pmc_wait_for_pmc_idle(pm);
/*
* At this point the PMC is off all CPUs and cannot be
@@ -1971,7 +1918,6 @@ pmc_release_pmc_descriptor(struct pmc *pm)
* it from the hash table. The module-wide SX lock
* protects us from races.
*/
-
LIST_FOREACH_SAFE(ptgt, &pm->pm_targets, pt_next, tmp) {
pp = ptgt->pt_process;
pmc_unlink_target_process(pm, pp); /* frees 'ptgt' */
@@ -2009,8 +1955,10 @@ pmc_release_pmc_descriptor(struct pmc *pm)
PMC_UNMARK_ROW_THREAD(ri);
/* unlink from the owner's list */
- if (pm->pm_owner)
- pmc_unlink_owner(pm);
+ if (pm->pm_owner) {
+ LIST_REMOVE(pm, pm_next);
+ pm->pm_owner = NULL;
+ }
pmc_destroy_pmc_descriptor(pm);
}
@@ -2022,47 +1970,29 @@ pmc_release_pmc_descriptor(struct pmc *pm)
static int
pmc_register_owner(struct proc *p, struct pmc *pmc)
{
- struct pmc_list *pl;
struct pmc_owner *po;
sx_assert(&pmc_sx, SX_XLOCKED);
- MALLOC(pl, struct pmc_list *, sizeof(struct pmc_list), M_PMC,
- M_WAITOK);
-
- if (pl == NULL)
- return ENOMEM;
-
if ((po = pmc_find_owner_descriptor(p)) == NULL)
- if ((po = pmc_allocate_owner_descriptor(p)) == NULL) {
- FREE(pl, M_PMC);
+ if ((po = pmc_allocate_owner_descriptor(p)) == NULL)
return ENOMEM;
- }
-
- /* XXX is this too restrictive */
- if (PMC_ID_TO_MODE(pmc->pm_id) == PMC_MODE_TS) {
- /* can have only one TS mode PMC per process */
- if (po->po_flags & PMC_PO_HAS_TS_PMC) {
- FREE(pl, M_PMC);
- return EINVAL;
- }
- po->po_flags |= PMC_PO_HAS_TS_PMC;
- }
KASSERT(pmc->pm_owner == NULL,
("[pmc,%d] attempting to own an initialized PMC", __LINE__));
pmc->pm_owner = po;
- pl->pl_pmc = pmc;
-
- LIST_INSERT_HEAD(&po->po_pmcs, pl, pl_next);
+ LIST_INSERT_HEAD(&po->po_pmcs, pmc, pm_next);
PROC_LOCK(p);
p->p_flag |= P_HWPMC;
PROC_UNLOCK(p);
- PMCDBG(PMC,REG,1, "register-owner pmc-owner=%p pl=%p pmc=%p",
- po, pl, pmc);
+ if (po->po_flags & PMC_PO_OWNS_LOGFILE)
+ pmclog_process_pmcallocate(pmc);
+
+ PMCDBG(PMC,REG,1, "register-owner pmc-owner=%p pmc=%p",
+ po, pmc);
return 0;
}
@@ -2096,7 +2026,6 @@ pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu)
{
enum pmc_mode mode;
struct pmc *pm;
- struct pmc_list *pl;
struct pmc_owner *po;
struct pmc_process *pp;
@@ -2111,8 +2040,7 @@ pmc_can_allocate_rowindex(struct proc *p, unsigned int ri, int cpu)
* CPU and same RI.
*/
if ((po = pmc_find_owner_descriptor(p)) != NULL)
- LIST_FOREACH(pl, &po->po_pmcs, pl_next) {
- pm = pl->pl_pmc;
+ LIST_FOREACH(pm, &po->po_pmcs, pm_next) {
if (PMC_TO_ROWINDEX(pm) == ri) {
mode = PMC_TO_MODE(pm);
if (PMC_IS_VIRTUAL_MODE(mode))
@@ -2189,15 +2117,15 @@ pmc_can_allocate_row(int ri, enum pmc_mode mode)
static struct pmc *
pmc_find_pmc_descriptor_in_process(struct pmc_owner *po, pmc_id_t pmcid)
{
- struct pmc_list *pl;
+ struct pmc *pm;
KASSERT(PMC_ID_TO_ROWINDEX(pmcid) < md->pmd_npmc,
("[pmc,%d] Illegal pmc index %d (max %d)", __LINE__,
PMC_ID_TO_ROWINDEX(pmcid), md->pmd_npmc));
- LIST_FOREACH(pl, &po->po_pmcs, pl_next)
- if (pl->pl_pmc->pm_id == pmcid)
- return pl->pl_pmc;
+ LIST_FOREACH(pm, &po->po_pmcs, pm_next)
+ if (pm->pm_id == pmcid)
+ return pm;
return NULL;
}
@@ -2232,6 +2160,7 @@ pmc_start(struct pmc *pm)
{
int error, cpu, ri;
enum pmc_mode mode;
+ struct pmc_owner *po;
struct pmc_binding pb;
KASSERT(pm != NULL,
@@ -2243,36 +2172,67 @@ pmc_start(struct pmc *pm)
PMCDBG(PMC,OPS,1, "start pmc=%p mode=%d ri=%d", pm, mode, ri);
- pm->pm_state = PMC_STATE_RUNNING;
+ po = pm->pm_owner;
if (PMC_IS_VIRTUAL_MODE(mode)) {
/*
- * If a PMCATTACH hadn't been done on this
- * PMC, attach this PMC to its owner process.
+ * If a PMCATTACH has never been done on this PMC,
+ * attach it to its owner process.
*/
if (LIST_EMPTY(&pm->pm_targets))
- error = pmc_attach_process(pm->pm_owner->po_owner, pm);
+ error = (pm->pm_flags & PMC_F_ATTACH_DONE) ? ESRCH :
+ pmc_attach_process(po->po_owner, pm);
/*
- * If the PMC is attached to its owner, then force a context
- * switch to ensure that the MD state gets set correctly.
+ * Disallow PMCSTART if a logfile is required but has not
+ * been configured yet.
*/
- if (error == 0 && (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER))
- pmc_force_context_switch();
+
+ if (error == 0 && (pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
+ (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
+ error = EDOOFUS;
/*
- * Nothing further to be done; thread context switch code
- * will start/stop the hardware as appropriate.
+ * If the PMC is attached to its owner, then force a context
+ * switch to ensure that the MD state gets set correctly.
*/
+ if (error == 0) {
+ pm->pm_state = PMC_STATE_RUNNING;
+ if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER)
+ pmc_force_context_switch();
+ }
+
return error;
+ }
+
+
+ /*
+ * A system-wide PMC.
+ */
+ if ((pm->pm_flags & PMC_F_NEEDS_LOGFILE) &&
+ (po->po_flags & PMC_PO_OWNS_LOGFILE) == 0)
+ return EDOOFUS; /* programming error */
+
+ /*
+ * Add the owner to the global list if this is a system-wide
+ * sampling PMC.
+ */
+
+ if (mode == PMC_MODE_SS) {
+ if (po->po_sscount == 0) {
+ LIST_INSERT_HEAD(&pmc_ss_owners, po, po_ssnext);
+ atomic_add_rel_int(&pmc_ss_count, 1);
+ PMCDBG(PMC,OPS,1, "po=%p in global list", po);
+ }
+ po->po_sscount++;
}
/*
- * A system-wide PMC. Move to the CPU associated with this
+ * Move to the CPU associated with this
* PMC, and start the hardware.
*/
@@ -2290,6 +2250,8 @@ pmc_start(struct pmc *pm)
* so write out the initial value and start the PMC.
*/
+ pm->pm_state = PMC_STATE_RUNNING;
+
critical_enter();
if ((error = md->pmd_write_pmc(cpu, ri,
PMC_IS_SAMPLING_MODE(mode) ?
@@ -2311,6 +2273,7 @@ static int
pmc_stop(struct pmc *pm)
{
int cpu, error, ri;
+ struct pmc_owner *po;
struct pmc_binding pb;
KASSERT(pm != NULL, ("[pmc,%d] null pmc", __LINE__));
@@ -2361,6 +2324,18 @@ pmc_stop(struct pmc *pm)
pmc_restore_cpu_binding(&pb);
+ po = pm->pm_owner;
+
+ /* remove this owner from the global list of SS PMC owners */
+ if (PMC_TO_MODE(pm) == PMC_MODE_SS) {
+ po->po_sscount--;
+ if (po->po_sscount == 0) {
+ atomic_subtract_rel_int(&pmc_ss_count, 1);
+ LIST_REMOVE(po, po_ssnext);
+ PMCDBG(PMC,OPS,2,"po=%p removed from global list", po);
+ }
+ }
+
return error;
}
@@ -2400,6 +2375,8 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
PMC_GET_SX_XLOCK(ENOSYS);
+ DROP_GIANT();
+
is_sx_downgraded = 0;
c = (struct pmc_syscall_args *) syscall_args;
@@ -2437,16 +2414,49 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
/* mark this process as owning a log file */
p = td->td_proc;
if ((po = pmc_find_owner_descriptor(p)) == NULL)
- if ((po = pmc_allocate_owner_descriptor(p)) == NULL)
- return ENOMEM;
+ if ((po = pmc_allocate_owner_descriptor(p)) == NULL) {
+ error = ENOMEM;
+ break;
+ }
+
+ /*
+ * If a valid fd was passed in, try to configure that,
+ * otherwise if 'fd' was less than zero and there was
+ * a log file configured, flush its buffers and
+ * de-configure it.
+ */
+ if (cl.pm_logfd >= 0)
+ error = pmclog_configure_log(po, cl.pm_logfd);
+ else if (po->po_flags & PMC_PO_OWNS_LOGFILE) {
+ pmclog_process_closelog(po);
+ error = pmclog_flush(po);
+ if (error == 0)
+ error = pmclog_deconfigure_log(po);
+ } else
+ error = EINVAL;
+ }
+ break;
- if ((error = pmc_configure_log(po, cl.pm_logfd)) != 0)
+
+ /*
+ * Flush a log file.
+ */
+
+ case PMC_OP_FLUSHLOG:
+ {
+ struct pmc_owner *po;
+
+ sx_assert(&pmc_sx, SX_XLOCKED);
+
+ if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) {
+ error = EINVAL;
break;
+ }
+ error = pmclog_flush(po);
}
break;
-
/*
* Retrieve hardware configuration.
*/
@@ -2486,7 +2496,18 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
case PMC_OP_GETMODULEVERSION:
{
- error = copyout(&_pmc_version.mv_version, arg, sizeof(int));
+ uint32_t cv, modv;
+
+ /* retrieve the client's idea of the ABI version */
+ if ((error = copyin(arg, &cv, sizeof(uint32_t))) != 0)
+ break;
+ /* don't service clients newer than our driver */
+ modv = PMC_VERSION;
+ if ((cv & 0xFFFF0000) > (modv & 0xFFFF0000)) {
+ error = EPROGMISMATCH;
+ break;
+ }
+ error = copyout(&modv, arg, sizeof(int));
}
break;
@@ -2748,8 +2769,15 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
* Look for valid values for 'pm_flags'
*/
- if ((pa.pm_flags & ~(PMC_F_DESCENDANTS|PMC_F_LOG_TC_CSW))
- != 0) {
+ if ((pa.pm_flags & ~(PMC_F_DESCENDANTS | PMC_F_LOG_PROCCSW |
+ PMC_F_LOG_PROCEXIT)) != 0) {
+ error = EINVAL;
+ break;
+ }
+
+ /* process logging options are not allowed for system PMCs */
+ if (PMC_IS_SYSTEM_MODE(mode) && (pa.pm_flags &
+ (PMC_F_LOG_PROCCSW | PMC_F_LOG_PROCEXIT))) {
error = EINVAL;
break;
}
@@ -2759,11 +2787,8 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
* CPU.
*/
- if (PMC_IS_SAMPLING_MODE(mode)) {
+ if (PMC_IS_SAMPLING_MODE(mode))
caps |= PMC_CAP_INTERRUPT;
- error = ENOSYS; /* for snapshot 6 */
- break;
- }
PMCDBG(PMC,ALL,2, "event=%d caps=0x%x mode=%d cpu=%d",
pa.pm_ev, caps, mode, cpu);
@@ -2828,6 +2853,14 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
PMCDBG(PMC,ALL,2, "ev=%d class=%d mode=%d n=%d -> pmcid=%x",
pmc->pm_event, pa.pm_class, mode, n, pmc->pm_id);
+ /* Process mode PMCs with logging enabled need log files */
+ if (pmc->pm_flags & (PMC_F_LOG_PROCEXIT | PMC_F_LOG_PROCCSW))
+ pmc->pm_flags |= PMC_F_NEEDS_LOGFILE;
+
+ /* All system mode sampling PMCs require a log file */
+ if (PMC_IS_SAMPLING_MODE(mode) && PMC_IS_SYSTEM_MODE(mode))
+ pmc->pm_flags |= PMC_F_NEEDS_LOGFILE;
+
/*
* Configure global pmc's immediately
*/
@@ -2999,6 +3032,85 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
/*
+ * Retrieve the MSR number associated with the counter
+ * 'pmc_id'. This allows processes to directly use RDPMC
+ * instructions to read their PMCs, without the overhead of a
+ * system call.
+ */
+
+ case PMC_OP_PMCGETMSR:
+ {
+ int ri;
+ struct pmc *pm;
+ struct pmc_target *pt;
+ struct pmc_op_getmsr gm;
+
+ PMC_DOWNGRADE_SX();
+
+ /* CPU has no 'GETMSR' support */
+ if (md->pmd_get_msr == NULL) {
+ error = ENOSYS;
+ break;
+ }
+
+ if ((error = copyin(arg, &gm, sizeof(gm))) != 0)
+ break;
+
+ if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0)
+ break;
+
+ /*
+ * The allocated PMC has to be a process virtual PMC,
+ * i.e., of type MODE_T[CS]. Global PMCs can only be
+ * read using the PMCREAD operation since they may be
+ * allocated on a different CPU than the one we could
+ * be running on at the time of the RDPMC instruction.
+ *
+ * The GETMSR operation is not allowed for PMCs that
+ * are inherited across processes.
+ */
+
+ if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) ||
+ (pm->pm_flags & PMC_F_DESCENDANTS)) {
+ error = EINVAL;
+ break;
+ }
+
+ /*
+ * It only makes sense to use a RDPMC (or its
+ * equivalent instruction on non-x86 architectures) on
+ * a process that has allocated and attached a PMC to
+ * itself. Conversely the PMC is only allowed to have
+ * one process attached to it -- its owner.
+ */
+
+ if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL ||
+ LIST_NEXT(pt, pt_next) != NULL ||
+ pt->pt_process->pp_proc != pm->pm_owner->po_owner) {
+ error = EINVAL;
+ break;
+ }
+
+ ri = PMC_TO_ROWINDEX(pm);
+
+ if ((error = (*md->pmd_get_msr)(ri, &gm.pm_msr)) < 0)
+ break;
+
+ if ((error = copyout(&gm, arg, sizeof(gm))) < 0)
+ break;
+
+ /*
+ * Mark our process as using MSRs. Update machine
+ * state using a forced context switch.
+ */
+
+ pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS;
+ pmc_force_context_switch();
+
+ }
+ break;
+
+ /*
* Release an allocated PMC
*/
@@ -3166,17 +3278,6 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
sizeof(prw.pm_value))))
break;
- /*
- * send a signal (SIGIO) to the owner if it is trying to read
- * a PMC with no target processes attached.
- */
-
- if (LIST_EMPTY(&pm->pm_targets) &&
- (prw.pm_flags & PMC_F_OLDVALUE)) {
- PROC_LOCK(curthread->td_proc);
- psignal(curthread->td_proc, SIGIO);
- PROC_UNLOCK(curthread->td_proc);
- }
}
break;
@@ -3291,107 +3392,34 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
/*
- * Write a user-entry to the log file.
+ * Flush the per-owner log file and Write a user-entry to the
+ * log file.
*/
case PMC_OP_WRITELOG:
{
+ struct pmc_op_writelog wl;
+ struct pmc_owner *po;
PMC_DOWNGRADE_SX();
- /*
- * flush all per-cpu hash tables
- * append user-log entry
- */
-
- error = ENOSYS;
- }
- break;
-
-
-#if __i386__ || __amd64__
-
- /*
- * Machine dependent operation for i386-class processors.
- *
- * Retrieve the MSR number associated with the counter
- * 'pmc_id'. This allows processes to directly use RDPMC
- * instructions to read their PMCs, without the overhead of a
- * system call.
- */
-
- case PMC_OP_PMCX86GETMSR:
- {
- int ri;
- struct pmc *pm;
- struct pmc_target *pt;
- struct pmc_op_x86_getmsr gm;
-
- PMC_DOWNGRADE_SX();
-
- /* CPU has no 'GETMSR' support */
- if (md->pmd_get_msr == NULL) {
- error = ENOSYS;
- break;
- }
-
- if ((error = copyin(arg, &gm, sizeof(gm))) != 0)
- break;
-
- if ((error = pmc_find_pmc(gm.pm_pmcid, &pm)) != 0)
+ if ((error = copyin(arg, &wl, sizeof(wl))) != 0)
break;
- /*
- * The allocated PMC has to be a process virtual PMC,
- * i.e., of type MODE_T[CS]. Global PMCs can only be
- * read using the PMCREAD operation since they may be
- * allocated on a different CPU than the one we could
- * be running on at the time of the RDPMC instruction.
- *
- * The GETMSR operation is not allowed for PMCs that
- * are inherited across processes.
- */
-
- if (!PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)) ||
- (pm->pm_flags & PMC_F_DESCENDANTS)) {
+ if ((po = pmc_find_owner_descriptor(td->td_proc)) == NULL) {
error = EINVAL;
break;
}
- /*
- * It only makes sense to use a RDPMC (or its
- * equivalent instruction on non-x86 architectures) on
- * a process that has allocated and attached a PMC to
- * itself. Conversely the PMC is only allowed to have
- * one process attached to it -- its owner.
- */
-
- if ((pt = LIST_FIRST(&pm->pm_targets)) == NULL ||
- LIST_NEXT(pt, pt_next) != NULL ||
- pt->pt_process->pp_proc != pm->pm_owner->po_owner) {
+ if ((po->po_flags & PMC_PO_OWNS_LOGFILE) == 0) {
error = EINVAL;
break;
}
- ri = PMC_TO_ROWINDEX(pm);
-
- if ((error = (*md->pmd_get_msr)(ri, &gm.pm_msr)) < 0)
- break;
-
- if ((error = copyout(&gm, arg, sizeof(gm))) < 0)
- break;
-
- /*
- * Mark our process as using MSRs. Update machine
- * state using a forced context switch.
- */
-
- pt->pt_process->pp_flags |= PMC_PP_ENABLE_MSR_ACCESS;
- pmc_force_context_switch();
-
+ error = pmclog_process_userlog(po, &wl);
}
break;
-#endif
+
default:
error = EINVAL;
@@ -3406,6 +3434,8 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
if (error)
atomic_add_int(&pmc_stats.pm_syscall_errors, 1);
+ PICKUP_GIANT();
+
return error;
}
@@ -3413,57 +3443,175 @@ pmc_syscall_handler(struct thread *td, void *syscall_args)
* Helper functions
*/
+
/*
- * Configure a log file.
+ * Interrupt processing.
+ *
+ * Find a free slot in the per-cpu array of PC samples and write the
+ * current (PMC,PID,PC) triple to it. If an event was successfully
+ * added, a bit is set in mask 'pmc_cpumask' denoting that the
+ * DO_SAMPLES hook needs to be invoked from the clock handler.
+ *
+ * This function is meant to be called from an NMI handler. It cannot
+ * use any of the locking primitives supplied by the OS.
*/
-static int
-pmc_configure_log(struct pmc_owner *po, int logfd)
+int
+pmc_process_interrupt(int cpu, struct pmc *pm, intfptr_t pc, int usermode)
{
- struct proc *p;
-
- return ENOSYS; /* for now */
+ int error, ri;
+ struct thread *td;
+ struct pmc_sample *ps;
+ struct pmc_samplebuffer *psb;
- p = po->po_owner;
+ error = 0;
+ ri = PMC_TO_ROWINDEX(pm);
- if (po->po_logfd < 0 && logfd < 0) /* nothing to do */
- return 0;
+ psb = pmc_pcpu[cpu]->pc_sb;
+
+ ps = psb->ps_write;
+ if (ps->ps_pc) { /* in use, reader hasn't caught up */
+ atomic_add_int(&pmc_stats.pm_intr_bufferfull, 1);
+ atomic_set_int(&pm->pm_flags, PMC_F_IS_STALLED);
+ PMCDBG(SAM,INT,1,"(spc) cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d",
+ cpu, pm, (int64_t) pc, usermode,
+ (int) (psb->ps_write - psb->ps_samples),
+ (int) (psb->ps_read - psb->ps_samples));
+ error = ENOMEM;
+ goto done;
+ }
- if (po->po_logfd >= 0 && logfd < 0) {
- /* deconfigure log */
- /* XXX */
- po->po_flags &= ~PMC_PO_OWNS_LOGFILE;
- pmc_maybe_remove_owner(po);
+ /* fill in entry */
+ PMCDBG(SAM,INT,1,"cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d", cpu, pm,
+ (int64_t) pc, usermode,
+ (int) (psb->ps_write - psb->ps_samples),
+ (int) (psb->ps_read - psb->ps_samples));
- } else if (po->po_logfd < 0 && logfd >= 0) {
- /* configure log file */
- /* XXX */
- po->po_flags |= PMC_PO_OWNS_LOGFILE;
+ atomic_add_rel_32(&pm->pm_runcount, 1); /* hold onto PMC */
+ ps->ps_pmc = pm;
+ if ((td = curthread) && td->td_proc)
+ ps->ps_pid = td->td_proc->p_pid;
+ else
+ ps->ps_pid = -1;
+ ps->ps_usermode = usermode;
+ ps->ps_pc = pc; /* mark entry as in use */
+
+ /* increment write pointer, modulo ring buffer size */
+ ps++;
+ if (ps == psb->ps_fence)
+ psb->ps_write = psb->ps_samples;
+ else
+ psb->ps_write = ps;
- /* mark process as using HWPMCs */
- PROC_LOCK(p);
- p->p_flag |= P_HWPMC;
- PROC_UNLOCK(p);
- } else
- return EBUSY;
+ done:
+ /* mark CPU as needing processing */
+ atomic_set_rel_int(&pmc_cpumask, (1 << cpu));
- return 0;
+ return error;
}
+
/*
- * Log an exit event to the PMC owner's log file.
+ * Process saved PC samples.
*/
static void
-pmc_log_process_exit(struct pmc *pm, struct pmc_process *pp)
+pmc_process_samples(int cpu)
{
- KASSERT(pm->pm_flags & PMC_F_LOG_TC_PROCEXIT,
- ("[pmc,%d] log-process-exit called gratuitously", __LINE__));
+ int n, ri;
+ struct pmc *pm;
+ struct thread *td;
+ struct pmc_owner *po;
+ struct pmc_sample *ps;
+ struct pmc_samplebuffer *psb;
- (void) pm;
- (void) pp;
+ KASSERT(PCPU_GET(cpuid) == cpu,
+ ("[pmc,%d] not on the correct CPU pcpu=%d cpu=%d", __LINE__,
+ PCPU_GET(cpuid), cpu));
+
+ psb = pmc_pcpu[cpu]->pc_sb;
+
+ for (n = 0; n < pmc_nsamples; n++) { /* bound on #iterations */
+
+ ps = psb->ps_read;
+ if (ps->ps_pc == (uintfptr_t) 0) /* no data */
+ break;
+
+ pm = ps->ps_pmc;
+ po = pm->pm_owner;
+
+ KASSERT(PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)),
+ ("[pmc,%d] pmc=%p non-sampling mode=%d", __LINE__,
+ pm, PMC_TO_MODE(pm)));
+
+ /* Ignore PMCs that have been switched off */
+ if (pm->pm_state != PMC_STATE_RUNNING)
+ goto entrydone;
+
+ PMCDBG(SAM,OPS,1,"cpu=%d pm=%p pc=%jx um=%d wr=%d rd=%d", cpu,
+ pm, (int64_t) ps->ps_pc, ps->ps_usermode,
+ (int) (psb->ps_write - psb->ps_samples),
+ (int) (psb->ps_read - psb->ps_samples));
+
+ /*
+ * If this is a process-mode PMC that is attached to
+ * its owner, and if the PC is in user mode, update
+ * profiling statistics like timer-based profiling
+ * would have done.
+ */
+ if (pm->pm_flags & PMC_F_ATTACHED_TO_OWNER) {
+ if (ps->ps_usermode) {
+ td = FIRST_THREAD_IN_PROC(po->po_owner);
+ addupc_intr(td, ps->ps_pc, 1);
+ }
+ goto entrydone;
+ }
+
+ /*
+ * Otherwise, this is either a sampling mode PMC that
+ * is attached to a different process than its owner,
+ * or a system-wide sampling PMC. Dispatch a log
+ * entry to the PMC's owner process.
+ */
+
+ pmclog_process_pcsample(pm, ps);
+
+ entrydone:
+ ps->ps_pc = (uintfptr_t) 0; /* mark entry as free */
+ atomic_subtract_rel_32(&pm->pm_runcount, 1);
+
+ /* increment read pointer, modulo sample size */
+ if (++ps == psb->ps_fence)
+ psb->ps_read = psb->ps_samples;
+ else
+ psb->ps_read = ps;
+ }
+
+ atomic_add_int(&pmc_stats.pm_log_sweeps, 1);
+
+ /* Do not re-enable stalled PMCs if we failed to process any samples */
+ if (n == 0)
+ return;
+
+ /*
+ * Restart any stalled sampling PMCs on this CPU.
+ *
+ * If the NMI handler sets PMC_F_IS_STALLED on a PMC after the
+ * check below, we'll end up processing the stalled PMC at the
+ * next hardclock tick.
+ */
+ for (n = 0; n < md->pmd_npmc; n++) {
+ (void) (*md->pmd_get_config)(cpu,n,&pm);
+ if (pm == NULL || /* !cfg'ed */
+ pm->pm_state != PMC_STATE_RUNNING || /* !active */
+ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)) || /* !sampling */
+ (pm->pm_flags & PMC_F_IS_STALLED) == 0) /* !stalled */
+ continue;
- return;
+ pm->pm_flags &= ~PMC_F_IS_STALLED;
+ ri = PMC_TO_ROWINDEX(pm);
+ (*md->pmd_start_pmc)(cpu, ri);
+ }
}
/*
@@ -3473,30 +3621,173 @@ pmc_log_process_exit(struct pmc *pm, struct pmc_process *pp)
/*
* Handle a process exit.
*
+ * Remove this process from all hash tables. If this process
+ * owned any PMCs, turn off those PMCs and deallocate them,
+ * removing any associations with target processes.
+ *
+ * This function will be called by the last 'thread' of a
+ * process.
+ *
* XXX This eventhandler gets called early in the exit process.
* Consider using a 'hook' invocation from thread_exit() or equivalent
* spot. Another negative is that kse_exit doesn't seem to call
* exit1() [??].
+ *
*/
static void
pmc_process_exit(void *arg __unused, struct proc *p)
{
int is_using_hwpmcs;
+ int cpu;
+ unsigned int ri;
+ struct pmc *pm;
+ struct pmc_process *pp;
+ struct pmc_owner *po;
+ pmc_value_t newvalue, tmp;
PROC_LOCK(p);
is_using_hwpmcs = p->p_flag & P_HWPMC;
PROC_UNLOCK(p);
- if (is_using_hwpmcs) {
- PMCDBG(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid,
- p->p_comm);
+ /*
+ * Log a sysexit event to all SS PMC owners.
+ */
+ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+ if (po->po_flags & PMC_PO_OWNS_LOGFILE)
+ pmclog_process_sysexit(po, p->p_pid);
+
+ if (!is_using_hwpmcs)
+ return;
+
+ PMC_GET_SX_XLOCK();
+ PMCDBG(PRC,EXT,1,"process-exit proc=%p (%d, %s)", p, p->p_pid,
+ p->p_comm);
- PMC_GET_SX_XLOCK();
- (void) pmc_hook_handler(curthread, PMC_FN_PROCESS_EXIT,
- (void *) p);
- sx_xunlock(&pmc_sx);
+ /*
+ * Since this code is invoked by the last thread in an exiting
+ * process, we would have context switched IN at some prior
+ * point. However, with PREEMPTION, kernel mode context
+ * switches may happen any time, so we want to disable a
+ * context switch OUT till we get any PMCs targetting this
+ * process off the hardware.
+ *
+ * We also need to atomically remove this process'
+ * entry from our target process hash table, using
+ * PMC_FLAG_REMOVE.
+ */
+ PMCDBG(PRC,EXT,1, "process-exit proc=%p (%d, %s)", p, p->p_pid,
+ p->p_comm);
+
+ critical_enter(); /* no preemption */
+
+ cpu = curthread->td_oncpu;
+
+ if ((pp = pmc_find_process_descriptor(p,
+ PMC_FLAG_REMOVE)) != NULL) {
+
+ PMCDBG(PRC,EXT,2,
+ "process-exit proc=%p pmc-process=%p", p, pp);
+
+ /*
+ * The exiting process could the target of
+ * some PMCs which will be running on
+ * currently executing CPU.
+ *
+ * We need to turn these PMCs off like we
+ * would do at context switch OUT time.
+ */
+ for (ri = 0; ri < md->pmd_npmc; ri++) {
+
+ /*
+ * Pick up the pmc pointer from hardware
+ * state similar to the CSW_OUT code.
+ */
+ pm = NULL;
+ (void) (*md->pmd_get_config)(cpu, ri, &pm);
+
+ PMCDBG(PRC,EXT,2, "ri=%d pm=%p", ri, pm);
+
+ if (pm == NULL ||
+ !PMC_IS_VIRTUAL_MODE(PMC_TO_MODE(pm)))
+ continue;
+
+ PMCDBG(PRC,EXT,2, "ppmcs[%d]=%p pm=%p "
+ "state=%d", ri, pp->pp_pmcs[ri].pp_pmc,
+ pm, pm->pm_state);
+
+ KASSERT(PMC_TO_ROWINDEX(pm) == ri,
+ ("[pmc,%d] ri mismatch pmc(%d) ri(%d)",
+ __LINE__, PMC_TO_ROWINDEX(pm), ri));
+
+ KASSERT(pm == pp->pp_pmcs[ri].pp_pmc,
+ ("[pmc,%d] pm %p != pp_pmcs[%d] %p",
+ __LINE__, pm, ri, pp->pp_pmcs[ri].pp_pmc));
+
+ (void) md->pmd_stop_pmc(cpu, ri);
+
+ KASSERT(pm->pm_runcount > 0,
+ ("[pmc,%d] bad runcount ri %d rc %d",
+ __LINE__, ri, pm->pm_runcount));
+
+ /* Stopped the hardware only if it is actually on */
+ if (pm->pm_state == PMC_STATE_RUNNING &&
+ (pm->pm_flags & PMC_F_IS_STALLED) == 0) {
+ md->pmd_read_pmc(cpu, ri, &newvalue);
+ tmp = newvalue -
+ PMC_PCPU_SAVED(cpu,ri);
+
+ mtx_pool_lock_spin(pmc_mtxpool, pm);
+ pm->pm_gv.pm_savedvalue += tmp;
+ pp->pp_pmcs[ri].pp_pmcval += tmp;
+ mtx_pool_unlock_spin(pmc_mtxpool, pm);
+ }
+
+ atomic_subtract_rel_32(&pm->pm_runcount,1);
+
+ KASSERT((int) pm->pm_runcount >= 0,
+ ("[pmc,%d] runcount is %d", __LINE__, ri));
+
+ (void) md->pmd_config_pmc(cpu, ri, NULL);
+ }
+
+ /*
+ * Inform the MD layer of this pseudo "context switch
+ * out"
+ */
+ (void) md->pmd_switch_out(pmc_pcpu[cpu], pp);
+
+ critical_exit(); /* ok to be pre-empted now */
+
+ /*
+ * Unlink this process from the PMCs that are
+ * targetting it. This will send a signal to
+ * all PMC owner's whose PMCs are orphaned.
+ *
+ * Log PMC value at exit time if requested.
+ */
+ for (ri = 0; ri < md->pmd_npmc; ri++)
+ if ((pm = pp->pp_pmcs[ri].pp_pmc) != NULL) {
+ if (pm->pm_flags & PMC_F_LOG_PROCEXIT)
+ pmclog_process_procexit(pm, pp);
+ pmc_unlink_target_process(pm, pp);
+ }
+ FREE(pp, M_PMC);
+
+ } else
+ critical_exit(); /* pp == NULL */
+
+
+ /*
+ * If the process owned PMCs, free them up and free up
+ * memory.
+ */
+ if ((po = pmc_find_owner_descriptor(p)) != NULL) {
+ pmc_remove_owner(po);
+ pmc_destroy_owner_descriptor(po);
}
+
+ sx_xunlock(&pmc_sx);
}
/*
@@ -3507,10 +3798,15 @@ pmc_process_exit(void *arg __unused, struct proc *p)
*/
static void
-pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *p2,
+pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *newproc,
int flags)
{
int is_using_hwpmcs;
+ unsigned int ri;
+ uint32_t do_descendants;
+ struct pmc *pm;
+ struct pmc_owner *po;
+ struct pmc_process *ppnew, *ppold;
(void) flags; /* unused parameter */
@@ -3518,14 +3814,72 @@ pmc_process_fork(void *arg __unused, struct proc *p1, struct proc *p2,
is_using_hwpmcs = p1->p_flag & P_HWPMC;
PROC_UNLOCK(p1);
- if (is_using_hwpmcs) {
- PMCDBG(PMC,FRK,1, "process-fork proc=%p (%d, %s)", p1,
- p1->p_pid, p1->p_comm);
- PMC_GET_SX_XLOCK();
- (void) pmc_hook_handler(curthread, PMC_FN_PROCESS_FORK,
- (void *) p2);
- sx_xunlock(&pmc_sx);
- }
+ /*
+ * If there are system-wide sampling PMCs active, we need to
+ * log all fork events to their owner's logs.
+ */
+
+ LIST_FOREACH(po, &pmc_ss_owners, po_ssnext)
+ if (po->po_flags & PMC_PO_OWNS_LOGFILE)
+ pmclog_process_procfork(po, p1->p_pid, newproc->p_pid);
+
+ if (!is_using_hwpmcs)
+ return;
+
+ PMC_GET_SX_XLOCK();
+ PMCDBG(PMC,FRK,1, "process-fork proc=%p (%d, %s) -> %p", p1,
+ p1->p_pid, p1->p_comm, newproc);
+
+ /*
+ * If the parent process (curthread->td_proc) is a
+ * target of any PMCs, look for PMCs that are to be
+ * inherited, and link these into the new process
+ * descriptor.
+ */
+ if ((ppold = pmc_find_process_descriptor(curthread->td_proc,
+ PMC_FLAG_NONE)) == NULL)
+ goto done; /* nothing to do */
+
+ do_descendants = 0;
+ for (ri = 0; ri < md->pmd_npmc; ri++)
+ if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL)
+ do_descendants |= pm->pm_flags & PMC_F_DESCENDANTS;
+ if (do_descendants == 0) /* nothing to do */
+ goto done;
+
+ /* allocate a descriptor for the new process */
+ if ((ppnew = pmc_find_process_descriptor(newproc,
+ PMC_FLAG_ALLOCATE)) == NULL)
+ goto done;
+
+ /*
+ * Run through all PMCs that were targeting the old process
+ * and which specified F_DESCENDANTS and attach them to the
+ * new process.
+ *
+ * Log the fork event to all owners of PMCs attached to this
+ * process, if not already logged.
+ */
+ for (ri = 0; ri < md->pmd_npmc; ri++)
+ if ((pm = ppold->pp_pmcs[ri].pp_pmc) != NULL &&
+ (pm->pm_flags & PMC_F_DESCENDANTS)) {
+ pmc_link_target_process(pm, ppnew);
+ po = pm->pm_owner;
+ if (po->po_sscount == 0 &&
+ po->po_flags & PMC_PO_OWNS_LOGFILE)
+ pmclog_process_procfork(po, p1->p_pid,
+ newproc->p_pid);
+ }
+
+ /*
+ * Now mark the new process as being tracked by this driver.
+ */
+ PROC_LOCK(newproc);
+ newproc->p_flag |= P_HWPMC;
+ PROC_UNLOCK(newproc);
+
+ done:
+ sx_xunlock(&pmc_sx);
}
@@ -3542,8 +3896,9 @@ static const char *pmc_name_of_pmcclass[] = {
static int
pmc_initialize(void)
{
- int error, cpu, n;
+ int cpu, error, n;
struct pmc_binding pb;
+ struct pmc_samplebuffer *sb;
md = NULL;
error = 0;
@@ -3563,25 +3918,17 @@ pmc_initialize(void)
*/
if (pmc_hashsize <= 0) {
- (void) printf("pmc: sysctl variable \""
- PMC_SYSCTL_NAME_PREFIX "hashsize\" must be greater than "
- "zero\n");
+ (void) printf("hwpmc: tunable hashsize=%d must be greater "
+ "than zero.\n", pmc_hashsize);
pmc_hashsize = PMC_HASH_SIZE;
}
-#if defined(__i386__)
- /* determine the CPU kind. This is i386 specific */
- if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
- md = pmc_amd_initialize();
- else if (strcmp(cpu_vendor, "GenuineIntel") == 0)
- md = pmc_intel_initialize();
- /* XXX: what about the other i386 CPU manufacturers? */
-#elif defined(__amd64__)
- if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
- md = pmc_amd_initialize();
-#else /* other architectures */
- md = NULL;
-#endif
+ if (pmc_nsamples <= 0 || pmc_nsamples > 65535) {
+ (void) printf("hwpmc: tunable nsamples=%d out of range.\n", pmc_nsamples);
+ pmc_nsamples = PMC_NSAMPLES;
+ }
+
+ md = pmc_md_initialize();
if (md == NULL || md->pmd_init == NULL)
return ENOSYS;
@@ -3608,6 +3955,24 @@ pmc_initialize(void)
if (error != 0)
return error;
+ /* allocate space for the sample array */
+ for (cpu = 0; cpu < mp_ncpus; cpu++) {
+ if (pmc_cpu_is_disabled(cpu))
+ continue;
+ MALLOC(sb, struct pmc_samplebuffer *,
+ sizeof(struct pmc_samplebuffer) +
+ pmc_nsamples * sizeof(struct pmc_sample), M_PMC,
+ M_WAITOK|M_ZERO);
+
+ sb->ps_read = sb->ps_write = sb->ps_samples;
+ sb->ps_fence = sb->ps_samples + pmc_nsamples
+;
+ KASSERT(pmc_pcpu[cpu] != NULL,
+ ("[pmc,%d] cpu=%d Null per-cpu data", __LINE__, cpu));
+
+ pmc_pcpu[cpu]->pc_sb = sb;
+ }
+
/* allocate space for the row disposition array */
pmc_pmcdisp = malloc(sizeof(enum pmc_mode) * md->pmd_npmc,
M_PMC, M_WAITOK|M_ZERO);
@@ -3627,6 +3992,9 @@ pmc_initialize(void)
&pmc_processhashmask);
mtx_init(&pmc_processhash_mtx, "pmc-process-hash", "pmc", MTX_SPIN);
+ LIST_INIT(&pmc_ss_owners);
+ pmc_ss_count = 0;
+
/* allocate a pool of spin mutexes */
pmc_mtxpool = mtx_pool_create("pmc", pmc_mtxpool_size, MTX_SPIN);
@@ -3640,6 +4008,9 @@ pmc_initialize(void)
pmc_fork_tag = EVENTHANDLER_REGISTER(process_fork,
pmc_process_fork, NULL, EVENTHANDLER_PRI_ANY);
+ /* initialize logging */
+ pmclog_initialize();
+
/* set hook functions */
pmc_intr = md->pmd_intr;
pmc_hook = pmc_hook_handler;
@@ -3670,7 +4041,9 @@ pmc_cleanup(void)
PMCDBG(MOD,INI,0, "%s", "cleanup");
- pmc_intr = NULL; /* no more interrupts please */
+ /* switch off sampling */
+ atomic_store_rel_int(&pmc_cpumask, 0);
+ pmc_intr = NULL;
sx_xlock(&pmc_sx);
if (pmc_hook == NULL) { /* being unloaded already */
@@ -3701,7 +4074,8 @@ pmc_cleanup(void)
PROC_LOCK(po->po_owner);
psignal(po->po_owner, SIGBUS);
PROC_UNLOCK(po->po_owner);
- FREE(po, M_PMC);
+
+ pmc_destroy_owner_descriptor(po);
}
}
@@ -3732,6 +4106,11 @@ pmc_cleanup(void)
pmc_ownerhash = NULL;
}
+ KASSERT(LIST_EMPTY(&pmc_ss_owners),
+ ("[pmc,%d] Global SS owner list not empty", __LINE__));
+ KASSERT(pmc_ss_count == 0,
+ ("[pmc,%d] Global SS count not empty", __LINE__));
+
/* do processor dependent cleanup */
PMCDBG(MOD,INI,3, "%s", "md cleanup");
if (md) {
@@ -3762,6 +4141,8 @@ pmc_cleanup(void)
pmc_pmcdisp = NULL;
}
+ pmclog_shutdown();
+
sx_xunlock(&pmc_sx); /* we are done */
}
diff --git a/sys/dev/hwpmc/hwpmc_pentium.c b/sys/dev/hwpmc/hwpmc_pentium.c
index 48e8be3..fc26b52 100644
--- a/sys/dev/hwpmc/hwpmc_pentium.c
+++ b/sys/dev/hwpmc/hwpmc_pentium.c
@@ -35,8 +35,9 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
-#include <machine/cputypes.h>
+#include <machine/cpufunc.h>
#include <machine/md_var.h>
+#include <machine/pmc_mdep.h>
/*
* Intel Pentium PMCs
diff --git a/sys/dev/hwpmc/hwpmc_pentium.h b/sys/dev/hwpmc/hwpmc_pentium.h
new file mode 100644
index 0000000..36ecc64
--- /dev/null
+++ b/sys/dev/hwpmc/hwpmc_pentium.h
@@ -0,0 +1,72 @@
+/*-
+ * Copyright (c) 2005, Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* Machine dependent interfaces */
+
+#ifndef _DEV_HWPMC_PENTIUM_H_
+#define _DEV_HWPMC_PENTIUM_H_ 1
+
+/* Intel Pentium PMCs */
+
+#define PENTIUM_NPMCS 3 /* 1 TSC + 2 PMCs */
+#define PENTIUM_CESR_PC1 (1 << 25)
+#define PENTIUM_CESR_CC1_MASK 0x01C00000
+#define PENTIUM_CESR_TO_CC1(C) (((C) & 0x07) << 22)
+#define PENTIUM_CESR_ES1_MASK 0x003F0000
+#define PENTIUM_CESR_TO_ES1(E) (((E) & 0x3F) << 16)
+#define PENTIUM_CESR_PC0 (1 << 9)
+#define PENTIUM_CESR_CC0_MASK 0x000001C0
+#define PENTIUM_CESR_TO_CC0(C) (((C) & 0x07) << 6)
+#define PENTIUM_CESR_ES0_MASK 0x0000003F
+#define PENTIUM_CESR_TO_ES0(E) ((E) & 0x3F)
+#define PENTIUM_CESR_RESERVED 0xFC00FC00
+
+#define PENTIUM_MSR_CESR 0x11
+#define PENTIUM_MSR_CTR0 0x12
+#define PENTIUM_MSR_CTR1 0x13
+
+struct pmc_md_pentium_op_pmcallocate {
+ uint32_t pm_pentium_config;
+};
+
+#ifdef _KERNEL
+
+/* MD extension for 'struct pmc' */
+struct pmc_md_pentium_pmc {
+ uint32_t pm_pentium_cesr;
+};
+
+
+/*
+ * Prototypes
+ */
+
+int pmc_initialize_p5(struct pmc_mdep *); /* Pentium PMCs */
+
+#endif /* _KERNEL */
+#endif /* _DEV_HWPMC_PENTIUM_H_ */
diff --git a/sys/dev/hwpmc/hwpmc_piv.c b/sys/dev/hwpmc/hwpmc_piv.c
index c9ade03..0b14745 100644
--- a/sys/dev/hwpmc/hwpmc_piv.c
+++ b/sys/dev/hwpmc/hwpmc_piv.c
@@ -35,8 +35,9 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
-#include <machine/apicreg.h>
+#include <machine/cpufunc.h>
#include <machine/md_var.h>
+#include <machine/specialreg.h>
/*
* PENTIUM 4 SUPPORT
@@ -134,7 +135,11 @@ __FBSDID("$FreeBSD$");
* CPUy +.....-
* RC 0 1 2 1 0
*
- * Here CPUx and CPUy are one of the two logical processors on a HTT CPU.
+ * Key:
+ * 'CPU[xy]' : one of the two logical processors on a HTT CPU.
+ * 'RC' : run count (#threads per physical core).
+ * '+' : point in time when a thread is put on a CPU.
+ * '-' : point in time where a thread is taken off a CPU.
*
* Handling HTT CONFIG
*
@@ -438,7 +443,9 @@ struct p4_cpu {
struct pmc_hw *pc_hwpmcs[P4_NPMCS];
struct pmc_hw pc_p4pmcs[P4_NPMCS];
char pc_escrs[P4_NESCR];
- struct mtx pc_mtx; /* spin lock */
+ struct mtx pc_mtx; /* spin lock */
+ uint32_t pc_intrflag; /* NMI handler flags */
+ unsigned int pc_intrlock; /* NMI handler spin lock */
unsigned char pc_flags[P4_NPMCS]; /* 4 bits each: {cfg,run}count */
union {
pmc_value_t pc_hw[P4_NPMCS * P4_NHTT];
@@ -447,6 +454,20 @@ struct p4_cpu {
pmc_value_t pc_pmc_values[P4_NPMCS * P4_NHTT];
};
+/*
+ * A 'logical' CPU shares PMC resources with partner 'physical' CPU,
+ * except the TSC, which is architectural and hence separate. The
+ * 'logical' CPU descriptor thus has pointers to the physical CPUs
+ * descriptor state except for the TSC (rowindex 0) which is not
+ * shared.
+ */
+
+struct p4_logicalcpu {
+ struct pmc_cpu pc_common;
+ struct pmc_hw *pc_hwpmcs[P4_NPMCS];
+ struct pmc_hw pc_tsc;
+};
+
#define P4_PCPU_PMC_VALUE(PC,RI,CPU) (PC)->pc_pmc_values[(RI)*((CPU) & 1)]
#define P4_PCPU_HW_VALUE(PC,RI,CPU) (PC)->pc_si.pc_hw[(RI)*((CPU) & 1)]
#define P4_PCPU_SAVED_IP(PC,RI,CPU) (PC)->pc_si.pc_ip[(RI)*((CPU) & 1)]
@@ -468,6 +489,29 @@ struct p4_cpu {
#define P4_CPU_TO_FLAG(C) (pmc_cpu_is_logical(cpu) ? 0x2 : 0x1)
+#define P4_PCPU_GET_INTRFLAG(PC,I) ((PC)->pc_intrflag & (1 << (I)))
+#define P4_PCPU_SET_INTRFLAG(PC,I,V) do { \
+ uint32_t __mask; \
+ __mask = 1 << (I); \
+ if ((V)) \
+ (PC)->pc_intrflag |= __mask; \
+ else \
+ (PC)->pc_intrflag &= ~__mask; \
+ } while (0)
+
+/*
+ * A minimal spin lock implementation for use inside the NMI handler.
+ *
+ * We don't want to use a regular spin lock here, because curthread
+ * may not be consistent at the time the handler is invoked.
+ */
+#define P4_PCPU_ACQ_INTR_SPINLOCK(PC) do { \
+ while (!atomic_cmpset_acq_int(&(PC)->pc_intrlock, 0, 1)) \
+ ia32_pause(); \
+ } while (0)
+#define P4_PCPU_REL_INTR_SPINLOCK(PC) \
+ atomic_store_rel_int(&(PC)->pc_intrlock, 0)
+
/* ESCR row disposition */
static int p4_escrdisp[P4_NESCR];
@@ -538,6 +582,7 @@ p4_init(int cpu)
int n, phycpu;
char *pescr;
struct p4_cpu *pcs;
+ struct p4_logicalcpu *plcs;
struct pmc_hw *phw;
KASSERT(cpu >= 0 && cpu < mp_ncpus,
@@ -562,8 +607,23 @@ p4_init(int cpu)
cpu, phycpu));
if (pcs == NULL) /* decline to init */
return ENXIO;
+
p4_system_has_htt = 1;
- pmc_pcpu[cpu] = (struct pmc_cpu *) pcs;
+
+ MALLOC(plcs, struct p4_logicalcpu *,
+ sizeof(struct p4_logicalcpu), M_PMC, M_WAITOK|M_ZERO);
+
+ /* The TSC is architectural state and is not shared */
+ plcs->pc_hwpmcs[0] = &plcs->pc_tsc;
+ plcs->pc_tsc.phw_state = PMC_PHW_FLAG_IS_ENABLED |
+ PMC_PHW_CPU_TO_STATE(cpu) | PMC_PHW_INDEX_TO_STATE(0) |
+ PMC_PHW_FLAG_IS_SHAREABLE;
+
+ /* Other PMCs are shared with the physical CPU */
+ for (n = 1; n < P4_NPMCS; n++)
+ plcs->pc_hwpmcs[n] = pcs->pc_hwpmcs[n];
+
+ pmc_pcpu[cpu] = (struct pmc_cpu *) plcs;
return 0;
}
@@ -605,16 +665,17 @@ p4_cleanup(int cpu)
PMCDBG(MDP,INI,0, "p4-cleanup cpu=%d", cpu);
+ if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) == NULL)
+ return 0;
+
/*
- * Free up the per-cpu structure for the given cpu if
- * allocated, and if this is a physical CPU.
+ * If the CPU is physical we need to teardown the
+ * full MD state.
*/
-
- if ((pcs = (struct p4_cpu *) pmc_pcpu[cpu]) != NULL &&
- !pmc_cpu_is_logical(cpu)) {
+ if (!pmc_cpu_is_logical(cpu))
mtx_destroy(&pcs->pc_mtx);
- FREE(pcs, M_PMC);
- }
+
+ FREE(pcs, M_PMC);
pmc_pcpu[cpu] = NULL;
@@ -637,7 +698,7 @@ p4_switch_in(struct pmc_cpu *pc, struct pmc_process *pp)
if (pp->pp_flags & PMC_PP_ENABLE_MSR_ACCESS)
load_cr4(rcr4() | CR4_PCE);
- PMCDBG(MDP,SWI,2, "cr4=0x%x", rcr4());
+ PMCDBG(MDP,SWI,2, "cr4=0x%x", (uint32_t) rcr4());
return 0;
}
@@ -657,7 +718,7 @@ p4_switch_out(struct pmc_cpu *pc, struct pmc_process *pp)
/* always disallow the RDPMC instruction */
load_cr4(rcr4() & ~CR4_PCE);
- PMCDBG(MDP,SWO,2, "cr4=0x%x", rcr4());
+ PMCDBG(MDP,SWO,2, "cr4=0x%x", (uint32_t) rcr4());
return 0;
}
@@ -681,6 +742,26 @@ p4_read_pmc(int cpu, int ri, pmc_value_t *v)
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[p4,%d] illegal row-index %d", __LINE__, ri));
+
+ if (ri == 0) { /* TSC */
+#if DEBUG
+ pc = (struct p4_cpu *) pmc_pcpu[cpu];
+ phw = pc->pc_hwpmcs[ri];
+ pm = phw->phw_pmc;
+
+ KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__,
+ cpu, ri));
+ KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC,
+ ("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__, cpu, ri,
+ PMC_TO_CLASS(pm)));
+ KASSERT(PMC_IS_COUNTING_MODE(PMC_TO_MODE(pm)),
+ ("[p4,%d] TSC counter in non-counting mode", __LINE__));
+#endif
+ *v = rdtsc();
+ PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
+ return 0;
+ }
+
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
phw = pc->pc_hwpmcs[ri];
pd = &p4_pmcdesc[ri];
@@ -698,14 +779,6 @@ p4_read_pmc(int cpu, int ri, pmc_value_t *v)
PMCDBG(MDP,REA,1, "p4-read cpu=%d ri=%d mode=%d", cpu, ri, mode);
- if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC) {
- KASSERT(PMC_IS_COUNTING_MODE(mode),
- ("[p4,%d] TSC counter in non-counting mode", __LINE__));
- *v = rdtsc();
- PMCDBG(MDP,REA,2, "p4-read -> %jx", *v);
- return 0;
- }
-
KASSERT(pd->pm_descr.pd_class == PMC_CLASS_P4,
("[p4,%d] unknown PMC class %d", __LINE__, pd->pm_descr.pd_class));
@@ -747,6 +820,27 @@ p4_write_pmc(int cpu, int ri, pmc_value_t v)
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[amd,%d] illegal row-index %d", __LINE__, ri));
+
+ /*
+ * The P4's TSC register is writeable, but we don't allow a
+ * write as changing the TSC's value could interfere with
+ * timekeeping and other system functions.
+ */
+ if (ri == 0) {
+#if DEBUG
+ pc = (struct p4_cpu *) pmc_pcpu[cpu];
+ phw = pc->pc_hwpmcs[ri];
+ pm = phw->phw_pmc;
+ KASSERT(pm, ("[p4,%d] cpu=%d ri=%d not configured", __LINE__,
+ cpu, ri));
+ KASSERT(PMC_TO_CLASS(pm) == PMC_CLASS_TSC,
+ ("[p4,%d] cpu=%d ri=%d not a TSC (%d)", __LINE__,
+ cpu, ri, PMC_TO_CLASS(pm)));
+#endif
+ return 0;
+ }
+
+ /* Shared PMCs */
pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
phw = pc->pc_hwpmcs[ri];
pm = phw->phw_pmc;
@@ -762,14 +856,6 @@ p4_write_pmc(int cpu, int ri, pmc_value_t v)
mode, v);
/*
- * The P4's TSC register is writeable, but we don't allow a
- * write as changing the TSC's value could interfere with
- * timekeeping and other system functions.
- */
- if (PMC_TO_CLASS(pm) == PMC_CLASS_TSC)
- return 0;
-
- /*
* write the PMC value to the register/saved value: for
* sampling mode PMCs, the value to be programmed into the PMC
* counter is -(C+1) where 'C' is the requested sample rate.
@@ -808,7 +894,21 @@ p4_config_pmc(int cpu, int ri, struct pmc *pm)
KASSERT(ri >= 0 && ri < P4_NPMCS,
("[p4,%d] illegal row-index %d", __LINE__, ri));
- pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
+ PMCDBG(MDP,CFG,1, "cpu=%d ri=%d pm=%p", cpu, ri, pm);
+
+ if (ri == 0) { /* TSC */
+ pc = (struct p4_cpu *) pmc_pcpu[cpu];
+ phw = pc->pc_hwpmcs[ri];
+
+ KASSERT(pm == NULL || phw->phw_pmc == NULL,
+ ("[p4,%d] hwpmc doubly config'ed", __LINE__));
+ phw->phw_pmc = pm;
+ return 0;
+ }
+
+ /* Shared PMCs */
+
+ pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
phw = pc->pc_hwpmcs[ri];
KASSERT(pm == NULL || phw->phw_pmc == NULL ||
@@ -826,9 +926,6 @@ p4_config_pmc(int cpu, int ri, struct pmc *pm)
("[p4,%d] cpu=%d ri=%d pmc configured with zero cfg count",
__LINE__, cpu, ri));
- PMCDBG(MDP,CFG,1, "cpu=%d ri=%d cfg=%d pm=%p", cpu, ri, cfgflags,
- pm);
-
cpuflag = P4_CPU_TO_FLAG(cpu);
if (pm) { /* config */
@@ -1073,8 +1170,8 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
/* CCCR fields */
if (caps & PMC_CAP_THRESHOLD)
- cccrvalue |= (a->pm_p4_cccrconfig & P4_CCCR_THRESHOLD_MASK) |
- P4_CCCR_COMPARE;
+ cccrvalue |= (a->pm_md.pm_p4.pm_p4_cccrconfig &
+ P4_CCCR_THRESHOLD_MASK) | P4_CCCR_COMPARE;
if (caps & PMC_CAP_EDGE)
cccrvalue |= P4_CCCR_EDGE;
@@ -1083,7 +1180,8 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
cccrvalue |= P4_CCCR_COMPLEMENT;
if (p4_system_has_htt)
- cccrvalue |= a->pm_p4_cccrconfig & P4_CCCR_ACTIVE_THREAD_MASK;
+ cccrvalue |= a->pm_md.pm_p4.pm_p4_cccrconfig &
+ P4_CCCR_ACTIVE_THREAD_MASK;
else /* no HTT; thread field should be '11b' */
cccrvalue |= P4_CCCR_TO_ACTIVE_THREAD(0x3);
@@ -1096,12 +1194,14 @@ p4_allocate_pmc(int cpu, int ri, struct pmc *pm,
/* ESCR fields */
if (caps & PMC_CAP_QUALIFIER)
- escrvalue |= a->pm_p4_escrconfig & P4_ESCR_EVENT_MASK_MASK;
+ escrvalue |= a->pm_md.pm_p4.pm_p4_escrconfig &
+ P4_ESCR_EVENT_MASK_MASK;
if (caps & PMC_CAP_TAGGING)
- escrvalue |= (a->pm_p4_escrconfig & P4_ESCR_TAG_VALUE_MASK) |
- P4_ESCR_TAG_ENABLE;
+ escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
+ P4_ESCR_TAG_VALUE_MASK) | P4_ESCR_TAG_ENABLE;
if (caps & PMC_CAP_QUALIFIER)
- escrvalue |= (a->pm_p4_escrconfig & P4_ESCR_EVENT_MASK_MASK);
+ escrvalue |= (a->pm_md.pm_p4.pm_p4_escrconfig &
+ P4_ESCR_EVENT_MASK_MASK);
/* HTT: T0_{OS,USR} bits may get moved to T1 at pmc start */
tflags = 0;
@@ -1434,116 +1534,150 @@ p4_stop_pmc(int cpu, int ri)
* The hardware sets the CCCR_OVF whenever a counter overflow occurs, so the handler
* examines all the 18 CCCR registers, processing the counters that have overflowed.
*
- * On HTT machines, multiple logical CPUs may try to enter the NMI service
- * routine at the same time.
+ * On HTT machines, the CCCR register is shared and will interrupt
+ * both logical processors if so configured. Thus multiple logical
+ * CPUs could enter the NMI service routine at the same time. These
+ * will get serialized using a per-cpu spinlock dedicated for use in
+ * the NMI handler.
*/
-extern volatile lapic_t *lapic;
-
-static void
-p4_lapic_enable_pmc_interrupt(void)
-{
- uint32_t value;
-
- value = lapic->lvt_pcint;
- value &= ~APIC_LVT_M;
- lapic->lvt_pcint = value;
-}
-
-
static int
p4_intr(int cpu, uintptr_t eip, int usermode)
{
- int i, pmc_interrupted;
- uint32_t cccrval, pmi_ovf_mask;
+ int i, did_interrupt, error, ri;
+ uint32_t cccrval, ovf_mask, ovf_partner;
struct p4_cpu *pc;
struct pmc_hw *phw;
struct pmc *pm;
pmc_value_t v;
- (void) eip;
- (void) usermode;
- PMCDBG(MDP,INT, 1, "cpu=%d eip=%x pcint=0x%x", cpu, eip,
- lapic->lvt_pcint);
+ PMCDBG(MDP,INT, 1, "cpu=%d eip=%p um=%d", cpu, (void *) eip, usermode);
- pmc_interrupted = 0;
- pc = (struct p4_cpu *) pmc_pcpu[cpu];
+ pc = (struct p4_cpu *) pmc_pcpu[P4_TO_PHYSICAL_CPU(cpu)];
- pmi_ovf_mask = pmc_cpu_is_logical(cpu) ?
+ ovf_mask = pmc_cpu_is_logical(cpu) ?
P4_CCCR_OVF_PMI_T1 : P4_CCCR_OVF_PMI_T0;
- pmi_ovf_mask |= P4_CCCR_OVF;
+ ovf_mask |= P4_CCCR_OVF;
+ if (p4_system_has_htt)
+ ovf_partner = pmc_cpu_is_logical(cpu) ? P4_CCCR_OVF_PMI_T0 :
+ P4_CCCR_OVF_PMI_T1;
+ else
+ ovf_partner = 0;
+ did_interrupt = 0;
+
+ if (p4_system_has_htt)
+ P4_PCPU_ACQ_INTR_SPINLOCK(pc);
/*
- * Loop through all CCCRs, looking for ones that have the
- * OVF_PMI bit set for our logical CPU.
+ * Loop through all CCCRs, looking for ones that have
+ * interrupted this CPU.
*/
+ for (i = 0; i < P4_NPMCS-1; i++) {
+
+ ri = i + 1; /* row index */
- for (i = 1; i < P4_NPMCS; i++) {
- cccrval = rdmsr(P4_CCCR_MSR_FIRST + i - 1);
+ /*
+ * Check if our partner logical CPU has already marked
+ * this PMC as having interrupted it. If so, reset
+ * the flag and process the interrupt, but leave the
+ * hardware alone.
+ */
+ if (p4_system_has_htt && P4_PCPU_GET_INTRFLAG(pc,ri)) {
+ P4_PCPU_SET_INTRFLAG(pc,ri,0);
+ did_interrupt = 1;
- if ((cccrval & pmi_ovf_mask) != pmi_ovf_mask)
+ /*
+ * Ignore de-configured or stopped PMCs.
+ * Ignore PMCs not in sampling mode.
+ */
+ phw = pc->pc_hwpmcs[ri];
+ pm = phw->phw_pmc;
+ if (pm == NULL ||
+ pm->pm_state != PMC_STATE_RUNNING ||
+ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
+ continue;
+ }
+ (void) pmc_process_interrupt(cpu, pm, eip, usermode);
continue;
+ }
- v = rdmsr(P4_PERFCTR_MSR_FIRST + i - 1);
+ /*
+ * Fresh interrupt. Look for the CCCR_OVF bit
+ * and the OVF_Tx bit for this logical
+ * processor being set.
+ */
+ cccrval = rdmsr(P4_CCCR_MSR_FIRST + i);
- pmc_interrupted = 1;
+ if ((cccrval & ovf_mask) != ovf_mask)
+ continue;
- PMCDBG(MDP,INT, 2, "ri=%d v=%jx", i, v);
+ /*
+ * If the other logical CPU would also have been
+ * interrupted due to the PMC being shared, record
+ * this fact in the per-cpu saved interrupt flag
+ * bitmask.
+ */
+ if (p4_system_has_htt && (cccrval & ovf_partner))
+ P4_PCPU_SET_INTRFLAG(pc, ri, 1);
+
+ v = rdmsr(P4_PERFCTR_MSR_FIRST + i);
+
+ PMCDBG(MDP,INT, 2, "ri=%d v=%jx", ri, v);
- /* Stop the counter, and turn off the overflow bit */
+ /* Stop the counter, and reset the overflow bit */
cccrval &= ~(P4_CCCR_OVF | P4_CCCR_ENABLE);
- wrmsr(P4_CCCR_MSR_FIRST + i - 1, cccrval);
+ wrmsr(P4_CCCR_MSR_FIRST + i, cccrval);
- phw = pc->pc_hwpmcs[i];
- pm = phw->phw_pmc;
+ did_interrupt = 1;
/*
- * Ignore de-configured or stopped PMCs.
- * Also ignore counting mode PMCs that may
- * have overflowed their counters.
+ * Ignore de-configured or stopped PMCs. Ignore PMCs
+ * not in sampling mode.
*/
+ phw = pc->pc_hwpmcs[ri];
+ pm = phw->phw_pmc;
+
if (pm == NULL ||
pm->pm_state != PMC_STATE_RUNNING ||
- !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
+ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
continue;
+ }
/*
- * If the previous sample hasn't been read yet, the
- * sampling interrupt is coming in too fast for the
- * rest of the system to cope. Do not re-enable the
- * counter.
+ * Process the interrupt. Re-enable the PMC if
+ * processing was successful.
*/
-
- if (P4_PCPU_SAVED_IP(pc,i,cpu)) {
- atomic_add_int(&pmc_stats.pm_intr_ignored, 1);
- continue;
- }
+ error = pmc_process_interrupt(cpu, pm, eip, usermode);
/*
- * write the the reload count and restart the
- * hardware.
+ * Only the first processor executing the NMI handler
+ * in a HTT pair will restart a PMC, and that too
+ * only if there were no errors.
*/
-
v = P4_RELOAD_COUNT_TO_PERFCTR_VALUE(
pm->pm_sc.pm_reloadcount);
- wrmsr(P4_PERFCTR_MSR_FIRST + i - 1, v);
- wrmsr(P4_CCCR_MSR_FIRST + i - 1,
- cccrval | P4_CCCR_ENABLE);
+ wrmsr(P4_PERFCTR_MSR_FIRST + i, v);
+ if (error == 0)
+ wrmsr(P4_CCCR_MSR_FIRST + i,
+ cccrval | P4_CCCR_ENABLE);
}
- if (pmc_interrupted) {
+ /* allow the other CPU to proceed */
+ if (p4_system_has_htt)
+ P4_PCPU_REL_INTR_SPINLOCK(pc);
- /*
- * On Intel CPUs, the PMC 'pcint' entry in the LAPIC
- * gets masked when a PMC interrupts the CPU. We need
- * to unmask this.
- */
- p4_lapic_enable_pmc_interrupt();
+ /*
+ * On Intel P4 CPUs, the PMC 'pcint' entry in the LAPIC gets
+ * masked when a PMC interrupts the CPU. We need to unmask
+ * the interrupt source explicitly.
+ */
- /* XXX: Invoke helper (non-NMI) interrupt here */
- }
+ if (did_interrupt)
+ pmc_x86_lapic_enable_pmc_interrupt();
+ else
+ atomic_add_int(&pmc_stats.pm_intr_ignored, 1);
- return pmc_interrupted;
+ return did_interrupt;
}
/*
diff --git a/sys/dev/hwpmc/hwpmc_piv.h b/sys/dev/hwpmc/hwpmc_piv.h
new file mode 100644
index 0000000..0837b26
--- /dev/null
+++ b/sys/dev/hwpmc/hwpmc_piv.h
@@ -0,0 +1,124 @@
+/*-
+ * Copyright (c) 2005, Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* Machine dependent interfaces */
+
+#ifndef _DEV_HWPMC_PIV_H_
+#define _DEV_HWPMC_PIV_H_ 1
+
+/* Intel P4 PMCs */
+
+#define P4_NPMCS 19 /* 1 TSC + 18 PMCS */
+#define P4_NESCR 45
+#define P4_INVALID_PMC_INDEX -1
+#define P4_MAX_ESCR_PER_EVENT 2
+#define P4_MAX_PMC_PER_ESCR 3
+
+#define P4_CCCR_OVF (1 << 31)
+#define P4_CCCR_CASCADE (1 << 30)
+#define P4_CCCR_OVF_PMI_T1 (1 << 27)
+#define P4_CCCR_OVF_PMI_T0 (1 << 26)
+#define P4_CCCR_FORCE_OVF (1 << 25)
+#define P4_CCCR_EDGE (1 << 24)
+#define P4_CCCR_THRESHOLD_SHIFT 20
+#define P4_CCCR_THRESHOLD_MASK 0x00F00000
+#define P4_CCCR_TO_THRESHOLD(C) (((C) << P4_CCCR_THRESHOLD_SHIFT) & \
+ P4_CCCR_THRESHOLD_MASK)
+#define P4_CCCR_COMPLEMENT (1 << 19)
+#define P4_CCCR_COMPARE (1 << 18)
+#define P4_CCCR_ACTIVE_THREAD_SHIFT 16
+#define P4_CCCR_ACTIVE_THREAD_MASK 0x00030000
+#define P4_CCCR_TO_ACTIVE_THREAD(T) (((T) << P4_CCCR_ACTIVE_THREAD_SHIFT) & \
+ P4_CCCR_ACTIVE_THREAD_MASK)
+#define P4_CCCR_ESCR_SELECT_SHIFT 13
+#define P4_CCCR_ESCR_SELECT_MASK 0x0000E000
+#define P4_CCCR_TO_ESCR_SELECT(E) (((E) << P4_CCCR_ESCR_SELECT_SHIFT) & \
+ P4_CCCR_ESCR_SELECT_MASK)
+#define P4_CCCR_ENABLE (1 << 12)
+#define P4_CCCR_VALID_BITS (P4_CCCR_OVF | P4_CCCR_CASCADE | \
+ P4_CCCR_OVF_PMI_T1 | P4_CCCR_OVF_PMI_T0 | P4_CCCR_FORCE_OVF | \
+ P4_CCCR_EDGE | P4_CCCR_THRESHOLD_MASK | P4_CCCR_COMPLEMENT | \
+ P4_CCCR_COMPARE | P4_CCCR_ESCR_SELECT_MASK | P4_CCCR_ENABLE)
+
+#define P4_ESCR_EVENT_SELECT_SHIFT 25
+#define P4_ESCR_EVENT_SELECT_MASK 0x7E000000
+#define P4_ESCR_TO_EVENT_SELECT(E) (((E) << P4_ESCR_EVENT_SELECT_SHIFT) & \
+ P4_ESCR_EVENT_SELECT_MASK)
+#define P4_ESCR_EVENT_MASK_SHIFT 9
+#define P4_ESCR_EVENT_MASK_MASK 0x01FFFE00
+#define P4_ESCR_TO_EVENT_MASK(M) (((M) << P4_ESCR_EVENT_MASK_SHIFT) & \
+ P4_ESCR_EVENT_MASK_MASK)
+#define P4_ESCR_TAG_VALUE_SHIFT 5
+#define P4_ESCR_TAG_VALUE_MASK 0x000001E0
+#define P4_ESCR_TO_TAG_VALUE(T) (((T) << P4_ESCR_TAG_VALUE_SHIFT) & \
+ P4_ESCR_TAG_VALUE_MASK)
+#define P4_ESCR_TAG_ENABLE 0x00000010
+#define P4_ESCR_T0_OS 0x00000008
+#define P4_ESCR_T0_USR 0x00000004
+#define P4_ESCR_T1_OS 0x00000002
+#define P4_ESCR_T1_USR 0x00000001
+#define P4_ESCR_OS P4_ESCR_T0_OS
+#define P4_ESCR_USR P4_ESCR_T0_USR
+#define P4_ESCR_VALID_BITS (P4_ESCR_EVENT_SELECT_MASK | \
+ P4_ESCR_EVENT_MASK_MASK | P4_ESCR_TAG_VALUE_MASK | \
+ P4_ESCR_TAG_ENABLE | P4_ESCR_T0_OS | P4_ESCR_T0_USR | P4_ESCR_T1_OS | \
+ P4_ESCR_T1_USR)
+
+#define P4_PERFCTR_MASK 0xFFFFFFFFFFLL /* 40 bits */
+#define P4_PERFCTR_OVERFLOWED(PMC) ((rdpmc(PMC) & (1LL << 39)) == 0)
+
+#define P4_CCCR_MSR_FIRST 0x360 /* MSR_BPU_CCCR0 */
+#define P4_PERFCTR_MSR_FIRST 0x300 /* MSR_BPU_COUNTER0 */
+
+#define P4_RELOAD_COUNT_TO_PERFCTR_VALUE(V) (1 - (V))
+#define P4_PERFCTR_VALUE_TO_RELOAD_COUNT(P) (1 - (P))
+
+struct pmc_md_p4_op_pmcallocate {
+ uint32_t pm_p4_cccrconfig;
+ uint32_t pm_p4_escrconfig;
+};
+
+#ifdef _KERNEL
+
+/* MD extension for 'struct pmc' */
+struct pmc_md_p4_pmc {
+ uint32_t pm_p4_cccrvalue;
+ uint32_t pm_p4_escrvalue;
+ uint32_t pm_p4_escr;
+ uint32_t pm_p4_escrmsr;
+};
+
+
+/*
+ * Prototypes
+ */
+
+int pmc_initialize_p4(struct pmc_mdep *); /* Pentium IV PMCs */
+
+#endif /* _KERNEL */
+#endif /* _DEV_HWPMC_PIV_H_ */
diff --git a/sys/dev/hwpmc/hwpmc_powerpc.c b/sys/dev/hwpmc/hwpmc_powerpc.c
new file mode 100644
index 0000000..af905b6
--- /dev/null
+++ b/sys/dev/hwpmc/hwpmc_powerpc.c
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2005, Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmc.h>
+
+#include <machine/pmc_mdep.h>
+
+struct pmc_mdep *
+pmc_md_initialize()
+{
+ return NULL;
+}
diff --git a/sys/dev/hwpmc/hwpmc_ppro.c b/sys/dev/hwpmc/hwpmc_ppro.c
index 370e6e5..9c89c9e 100644
--- a/sys/dev/hwpmc/hwpmc_ppro.c
+++ b/sys/dev/hwpmc/hwpmc_ppro.c
@@ -35,11 +35,29 @@ __FBSDID("$FreeBSD$");
#include <sys/smp.h>
#include <sys/systm.h>
-#include <machine/cputypes.h>
+#include <machine/cpufunc.h>
#include <machine/md_var.h>
+#include <machine/pmc_mdep.h>
+#include <machine/specialreg.h>
/*
* PENTIUM PRO SUPPORT
+ *
+ * Quirks:
+ *
+ * - Both PMCs are enabled by a single bit P6_EVSEL_EN in performance
+ * counter '0'. This bit needs to be '1' if any of the two
+ * performance counters are in use. Perf counters can also be
+ * switched off by writing zeros to their EVSEL register.
+ *
+ * - While the width of these counters is 40 bits, we do not appear to
+ * have a way of writing 40 bits to the counter MSRs. A WRMSR
+ * instruction will sign extend bit 31 of the value being written to
+ * the perf counter -- a value of 0x80000000 written to an perf
+ * counter register will be sign extended to 0xFF80000000.
+ *
+ * This quirk primarily affects thread-mode PMCs in counting mode, as
+ * these PMCs read and write PMC registers at every context switch.
*/
struct p6pmc_descr {
@@ -269,16 +287,43 @@ p6_find_event(enum pmc_event ev)
* Per-CPU data structure for P6 class CPUs
*
* [common stuff]
+ * [flags for maintaining PMC start/stop state]
* [3 struct pmc_hw pointers]
* [3 struct pmc_hw structures]
*/
struct p6_cpu {
struct pmc_cpu pc_common;
+ uint32_t pc_state;
struct pmc_hw *pc_hwpmcs[P6_NPMCS];
struct pmc_hw pc_p6pmcs[P6_NPMCS];
};
+/*
+ * If CTR1 is active, we need to keep the 'EN' bit if CTR0 set,
+ * with the rest of CTR0 being zero'ed out.
+ */
+#define P6_SYNC_CTR_STATE(PC) do { \
+ uint32_t _config, _enable; \
+ _enable = 0; \
+ if ((PC)->pc_state & 0x02) \
+ _enable |= P6_EVSEL_EN; \
+ if ((PC)->pc_state & 0x01) \
+ _config = rdmsr(P6_MSR_EVSEL0) | \
+ P6_EVSEL_EN; \
+ else \
+ _config = 0; \
+ wrmsr(P6_MSR_EVSEL0, _config | _enable); \
+ } while (0)
+
+#define P6_MARK_STARTED(PC,RI) do { \
+ (PC)->pc_state |= (1 << ((RI)-1)); \
+ } while (0)
+
+#define P6_MARK_STOPPED(PC,RI) do { \
+ (PC)->pc_state &= ~(1<< ((RI)-1)); \
+ } while (0)
+
static int
p6_init(int cpu)
{
@@ -294,9 +339,6 @@ p6_init(int cpu)
MALLOC(pcs, struct p6_cpu *, sizeof(struct p6_cpu), M_PMC,
M_WAITOK|M_ZERO);
- if (pcs == NULL)
- return ENOMEM;
-
phw = pcs->pc_p6pmcs;
for (n = 0; n < P6_NPMCS; n++, phw++) {
@@ -377,12 +419,14 @@ p6_read_pmc(int cpu, int ri, pmc_value_t *v)
KASSERT(pm,
("[p6,%d] cpu %d ri %d pmc not configured", __LINE__, cpu, ri));
- if (pd->pm_descr.pd_class == PMC_CLASS_TSC)
+ if (pd->pm_descr.pd_class == PMC_CLASS_TSC) {
+ *v = rdtsc();
return 0;
+ }
- tmp = rdmsr(pd->pm_pmc_msr) & P6_PERFCTR_MASK;
+ tmp = rdmsr(pd->pm_pmc_msr) & P6_PERFCTR_READ_MASK;
if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
- *v = -tmp;
+ *v = P6_PERFCTR_VALUE_TO_RELOAD_COUNT(tmp);
else
*v = tmp;
@@ -413,9 +457,9 @@ p6_write_pmc(int cpu, int ri, pmc_value_t v)
pd->pm_pmc_msr, v);
if (PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm)))
- v = -v;
+ v = P6_RELOAD_COUNT_TO_PERFCTR_VALUE(v);
- wrmsr(pd->pm_pmc_msr, v & P6_PERFCTR_MASK);
+ wrmsr(pd->pm_pmc_msr, v & P6_PERFCTR_WRITE_MASK);
return 0;
}
@@ -518,7 +562,7 @@ p6_allocate_pmc(int cpu, int ri, struct pmc *pm,
} else
allowed_unitmask = P6_EVSEL_TO_UMASK(pevent->pm_unitmask);
- unitmask = a->pm_p6_config & P6_EVSEL_UMASK_MASK;
+ unitmask = a->pm_md.pm_ppro.pm_ppro_config & P6_EVSEL_UMASK_MASK;
if (unitmask & ~allowed_unitmask) /* disallow reserved bits */
return EINVAL;
@@ -533,7 +577,8 @@ p6_allocate_pmc(int cpu, int ri, struct pmc *pm,
config |= unitmask;
if (caps & PMC_CAP_THRESHOLD)
- config |= a->pm_p6_config & P6_EVSEL_CMASK_MASK;
+ config |= a->pm_md.pm_ppro.pm_ppro_config &
+ P6_EVSEL_CMASK_MASK;
/* set at least one of the 'usr' or 'os' caps */
if (caps & PMC_CAP_USER)
@@ -550,7 +595,7 @@ p6_allocate_pmc(int cpu, int ri, struct pmc *pm,
if (caps & PMC_CAP_INTERRUPT)
config |= P6_EVSEL_INT;
- pm->pm_md.pm_p6.pm_p6_evsel = config;
+ pm->pm_md.pm_ppro.pm_ppro_evsel = config;
PMCDBG(MDP,ALL,2, "p6-allocate config=0x%x", config);
@@ -584,6 +629,7 @@ p6_start_pmc(int cpu, int ri)
{
uint32_t config;
struct pmc *pm;
+ struct p6_cpu *pc;
struct pmc_hw *phw;
const struct p6pmc_descr *pd;
@@ -592,7 +638,8 @@ p6_start_pmc(int cpu, int ri)
KASSERT(ri >= 0 && ri < P6_NPMCS,
("[p6,%d] illegal row-index %d", __LINE__, ri));
- phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
+ pc = (struct p6_cpu *) pmc_pcpu[cpu];
+ phw = pc->pc_common.pc_hwpmcs[ri];
pm = phw->phw_pmc;
pd = &p6_pmcdesc[ri];
@@ -609,25 +656,24 @@ p6_start_pmc(int cpu, int ri)
("[p6,%d] unknown PMC class %d", __LINE__,
pd->pm_descr.pd_class));
- config = pm->pm_md.pm_p6.pm_p6_evsel;
+ config = pm->pm_md.pm_ppro.pm_ppro_evsel;
PMCDBG(MDP,STA,2, "p6-start/2 cpu=%d ri=%d evselmsr=0x%x config=0x%x",
cpu, ri, pd->pm_evsel_msr, config);
- if (pd->pm_evsel_msr == P6_MSR_EVSEL0) /* CTR 0 */
- wrmsr(pd->pm_evsel_msr, config | P6_EVSEL_EN);
- else { /* CTR1 shares the enable bit CTR 0 */
- wrmsr(pd->pm_evsel_msr, config);
- wrmsr(P6_MSR_EVSEL0, rdmsr(P6_MSR_EVSEL0) | P6_EVSEL_EN);
- }
+ P6_MARK_STARTED(pc, ri);
+ wrmsr(pd->pm_evsel_msr, config);
+
+ P6_SYNC_CTR_STATE(pc);
+
return 0;
}
static int
p6_stop_pmc(int cpu, int ri)
{
- uint32_t config;
struct pmc *pm;
+ struct p6_cpu *pc;
struct pmc_hw *phw;
struct p6pmc_descr *pd;
@@ -636,7 +682,8 @@ p6_stop_pmc(int cpu, int ri)
KASSERT(ri >= 0 && ri < P6_NPMCS,
("[p6,%d] illegal row index %d", __LINE__, ri));
- phw = pmc_pcpu[cpu]->pc_hwpmcs[ri];
+ pc = (struct p6_cpu *) pmc_pcpu[cpu];
+ phw = pc->pc_common.pc_hwpmcs[ri];
pm = phw->phw_pmc;
pd = &p6_pmcdesc[ri];
@@ -653,30 +700,75 @@ p6_stop_pmc(int cpu, int ri)
PMCDBG(MDP,STO,1, "p6-stop cpu=%d ri=%d", cpu, ri);
- /*
- * If CTR0 is being turned off but CTR1 is active, we need
- * leave CTR0's EN field set. If CTR1 is being stopped, it
- * suffices to zero its EVSEL register.
- */
+ wrmsr(pd->pm_evsel_msr, 0); /* stop hw */
+ P6_MARK_STOPPED(pc, ri); /* update software state */
- if (ri == 1 &&
- pmc_pcpu[cpu]->pc_hwpmcs[2]->phw_pmc != NULL)
- config = P6_EVSEL_EN;
- else
- config = 0;
- wrmsr(pd->pm_evsel_msr, config);
+ P6_SYNC_CTR_STATE(pc); /* restart CTR1 if need be */
- PMCDBG(MDP,STO,2, "p6-stop/2 cpu=%d ri=%d config=0x%x", cpu, ri,
- config);
+ PMCDBG(MDP,STO,2, "p6-stop/2 cpu=%d ri=%d", cpu, ri);
return 0;
}
static int
p6_intr(int cpu, uintptr_t eip, int usermode)
{
- (void) cpu;
- (void) eip;
- return 0;
+ int i, error, retval, ri;
+ uint32_t perf0cfg;
+ struct pmc *pm;
+ struct p6_cpu *pc;
+ struct pmc_hw *phw;
+ pmc_value_t v;
+
+ KASSERT(cpu >= 0 && cpu < mp_ncpus,
+ ("[p6,%d] CPU %d out of range", __LINE__, cpu));
+
+ retval = 0;
+ pc = (struct p6_cpu *) pmc_pcpu[cpu];
+
+ /* stop both PMCs */
+ perf0cfg = rdmsr(P6_MSR_EVSEL0);
+ wrmsr(P6_MSR_EVSEL0, perf0cfg & ~P6_EVSEL_EN);
+
+ for (i = 0; i < P6_NPMCS-1; i++) {
+ ri = i + 1;
+
+ if (!P6_PMC_HAS_OVERFLOWED(i))
+ continue;
+
+ phw = pc->pc_common.pc_hwpmcs[ri];
+
+ if ((pm = phw->phw_pmc) == NULL ||
+ pm->pm_state != PMC_STATE_RUNNING ||
+ !PMC_IS_SAMPLING_MODE(PMC_TO_MODE(pm))) {
+ continue;
+ }
+
+ retval = 1;
+
+ error = pmc_process_interrupt(cpu, pm, eip, usermode);
+ if (error)
+ P6_MARK_STOPPED(pc,ri);
+
+ /* reload sampling count */
+ v = pm->pm_sc.pm_reloadcount;
+ wrmsr(P6_MSR_PERFCTR0 + i,
+ P6_RELOAD_COUNT_TO_PERFCTR_VALUE(v));
+
+ }
+
+ /*
+ * On P6 processors, the LAPIC needs to have its PMC interrupt
+ * unmasked after a PMC interrupt.
+ */
+ if (retval)
+ pmc_x86_lapic_enable_pmc_interrupt();
+ else
+ atomic_add_int(&pmc_stats.pm_intr_ignored, 1);
+
+ /* restart counters that can be restarted */
+ P6_SYNC_CTR_STATE(pc);
+
+ return retval;
}
static int
diff --git a/sys/dev/hwpmc/hwpmc_ppro.h b/sys/dev/hwpmc/hwpmc_ppro.h
new file mode 100644
index 0000000..f750735
--- /dev/null
+++ b/sys/dev/hwpmc/hwpmc_ppro.h
@@ -0,0 +1,83 @@
+/*-
+ * Copyright (c) 2005, Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+/* Machine dependent interfaces */
+
+#ifndef _DEV_HWPMC_PPRO_H_
+#define _DEV_HWPMC_PPRO_H_
+
+/* Intel PPro, Celeron, P-II, P-III, Pentium-M PMCS */
+
+#define P6_NPMCS 3 /* 1 TSC + 2 PMCs */
+
+#define P6_EVSEL_CMASK_MASK 0xFF000000
+#define P6_EVSEL_TO_CMASK(C) (((C) & 0xFF) << 24)
+#define P6_EVSEL_INV (1 << 23)
+#define P6_EVSEL_EN (1 << 22)
+#define P6_EVSEL_INT (1 << 20)
+#define P6_EVSEL_PC (1 << 19)
+#define P6_EVSEL_E (1 << 18)
+#define P6_EVSEL_OS (1 << 17)
+#define P6_EVSEL_USR (1 << 16)
+#define P6_EVSEL_UMASK_MASK 0x0000FF00
+#define P6_EVSEL_TO_UMASK(U) (((U) & 0xFF) << 8)
+#define P6_EVSEL_EVENT_SELECT(ES) ((ES) & 0xFF)
+#define P6_EVSEL_RESERVED (1 << 21)
+
+#define P6_MSR_EVSEL0 0x0186
+#define P6_MSR_EVSEL1 0x0187
+#define P6_MSR_PERFCTR0 0x00C1
+#define P6_MSR_PERFCTR1 0x00C2
+
+#define P6_PERFCTR_READ_MASK 0xFFFFFFFFFFLL /* 40 bits */
+#define P6_PERFCTR_WRITE_MASK 0xFFFFFFFFU /* 32 bits */
+
+#define P6_RELOAD_COUNT_TO_PERFCTR_VALUE(R) (-(R))
+#define P6_PERFCTR_VALUE_TO_RELOAD_COUNT(P) (-(P))
+
+#define P6_PMC_HAS_OVERFLOWED(P) ((rdpmc(P) & (1LL << 39)) == 0)
+
+struct pmc_md_ppro_op_pmcallocate {
+ uint32_t pm_ppro_config;
+};
+
+#ifdef _KERNEL
+
+/* MD extension for 'struct pmc' */
+struct pmc_md_ppro_pmc {
+ uint32_t pm_ppro_evsel;
+};
+
+/*
+ * Prototypes
+ */
+
+int pmc_initialize_p6(struct pmc_mdep *); /* Pentium Pro PMCs */
+
+#endif /* _KERNEL */
+#endif /* _DEV_HWPMC_PPRO_H_ */
diff --git a/sys/dev/hwpmc/hwpmc_sparc64.c b/sys/dev/hwpmc/hwpmc_sparc64.c
new file mode 100644
index 0000000..af905b6
--- /dev/null
+++ b/sys/dev/hwpmc/hwpmc_sparc64.c
@@ -0,0 +1,40 @@
+/*-
+ * Copyright (c) 2005, Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ */
+
+#include <sys/cdefs.h>
+__FBSDID("$FreeBSD$");
+
+#include <sys/param.h>
+#include <sys/pmc.h>
+
+#include <machine/pmc_mdep.h>
+
+struct pmc_mdep *
+pmc_md_initialize()
+{
+ return NULL;
+}
diff --git a/sys/dev/hwpmc/hwpmc_intel.c b/sys/dev/hwpmc/hwpmc_x86.c
index ea2c6c8..5256a1a 100644
--- a/sys/dev/hwpmc/hwpmc_intel.c
+++ b/sys/dev/hwpmc/hwpmc_x86.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2003-2005 Joseph Koshy
+ * Copyright (c) 2005, Joseph Koshy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -28,17 +28,28 @@
__FBSDID("$FreeBSD$");
#include <sys/param.h>
-#include <sys/lock.h>
-#include <sys/mutex.h>
+#include <sys/bus.h>
#include <sys/pmc.h>
-#include <sys/pmckern.h>
-#include <sys/smp.h>
#include <sys/systm.h>
-#include <machine/cputypes.h>
+#include <machine/apicreg.h>
+#include <machine/pmc_mdep.h>
#include <machine/md_var.h>
-struct pmc_mdep *
+extern volatile lapic_t *lapic;
+
+void
+pmc_x86_lapic_enable_pmc_interrupt(void)
+{
+ uint32_t value;
+
+ value = lapic->lvt_pcint;
+ value &= ~APIC_LVT_M;
+ lapic->lvt_pcint = value;
+}
+
+
+static struct pmc_mdep *
pmc_intel_initialize(void)
{
struct pmc_mdep *pmc_mdep;
@@ -53,6 +64,7 @@ pmc_intel_initialize(void)
cputype = -1;
switch (cpu_id & 0xF00) {
+#if defined(__i386__)
case 0x500: /* Pentium family processors */
cputype = PMC_CPU_INTEL_P5;
break;
@@ -75,12 +87,15 @@ pmc_intel_initialize(void)
break;
}
break;
+#endif
+#if defined(__i386__) || defined(__amd64__)
case 0xF00: /* P4 */
model = ((cpu_id & 0xF0000) >> 12) | ((cpu_id & 0xF0) >> 4);
if (model >= 0 && model <= 3) /* known models */
cputype = PMC_CPU_INTEL_PIV;
break;
}
+#endif
if ((int) cputype == -1) {
printf("pmc: Unknown Intel CPU.\n");
@@ -101,14 +116,18 @@ pmc_intel_initialize(void)
switch (cputype) {
+#if defined(__i386__) || defined(__amd64__)
+
/*
- * Intel Pentium 4 Processors
+ * Intel Pentium 4 Processors, and P4/EMT64 processors.
*/
case PMC_CPU_INTEL_PIV:
error = pmc_initialize_p4(pmc_mdep);
break;
+#endif
+#if defined(__i386__)
/*
* P6 Family Processors
*/
@@ -129,6 +148,7 @@ pmc_intel_initialize(void)
case PMC_CPU_INTEL_P5:
error = pmc_initialize_p5(pmc_mdep);
break;
+#endif
default:
KASSERT(0,("[intel,%d] Unknown CPU type", __LINE__));
@@ -141,3 +161,19 @@ pmc_intel_initialize(void)
return pmc_mdep;
}
+
+
+/*
+ * Machine dependent initialization for x86 class platforms.
+ */
+
+struct pmc_mdep *
+pmc_md_initialize()
+{
+ /* determine the CPU kind */
+ if (strcmp(cpu_vendor, "AuthenticAMD") == 0)
+ return pmc_amd_initialize();
+ else if (strcmp(cpu_vendor, "GenuineIntel") == 0)
+ return pmc_intel_initialize();
+ return NULL;
+}
diff --git a/sys/dev/hwpmc/pmc_events.h b/sys/dev/hwpmc/pmc_events.h
new file mode 100644
index 0000000..23fc9dc
--- /dev/null
+++ b/sys/dev/hwpmc/pmc_events.h
@@ -0,0 +1,530 @@
+/*-
+ * Copyright (c) 2005 Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _DEV_HWPMC_PMC_EVENTS_H_
+#define _DEV_HWPMC_PMC_EVENTS_H_
+
+/*
+ * PMC event codes.
+ *
+ * __PMC_EV(CLASS, SYMBOLIC-NAME, VALUE, READABLE-NAME)
+ *
+ */
+
+/*
+ * AMD K7 Events, from "The AMD Athlon(tm) Processor x86 Code
+ * Optimization Guide" [Doc#22007K, Feb 2002]
+ */
+
+#define __PMC_EV_K7() \
+__PMC_EV(K7, DC_ACCESSES, k7-dc-accesses) \
+__PMC_EV(K7, DC_MISSES, k7-dc-misses) \
+__PMC_EV(K7, DC_REFILLS_FROM_L2, k7-dc-refills-from-l2) \
+__PMC_EV(K7, DC_REFILLS_FROM_SYSTEM, k7-dc-refills-from-system) \
+__PMC_EV(K7, DC_WRITEBACKS, k7-dc-writebacks) \
+__PMC_EV(K7, L1_DTLB_MISS_AND_L2_DTLB_HITS, \
+ k7-l1-dtlb-miss-and-l2-dtlb-hits) \
+__PMC_EV(K7, L1_AND_L2_DTLB_MISSES, k7-l1-and-l2-dtlb-misses) \
+__PMC_EV(K7, MISALIGNED_REFERENCES, k7-misaligned-references) \
+__PMC_EV(K7, IC_FETCHES, k7-ic-fetches) \
+__PMC_EV(K7, IC_MISSES, k7-ic-misses) \
+__PMC_EV(K7, L1_ITLB_MISSES, k7-l1-itlb-misses) \
+__PMC_EV(K7, L1_L2_ITLB_MISSES, k7-l1-l2-itlb-misses) \
+__PMC_EV(K7, RETIRED_INSTRUCTIONS, k7-retired-instructions) \
+__PMC_EV(K7, RETIRED_OPS, k7-retired-ops) \
+__PMC_EV(K7, RETIRED_BRANCHES, k7-retired-branches) \
+__PMC_EV(K7, RETIRED_BRANCHES_MISPREDICTED, \
+ k7-retired-branches-mispredicted) \
+__PMC_EV(K7, RETIRED_TAKEN_BRANCHES, k7-retired-taken-branches) \
+__PMC_EV(K7, RETIRED_TAKEN_BRANCHES_MISPREDICTED, \
+ k7-retired-taken-branches-mispredicted) \
+__PMC_EV(K7, RETIRED_FAR_CONTROL_TRANSFERS, \
+ k7-retired-far-control-transfers) \
+__PMC_EV(K7, RETIRED_RESYNC_BRANCHES, k7-retired-resync-branches) \
+__PMC_EV(K7, INTERRUPTS_MASKED_CYCLES, k7-interrupts-masked-cycles) \
+__PMC_EV(K7, INTERRUPTS_MASKED_WHILE_PENDING_CYCLES, \
+ k7-interrupts-masked-while-pending-cycles) \
+__PMC_EV(K7, HARDWARE_INTERRUPTS, k7-hardware-interrupts)
+
+#define PMC_EV_K7_FIRST PMC_EV_K7_DC_ACCESSES
+#define PMC_EV_K7_LAST PMC_EV_K7_HARDWARE_INTERRUPTS
+
+/*
+ * Intel P4 Events, from "IA-32 Intel(r) Architecture Software
+ * Developer's Manual, Volume 3: System Programming Guide" [245472-012]
+ */
+
+#define __PMC_EV_P4() \
+__PMC_EV(P4, TC_DELIVER_MODE, p4-tc-deliver-mode) \
+__PMC_EV(P4, BPU_FETCH_REQUEST, p4-bpu-fetch-request) \
+__PMC_EV(P4, ITLB_REFERENCE, p4-itlb-reference) \
+__PMC_EV(P4, MEMORY_CANCEL, p4-memory-cancel) \
+__PMC_EV(P4, MEMORY_COMPLETE, p4-memory-complete) \
+__PMC_EV(P4, LOAD_PORT_REPLAY, p4-load-port-replay) \
+__PMC_EV(P4, STORE_PORT_REPLAY, p4-store-port-replay) \
+__PMC_EV(P4, MOB_LOAD_REPLAY, p4-mob-load-replay) \
+__PMC_EV(P4, PAGE_WALK_TYPE, p4-page-walk-type) \
+__PMC_EV(P4, BSQ_CACHE_REFERENCE, p4-bsq-cache-reference) \
+__PMC_EV(P4, IOQ_ALLOCATION, p4-ioq-allocation) \
+__PMC_EV(P4, IOQ_ACTIVE_ENTRIES, p4-ioq-active-entries) \
+__PMC_EV(P4, FSB_DATA_ACTIVITY, p4-fsb-data-activity) \
+__PMC_EV(P4, BSQ_ALLOCATION, p4-bsq-allocation) \
+__PMC_EV(P4, BSQ_ACTIVE_ENTRIES, p4-bsq-active-entries) \
+__PMC_EV(P4, SSE_INPUT_ASSIST, p4-sse-input-assist) \
+__PMC_EV(P4, PACKED_SP_UOP, p4-packed-sp-uop) \
+__PMC_EV(P4, PACKED_DP_UOP, p4-packed-dp-uop) \
+__PMC_EV(P4, SCALAR_SP_UOP, p4-scalar-sp-uop) \
+__PMC_EV(P4, SCALAR_DP_UOP, p4-scalar-dp-uop) \
+__PMC_EV(P4, 64BIT_MMX_UOP, p4-64bit-mmx-uop) \
+__PMC_EV(P4, 128BIT_MMX_UOP, p4-128bit-mmx-uop) \
+__PMC_EV(P4, X87_FP_UOP, p4-x87-fp-uop) \
+__PMC_EV(P4, X87_SIMD_MOVES_UOP, p4-x87-simd-moves-uop) \
+__PMC_EV(P4, GLOBAL_POWER_EVENTS, p4-global-power-events) \
+__PMC_EV(P4, TC_MS_XFER, p4-tc-ms-xfer) \
+__PMC_EV(P4, UOP_QUEUE_WRITES, p4-uop-queue-writes) \
+__PMC_EV(P4, RETIRED_MISPRED_BRANCH_TYPE, \
+ p4-retired-mispred-branch-type) \
+__PMC_EV(P4, RETIRED_BRANCH_TYPE, p4-retired-branch-type) \
+__PMC_EV(P4, RESOURCE_STALL, p4-resource-stall) \
+__PMC_EV(P4, WC_BUFFER, p4-wc-buffer) \
+__PMC_EV(P4, B2B_CYCLES, p4-b2b-cycles) \
+__PMC_EV(P4, BNR, p4-bnr) \
+__PMC_EV(P4, SNOOP, p4-snoop) \
+__PMC_EV(P4, RESPONSE, p4-response) \
+__PMC_EV(P4, FRONT_END_EVENT, p4-front-end-event) \
+__PMC_EV(P4, EXECUTION_EVENT, p4-execution-event) \
+__PMC_EV(P4, REPLAY_EVENT, p4-replay-event) \
+__PMC_EV(P4, INSTR_RETIRED, p4-instr-retired) \
+__PMC_EV(P4, UOPS_RETIRED, p4-uops-retired) \
+__PMC_EV(P4, UOP_TYPE, p4-uop-type) \
+__PMC_EV(P4, BRANCH_RETIRED, p4-branch-retired) \
+__PMC_EV(P4, MISPRED_BRANCH_RETIRED, p4-mispred-branch-retired) \
+__PMC_EV(P4, X87_ASSIST, p4-x87-assist) \
+__PMC_EV(P4, MACHINE_CLEAR, p4-machine-clear)
+
+#define PMC_EV_P4_FIRST PMC_EV_P4_TC_DELIVER_MODE
+#define PMC_EV_P4_LAST PMC_EV_P4_MACHINE_CLEAR
+
+/* Intel Pentium Pro, P-II, P-III and Pentium-M style events */
+
+#define __PMC_EV_P6() \
+__PMC_EV(P6, DATA_MEM_REFS, p6-data-mem-refs) \
+__PMC_EV(P6, DCU_LINES_IN, p6-dcu-lines-in) \
+__PMC_EV(P6, DCU_M_LINES_IN, p6-dcu-m-lines-in) \
+__PMC_EV(P6, DCU_M_LINES_OUT, p6-dcu-m-lines-out) \
+__PMC_EV(P6, DCU_MISS_OUTSTANDING, p6-dcu-miss-outstanding) \
+__PMC_EV(P6, IFU_FETCH, p6-ifu-fetch) \
+__PMC_EV(P6, IFU_FETCH_MISS, p6-ifu-fetch-miss) \
+__PMC_EV(P6, ITLB_MISS, p6-itlb-miss) \
+__PMC_EV(P6, IFU_MEM_STALL, p6-ifu-mem-stall) \
+__PMC_EV(P6, ILD_STALL, p6-ild-stall) \
+__PMC_EV(P6, L2_IFETCH, p6-l2-ifetch) \
+__PMC_EV(P6, L2_LD, p6-l2-ld) \
+__PMC_EV(P6, L2_ST, p6-l2-st) \
+__PMC_EV(P6, L2_LINES_IN, p6-l2-lines-in) \
+__PMC_EV(P6, L2_LINES_OUT, p6-l2-lines-out) \
+__PMC_EV(P6, L2_M_LINES_INM, p6-l2-m-lines-inm) \
+__PMC_EV(P6, L2_M_LINES_OUTM, p6-l2-m-lines-outm) \
+__PMC_EV(P6, L2_RQSTS, p6-l2-rqsts) \
+__PMC_EV(P6, L2_ADS, p6-l2-ads) \
+__PMC_EV(P6, L2_DBUS_BUSY, p6-l2-dbus-busy) \
+__PMC_EV(P6, L2_DBUS_BUSY_RD, p6-l2-dbus-busy-rd) \
+__PMC_EV(P6, BUS_DRDY_CLOCKS, p6-bus-drdy-clocks) \
+__PMC_EV(P6, BUS_LOCK_CLOCKS, p6-bus-lock-clocks) \
+__PMC_EV(P6, BUS_REQ_OUTSTANDING, p6-bus-req-outstanding) \
+__PMC_EV(P6, BUS_TRAN_BRD, p6-bus-tran-brd) \
+__PMC_EV(P6, BUS_TRAN_RFO, p6-bus-tran-rfo) \
+__PMC_EV(P6, BUS_TRANS_WB, p6-bus-trans-wb) \
+__PMC_EV(P6, BUS_TRAN_IFETCH, p6-bus-tran-ifetch) \
+__PMC_EV(P6, BUS_TRAN_INVAL, p6-bus-tran-inval) \
+__PMC_EV(P6, BUS_TRAN_PWR, p6-bus-tran-pwr) \
+__PMC_EV(P6, BUS_TRANS_P, p6-bus-trans-p) \
+__PMC_EV(P6, BUS_TRANS_IO, p6-bus-trans-io) \
+__PMC_EV(P6, BUS_TRAN_DEF, p6-bus-tran-def) \
+__PMC_EV(P6, BUS_TRAN_BURST, p6-bus-tran-burst) \
+__PMC_EV(P6, BUS_TRAN_ANY, p6-bus-tran-any) \
+__PMC_EV(P6, BUS_TRAN_MEM, p6-bus-tran-mem) \
+__PMC_EV(P6, BUS_DATA_RCV, p6-bus-data-rcv) \
+__PMC_EV(P6, BUS_BNR_DRV, p6-bus-bnr-drv) \
+__PMC_EV(P6, BUS_HIT_DRV, p6-bus-hit-drv) \
+__PMC_EV(P6, BUS_HITM_DRV, p6-bus-hitm-drv) \
+__PMC_EV(P6, BUS_SNOOP_STALL, p6-bus-snoop-stall) \
+__PMC_EV(P6, FLOPS, p6-flops) \
+__PMC_EV(P6, FP_COMPS_OPS_EXE, p6-fp-comps-ops-exe) \
+__PMC_EV(P6, FP_ASSIST, p6-fp-assist) \
+__PMC_EV(P6, MUL, p6-mul) \
+__PMC_EV(P6, DIV, p6-div) \
+__PMC_EV(P6, CYCLES_DIV_BUSY, p6-cycles-div-busy) \
+__PMC_EV(P6, LD_BLOCKS, p6-ld-blocks) \
+__PMC_EV(P6, SB_DRAINS, p6-sb-drains) \
+__PMC_EV(P6, MISALIGN_MEM_REF, p6-misalign-mem-ref) \
+__PMC_EV(P6, EMON_KNI_PREF_DISPATCHED, p6-emon-kni-pref-dispatched) \
+__PMC_EV(P6, EMON_KNI_PREF_MISS, p6-emon-kni-pref-miss) \
+__PMC_EV(P6, INST_RETIRED, p6-inst-retired) \
+__PMC_EV(P6, UOPS_RETIRED, p6-uops-retired) \
+__PMC_EV(P6, INST_DECODED, p6-inst-decoded) \
+__PMC_EV(P6, EMON_KNI_INST_RETIRED, p6-emon-kni-inst-retired) \
+__PMC_EV(P6, EMON_KNI_COMP_INST_RET, p6-emon-kni-comp-inst-ret) \
+__PMC_EV(P6, HW_INT_RX, p6-hw-int-rx) \
+__PMC_EV(P6, CYCLES_INT_MASKED, p6-cycles-int-masked) \
+__PMC_EV(P6, CYCLES_INT_PENDING_AND_MASKED, \
+ p6-cycles-in-pending-and-masked) \
+__PMC_EV(P6, BR_INST_RETIRED, p6-br-inst-retired) \
+__PMC_EV(P6, BR_MISS_PRED_RETIRED, p6-br-miss-pred-retired) \
+__PMC_EV(P6, BR_TAKEN_RETIRED, p6-br-taken-retired) \
+__PMC_EV(P6, BR_MISS_PRED_TAKEN_RET, p6-br-miss-pred-taken-ret) \
+__PMC_EV(P6, BR_INST_DECODED, p6-br-inst-decoded) \
+__PMC_EV(P6, BTB_MISSES, p6-btb-misses) \
+__PMC_EV(P6, BR_BOGUS, p6-br-bogus) \
+__PMC_EV(P6, BACLEARS, p6-baclears) \
+__PMC_EV(P6, RESOURCE_STALLS, p6-resource-stalls) \
+__PMC_EV(P6, PARTIAL_RAT_STALLS, p6-partial-rat-stalls) \
+__PMC_EV(P6, SEGMENT_REG_LOADS, p6-segment-reg-loads) \
+__PMC_EV(P6, CPU_CLK_UNHALTED, p6-cpu-clk-unhalted) \
+__PMC_EV(P6, MMX_INSTR_EXEC, p6-mmx-instr-exec) \
+__PMC_EV(P6, MMX_SAT_INSTR_EXEC, p6-mmx-sat-instr-exec) \
+__PMC_EV(P6, MMX_UOPS_EXEC, p6-mmx-uops-exec) \
+__PMC_EV(P6, MMX_INSTR_TYPE_EXEC, p6-mmx-instr-type-exec) \
+__PMC_EV(P6, FP_MMX_TRANS, p6-fp-mmx-trans) \
+__PMC_EV(P6, MMX_ASSIST, p6-mmx-assist) \
+__PMC_EV(P6, MMX_INSTR_RET, p6-mmx-instr-ret) \
+__PMC_EV(P6, SEG_RENAME_STALLS, p6-seg-rename-stalls) \
+__PMC_EV(P6, SEG_REG_RENAMES, p6-seg-reg-renames) \
+__PMC_EV(P6, RET_SEG_RENAMES, p6-ret-seg-renames) \
+__PMC_EV(P6, EMON_EST_TRANS, p6-emon-est-trans) \
+__PMC_EV(P6, EMON_THERMAL_TRIP, p6-emon-thermal-trip) \
+__PMC_EV(P6, BR_INST_EXEC, p6-br-inst-exec) \
+__PMC_EV(P6, BR_MISSP_EXEC, p6-br-missp-exec) \
+__PMC_EV(P6, BR_BAC_MISSP_EXEC, p6-br-bac-missp-exec) \
+__PMC_EV(P6, BR_CND_EXEC, p6-br-cnd-exec) \
+__PMC_EV(P6, BR_CND_MISSP_EXEC, p6-br-cnd-missp-exec) \
+__PMC_EV(P6, BR_IND_EXEC, p6-br-ind-exec) \
+__PMC_EV(P6, BR_IND_MISSP_EXEC, p6-br-ind-missp-exec) \
+__PMC_EV(P6, BR_RET_EXEC, p6-br-ret-exec) \
+__PMC_EV(P6, BR_RET_MISSP_EXEC, p6-br-ret-missp-exec) \
+__PMC_EV(P6, BR_RET_BAC_MISSP_EXEC, p6-br-ret-bac-missp-exec) \
+__PMC_EV(P6, BR_CALL_EXEC, p6-br-call-exec) \
+__PMC_EV(P6, BR_CALL_MISSP_EXEC, p6-br-call-missp-exec) \
+__PMC_EV(P6, BR_IND_CALL_EXEC, p6-br-ind-call-exec) \
+__PMC_EV(P6, EMON_SIMD_INSTR_RETIRED, p6-emon-simd-instr-retired) \
+__PMC_EV(P6, EMON_SYNCH_UOPS, p6-emon-synch-uops) \
+__PMC_EV(P6, EMON_ESP_UOPS, p6-emon-esp-uops) \
+__PMC_EV(P6, EMON_FUSED_UOPS_RET, p6-emon-fused-uops-ret) \
+__PMC_EV(P6, EMON_UNFUSION, p6-emon-unfusion) \
+__PMC_EV(P6, EMON_PREF_RQSTS_UP, p6-emon-pref-rqsts-up) \
+__PMC_EV(P6, EMON_PREF_RQSTS_DN, p6-emon-pref-rqsts-dn) \
+__PMC_EV(P6, EMON_SSE_SSE2_INST_RETIRED, \
+ p6-emon-sse-sse2-inst-retired) \
+__PMC_EV(P6, EMON_SSE_SSE2_COMP_INST_RETIRED, \
+ p6-emon-sse-sse2-comp-inst-retired)
+
+
+#define PMC_EV_P6_FIRST PMC_EV_P6_DATA_MEM_REFS
+#define PMC_EV_P6_LAST PMC_EV_P6_EMON_SSE_SSE2_COMP_INST_RETIRED
+
+/* AMD K8 PMCs */
+
+#define __PMC_EV_K8() \
+__PMC_EV(K8, FP_DISPATCHED_FPU_OPS, k8-fp-dispatched-fpu-ops) \
+__PMC_EV(K8, FP_CYCLES_WITH_NO_FPU_OPS_RETIRED, \
+ k8-fp-cycles-with-no-fpu-ops-retired) \
+__PMC_EV(K8, FP_DISPATCHED_FPU_FAST_FLAG_OPS, \
+ k8-fp-dispatched-fpu-fast-flag-ops) \
+__PMC_EV(K8, LS_SEGMENT_REGISTER_LOAD, k8-ls-segment-register-load) \
+__PMC_EV(K8, LS_MICROARCHITECTURAL_RESYNC_BY_SELF_MODIFYING_CODE, \
+ k8-ls-microarchitectural-resync-by-self-modifying-code) \
+__PMC_EV(K8, LS_MICROARCHITECTURAL_RESYNC_BY_SNOOP, \
+ k8-ls-microarchitectural-resync-by-snoop) \
+__PMC_EV(K8, LS_BUFFER2_FULL, k8-ls-buffer2-full) \
+__PMC_EV(K8, LS_LOCKED_OPERATION, k8-ls-locked-operation) \
+__PMC_EV(K8, LS_MICROARCHITECTURAL_LATE_CANCEL, \
+ k8-ls-microarchitectural-late-cancel) \
+__PMC_EV(K8, LS_RETIRED_CFLUSH_INSTRUCTIONS, \
+ k8-ls-retired-cflush-instructions) \
+__PMC_EV(K8, LS_RETIRED_CPUID_INSTRUCTIONS, \
+ k8-ls-retired-cpuid-instructions) \
+__PMC_EV(K8, DC_ACCESS, k8-dc-access) \
+__PMC_EV(K8, DC_MISS, k8-dc-miss) \
+__PMC_EV(K8, DC_REFILL_FROM_L2, k8-dc-refill-from-l2) \
+__PMC_EV(K8, DC_REFILL_FROM_SYSTEM, k8-dc-refill-from-system) \
+__PMC_EV(K8, DC_COPYBACK, k8-dc-copyback) \
+__PMC_EV(K8, DC_L1_DTLB_MISS_AND_L2_DTLB_HIT, \
+ k8-dc-l1-dtlb-miss-and-l2-dtlb-hit) \
+__PMC_EV(K8, DC_L1_DTLB_MISS_AND_L2_DTLB_MISS, \
+ k8-dc-l1-dtlb-miss-and-l2-dtlb-miss) \
+__PMC_EV(K8, DC_MISALIGNED_DATA_REFERENCE, \
+ k8-dc-misaligned-data-reference) \
+__PMC_EV(K8, DC_MICROARCHITECTURAL_LATE_CANCEL, \
+ k8-dc-microarchitectural-late-cancel-of-an-access) \
+__PMC_EV(K8, DC_MICROARCHITECTURAL_EARLY_CANCEL, \
+ k8-dc-microarchitectural-early-cancel-of-an-access) \
+__PMC_EV(K8, DC_ONE_BIT_ECC_ERROR, k8-dc-one-bit-ecc-error) \
+__PMC_EV(K8, DC_DISPATCHED_PREFETCH_INSTRUCTIONS, \
+ k8-dc-dispatched-prefetch-instructions) \
+__PMC_EV(K8, DC_DCACHE_ACCESSES_BY_LOCKS, \
+ k8-dc-dcache-accesses-by-locks) \
+__PMC_EV(K8, BU_CPU_CLK_UNHALTED, k8-bu-cpu-clk-unhalted) \
+__PMC_EV(K8, BU_INTERNAL_L2_REQUEST, k8-bu-internal-l2-request) \
+__PMC_EV(K8, BU_FILL_REQUEST_L2_MISS, k8-bu-fill-request-l2-miss) \
+__PMC_EV(K8, BU_FILL_INTO_L2, k8-bu-fill-into-l2) \
+__PMC_EV(K8, IC_FETCH, k8-ic-fetch) \
+__PMC_EV(K8, IC_MISS, k8-ic-miss) \
+__PMC_EV(K8, IC_REFILL_FROM_L2, k8-ic-refill-from-l2) \
+__PMC_EV(K8, IC_REFILL_FROM_SYSTEM, k8-ic-refill-from-system) \
+__PMC_EV(K8, IC_L1_ITLB_MISS_AND_L2_ITLB_HIT, \
+ k8-ic-l1-itlb-miss-and-l2-itlb-hit) \
+__PMC_EV(K8, IC_L1_ITLB_MISS_AND_L2_ITLB_MISS, \
+ k8-ic-l1-itlb-miss-and-l2-itlb-miss) \
+__PMC_EV(K8, IC_MICROARCHITECTURAL_RESYNC_BY_SNOOP, \
+ k8-ic-microarchitectural-resync-by-snoop) \
+__PMC_EV(K8, IC_INSTRUCTION_FETCH_STALL, \
+ k8-ic-instruction-fetch-stall) \
+__PMC_EV(K8, IC_RETURN_STACK_HIT, k8-ic-return-stack-hit) \
+__PMC_EV(K8, IC_RETURN_STACK_OVERFLOW, k8-ic-return-stack-overflow) \
+__PMC_EV(K8, FR_RETIRED_X86_INSTRUCTIONS, \
+ k8-fr-retired-x86-instructions) \
+__PMC_EV(K8, FR_RETIRED_UOPS, k8-fr-retired-uops) \
+__PMC_EV(K8, FR_RETIRED_BRANCHES, k8-fr-retired-branches) \
+__PMC_EV(K8, FR_RETIRED_BRANCHES_MISPREDICTED, \
+ k8-fr-retired-branches-mispredicted) \
+__PMC_EV(K8, FR_RETIRED_TAKEN_BRANCHES, \
+ k8-fr-retired-taken-branches) \
+__PMC_EV(K8, FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED, \
+ k8-fr-retired-taken-branches-mispredicted) \
+__PMC_EV(K8, FR_RETIRED_FAR_CONTROL_TRANSFERS, \
+ k8-fr-retired-far-control-transfers) \
+__PMC_EV(K8, FR_RETIRED_RESYNCS, k8-fr-retired-resyncs) \
+__PMC_EV(K8, FR_RETIRED_NEAR_RETURNS, k8-fr-retired-near-returns) \
+__PMC_EV(K8, FR_RETIRED_NEAR_RETURNS_MISPREDICTED, \
+ k8-fr-retired-near-returns-mispredicted) \
+__PMC_EV(K8, \
+ FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED_BY_ADDR_MISCOMPARE, \
+ k8-fr-retired-taken-branches-mispredicted-by-addr-miscompare) \
+__PMC_EV(K8, FR_RETIRED_FPU_INSTRUCTIONS, \
+ k8-fr-retired-fpu-instructions) \
+__PMC_EV(K8, FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS, \
+ k8-fr-retired-fastpath-double-op-instructions) \
+__PMC_EV(K8, FR_INTERRUPTS_MASKED_CYCLES, \
+ k8-fr-interrupts-masked-cycles) \
+__PMC_EV(K8, FR_INTERRUPTS_MASKED_WHILE_PENDING_CYCLES, \
+ k8-fr-interrupts-masked-while-pending-cycles) \
+__PMC_EV(K8, FR_TAKEN_HARDWARE_INTERRUPTS, \
+ k8-fr-taken-hardware-interrupts) \
+__PMC_EV(K8, FR_DECODER_EMPTY, k8-fr-decoder-empty) \
+__PMC_EV(K8, FR_DISPATCH_STALLS, k8-fr-dispatch-stalls) \
+__PMC_EV(K8, FR_DISPATCH_STALL_FROM_BRANCH_ABORT_TO_RETIRE, \
+ k8-fr-dispatch-stall-from-branch-abort-to-retire) \
+__PMC_EV(K8, FR_DISPATCH_STALL_FOR_SERIALIZATION, \
+ k8-fr-dispatch-stall-for-serialization) \
+__PMC_EV(K8, FR_DISPATCH_STALL_FOR_SEGMENT_LOAD, \
+ k8-fr-dispatch-stall-for-segment-load) \
+__PMC_EV(K8, FR_DISPATCH_STALL_WHEN_REORDER_BUFFER_IS_FULL, \
+ k8-fr-dispatch-stall-when-reorder-buffer-is-full) \
+__PMC_EV(K8, \
+ FR_DISPATCH_STALL_WHEN_RESERVATION_STATIONS_ARE_FULL, \
+ k8-fr-dispatch-stall-when-reservation-stations-are-full) \
+__PMC_EV(K8, FR_DISPATCH_STALL_WHEN_FPU_IS_FULL, \
+ k8-fr-dispatch-stall-when-fpu-is-full) \
+__PMC_EV(K8, FR_DISPATCH_STALL_WHEN_LS_IS_FULL, \
+ k8-fr-dispatch-stall-when-ls-is-full) \
+__PMC_EV(K8, FR_DISPATCH_STALL_WHEN_WAITING_FOR_ALL_TO_BE_QUIET, \
+ k8-fr-dispatch-stall-when-waiting-for-all-to-be-quiet) \
+__PMC_EV(K8, \
+ FR_DISPATCH_STALL_WHEN_FAR_XFER_OR_RESYNC_BRANCH_PENDING, \
+ k8-fr-dispatch-stall-when-far-xfer-or-resync-branch-pending) \
+__PMC_EV(K8, FR_FPU_EXCEPTIONS, k8-fr-fpu-exceptions) \
+__PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR0, \
+ k8-fr-number-of-breakpoints-for-dr0) \
+__PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR1, \
+ k8-fr-number-of-breakpoints-for-dr1) \
+__PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR2, \
+ k8-fr-number-of-breakpoints-for-dr2) \
+__PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR3, \
+ k8-fr-number-of-breakpoints-for-dr3) \
+__PMC_EV(K8, NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT, \
+ k8-nb-memory-controller-page-access-event) \
+__PMC_EV(K8, NB_MEMORY_CONTROLLER_PAGE_TABLE_OVERFLOW, \
+ k8-nb-memory-controller-page-table-overflow) \
+__PMC_EV(K8, NB_MEMORY_CONTROLLER_DRAM_COMMAND_SLOTS_MISSED, \
+ k8-nb-memory-controller-dram-slots-missed) \
+__PMC_EV(K8, NB_MEMORY_CONTROLLER_TURNAROUND, \
+ k8-nb-memory-controller-turnaround) \
+__PMC_EV(K8, NB_MEMORY_CONTROLLER_BYPASS_SATURATION, \
+ k8-nb-memory-controller-bypass-saturation) \
+__PMC_EV(K8, NB_SIZED_COMMANDS, k8-nb-sized-commands) \
+__PMC_EV(K8, NB_PROBE_RESULT, k8-nb-probe-result) \
+__PMC_EV(K8, NB_HT_BUS0_BANDWIDTH, k8-nb-ht-bus0-bandwidth) \
+__PMC_EV(K8, NB_HT_BUS1_BANDWIDTH, k8-nb-ht-bus1-bandwidth) \
+__PMC_EV(K8, NB_HT_BUS2_BANDWIDTH, k8-nb-ht-bus2-bandwidth)
+
+#define PMC_EV_K8_FIRST PMC_EV_K8_FP_DISPATCHED_FPU_OPS
+#define PMC_EV_K8_LAST PMC_EV_K8_NB_HT_BUS2_BANDWIDTH
+
+
+/* Intel Pentium Events */
+#define __PMC_EV_P5() \
+__PMC_EV(P5, DATA_READ, p5-data-read) \
+__PMC_EV(P5, DATA_WRITE, p5-data-write) \
+__PMC_EV(P5, DATA_TLB_MISS, p5-data-tlb-miss) \
+__PMC_EV(P5, DATA_READ_MISS, p5-data-read-miss) \
+__PMC_EV(P5, DATA_WRITE_MISS, p5-data-write-miss) \
+__PMC_EV(P5, WRITE_HIT_TO_M_OR_E_STATE_LINES, \
+ p5-write-hit-to-m-or-e-state-lines) \
+__PMC_EV(P5, DATA_CACHE_LINES_WRITTEN_BACK, \
+ p4-data-cache-lines-written-back) \
+__PMC_EV(P5, EXTERNAL_SNOOPS, p5-external-snoops) \
+__PMC_EV(P5, EXTERNAL_DATA_CACHE_SNOOP_HITS, \
+ p5-external-data-cache-snoop-hits) \
+__PMC_EV(P5, MEMORY_ACCESSES_IN_BOTH_PIPES, \
+ p5-memory-accesses-in-both-pipes) \
+__PMC_EV(P5, BANK_CONFLICTS, p5-bank-conflicts) \
+__PMC_EV(P5, MISALIGNED_DATA_OR_IO_REFERENCES, \
+ p5-misaligned-data-or-io-references) \
+__PMC_EV(P5, CODE_READ, p5-code-read) \
+__PMC_EV(P5, CODE_TLB_MISS, p5-code-tlb-miss) \
+__PMC_EV(P5, CODE_CACHE_MISS, p5-code-cache-miss) \
+__PMC_EV(P5, ANY_SEGMENT_REGISTER_LOADED, \
+ p5-any-segment-register-loaded) \
+__PMC_EV(P5, BRANCHES, p5-branches) \
+__PMC_EV(P5, BTB_HITS, p5-btb-hits) \
+__PMC_EV(P5, TAKEN_BRANCH_OR_BTB_HIT, \
+ p5-taken-branch-or-btb-hit) \
+__PMC_EV(P5, PIPELINE_FLUSHES, p5-pipeline-flushes) \
+__PMC_EV(P5, INSTRUCTIONS_EXECUTED, p5-instructions-executed) \
+__PMC_EV(P5, INSTRUCTIONS_EXECUTED_V_PIPE, \
+ p5-instructions-executed-v-pipe) \
+__PMC_EV(P5, BUS_CYCLE_DURATION, p5-bus-cycle-duration) \
+__PMC_EV(P5, WRITE_BUFFER_FULL_STALL_DURATION, \
+ p5-write-buffer-full-stall-duration) \
+__PMC_EV(P5, WAITING_FOR_DATA_MEMORY_READ_STALL_DURATION, \
+ p5-waiting-for-data-memory-read-stall-duration) \
+__PMC_EV(P5, STALL_ON_WRITE_TO_AN_E_OR_M_STATE_LINE, \
+ p5-stall-on-write-to-an-e-or-m-state-line) \
+__PMC_EV(P5, LOCKED_BUS_CYCLE, p5-locked-bus-cycle) \
+__PMC_EV(P5, IO_READ_OR_WRITE_CYCLE, p5-io-read-or-write-cycle) \
+__PMC_EV(P5, NONCACHEABLE_MEMORY_READS, \
+ p5-noncacheable-memory-reads) \
+__PMC_EV(P5, PIPELINE_AGI_STALLS, p5-pipeline-agi-stalls) \
+__PMC_EV(P5, FLOPS, p5-flops) \
+__PMC_EV(P5, BREAKPOINT_MATCH_ON_DR0_REGISTER, \
+ p5-breakpoint-match-on-dr0-register) \
+__PMC_EV(P5, BREAKPOINT_MATCH_ON_DR1_REGISTER, \
+ p5-breakpoint-match-on-dr1-register) \
+__PMC_EV(P5, BREAKPOINT_MATCH_ON_DR2_REGISTER, \
+ p5-breakpoint-match-on-dr2-register) \
+__PMC_EV(P5, BREAKPOINT_MATCH_ON_DR3_REGISTER, \
+ p5-breakpoint-match-on-dr3-register) \
+__PMC_EV(P5, HARDWARE_INTERRUPTS, p5-hardware-interrupts) \
+__PMC_EV(P5, DATA_READ_OR_WRITE, p5-data-read-or-write) \
+__PMC_EV(P5, DATA_READ_MISS_OR_WRITE_MISS, \
+ p5-data-read-miss-or-write-miss) \
+__PMC_EV(P5, BUS_OWNERSHIP_LATENCY, p5-bus-ownership-latency) \
+__PMC_EV(P5, BUS_OWNERSHIP_TRANSFERS, p5-bus-ownership-transfers) \
+__PMC_EV(P5, MMX_INSTRUCTIONS_EXECUTED_U_PIPE, \
+ p5-mmx-instructions-executed-u-pipe) \
+__PMC_EV(P5, MMX_INSTRUCTIONS_EXECUTED_V_PIPE, \
+ p5-mmx-instructions-executed-v-pipe) \
+__PMC_EV(P5, CACHE_M_LINE_SHARING, p5-cache-m-line-sharing) \
+__PMC_EV(P5, CACHE_LINE_SHARING, p5-cache-line-sharing) \
+__PMC_EV(P5, EMMS_INSTRUCTIONS_EXECUTED, \
+ p5-emms-instructions-executed) \
+__PMC_EV(P5, TRANSITIONS_BETWEEN_MMX_AND_FP_INSTRUCTIONS, \
+ p5-transitions-between-mmx-and-fp-instructions) \
+__PMC_EV(P5, BUS_UTILIZATION_DUE_TO_PROCESSOR_ACTIVITY, \
+ p5-bus-utilization-due-to-processor-activity) \
+__PMC_EV(P5, WRITES_TO_NONCACHEABLE_MEMORY, \
+ p5-writes-to-noncacheable-memory) \
+__PMC_EV(P5, SATURATING_MMX_INSTRUCTIONS_EXECUTED, \
+ p5-saturating-mmx-instructions-executed) \
+__PMC_EV(P5, SATURATIONS_PERFORMED, p5-saturations-performed) \
+__PMC_EV(P5, NUMBER_OF_CYCLES_NOT_IN_HALT_STATE, \
+ p5-number-of-cycles-not-in-halt-state) \
+__PMC_EV(P5, DATA_CACHE_TLB_MISS_STALL_DURATION, \
+ p5-data-cache-tlb-miss-stall-duration) \
+__PMC_EV(P5, MMX_INSTRUCTION_DATA_READS, \
+ p5-mmx-instruction-data-reads) \
+__PMC_EV(P5, MMX_INSTRUCTION_DATA_READ_MISSES, \
+ p5-mmx-instruction-data-read-misses) \
+__PMC_EV(P5, FLOATING_POINT_STALLS_DURATION, \
+ p5-floating-point-stalls-duration) \
+__PMC_EV(P5, TAKEN_BRANCHES, p5-taken-branches) \
+__PMC_EV(P5, D1_STARVATION_AND_FIFO_IS_EMPTY, \
+ p5-d1-starvation-and-fifo-is-empty) \
+__PMC_EV(P5, D1_STARVATION_AND_ONLY_ONE_INSTRUCTION_IN_FIFO, \
+ p5-d1-starvation-and-only-instruction-in-fifo) \
+__PMC_EV(P5, MMX_INSTRUCTION_DATA_WRITES, \
+ p5-mmx-instruction-data-writes) \
+__PMC_EV(P5, MMX_INSTRUCTION_DATA_WRITE_MISSES, \
+ p5-mmx-instruction-data-write-misses) \
+__PMC_EV(P5, PIPELINE_FLUSHES_DUE_TO_WRONG_BRANCH_PREDICTIONS, \
+ p5-pipeline-flushes-due-to-wrong-branch-predictions) \
+__PMC_EV(P5, \
+ PIPELINE_FLUSHES_DUE_TO_WRONG_BRANCH_PREDICTIONS_RESOLVED_IN_WB_STAGE, \
+ p5-pipeline-flushes-due-to-wrong-branch-predictions-resolved-in-wb-stage) \
+__PMC_EV(P5, MISALIGNED_DATA_MEMORY_REFERENCE_ON_MMX_INSTRUCTIONS, \
+ p5-misaligned-data-memory-reference-on-mmx-instructions) \
+__PMC_EV(P5, PIPELINE_STALL_FOR_MMX_INSTRUCTION_DATA_MEMORY_READS, \
+ p5-pipeline-stall-for-mmx-instruction-data-memory-reads) \
+__PMC_EV(P5, MISPREDICTED_OR_UNPREDICTED_RETURNS, \
+ p5-mispredicted-or-unpredicted-returns) \
+__PMC_EV(P5, PREDICTED_RETURNS, p5-predicted-returns) \
+__PMC_EV(P5, MMX_MULTIPLY_UNIT_INTERLOCK, \
+ p5-mmx-multiply-unit-interlock) \
+__PMC_EV(P5, MOVD_MOVQ_STORE_STALL_DUE_TO_PREVIOUS_MMX_OPERATION, \
+ p5-movd-movq-store-stall-due-to-previous-mmx-operation) \
+__PMC_EV(P5, RETURNS, p5-returns) \
+__PMC_EV(P5, BTB_FALSE_ENTRIES, p5-btb-false-entries) \
+__PMC_EV(P5, BTB_MISS_PREDICTION_ON_NOT_TAKEN_BRANCH, \
+ p5-btb-miss-prediction-on-not-taken-branch) \
+__PMC_EV(P5, \
+ FULL_WRITE_BUFFER_STALL_DURATION_WHILE_EXECUTING_MMX_INSTRUCTIONS, \
+ p5-full-write-buffer-stall-duration-while-executing-mmx-instructions) \
+__PMC_EV(P5, STALL_ON_MMX_INSTRUCTION_WRITE_TO_E_OR_M_STATE_LINE, \
+ p5-stall-on-mmx-instruction-write-to-e-o-m-state-line)
+
+#define PMC_EV_P5_FIRST PMC_EV_P5_DATA_READ
+#define PMC_EV_P5_LAST \
+ PMC_EV_P5_STALL_ON_MMX_INSTRUCTION_WRITE_TO_E_OR_M_STATE_LINE
+
+/* timestamp counters. */
+#define __PMC_EV_TSC() \
+ __PMC_EV(TSC, TSC, tsc)
+
+/* All known PMC events */
+#define __PMC_EVENTS() \
+ __PMC_EV_TSC() \
+ __PMC_EV_K7() \
+ __PMC_EV_P6() \
+ __PMC_EV_P4() \
+ __PMC_EV_K8() \
+ __PMC_EV_P5() \
+
+#define PMC_EVENT_FIRST PMC_EV_TSC_TSC
+#define PMC_EVENT_LAST PMC_EV_P5_LAST
+
+#endif /* _DEV_HWPMC_PMC_EVENTS_H_ */
diff --git a/sys/i386/include/pmc_mdep.h b/sys/i386/include/pmc_mdep.h
index e66fe4e..2d4c4a3 100644
--- a/sys/i386/include/pmc_mdep.h
+++ b/sys/i386/include/pmc_mdep.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2003, Joseph Koshy
+ * Copyright (c) 2003-2005 Joseph Koshy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -26,166 +26,58 @@
* $FreeBSD$
*/
-/* Machine dependent interfaces */
-
#ifndef _MACHINE_PMC_MDEP_H
#define _MACHINE_PMC_MDEP_H 1
-#include <machine/cpufunc.h>
-#include <machine/specialreg.h>
-
-/* AMD K7 PMCs */
-
-#define K7_NPMCS 5 /* 1 TSC + 4 PMCs */
-
-#define K7_PMC_COUNTERMASK 0xFF000000
-#define K7_PMC_TO_COUNTER(x) (((x) << 24) & K7_PMC_COUNTERMASK)
-#define K7_PMC_INVERT (1 << 23)
-#define K7_PMC_ENABLE (1 << 22)
-#define K7_PMC_INT (1 << 20)
-#define K7_PMC_PC (1 << 19)
-#define K7_PMC_EDGE (1 << 18)
-#define K7_PMC_OS (1 << 17)
-#define K7_PMC_USR (1 << 16)
-
-#define K7_PMC_UNITMASK_M 0x10
-#define K7_PMC_UNITMASK_O 0x08
-#define K7_PMC_UNITMASK_E 0x04
-#define K7_PMC_UNITMASK_S 0x02
-#define K7_PMC_UNITMASK_I 0x01
-#define K7_PMC_UNITMASK_MOESI 0x1F
-
-#define K7_PMC_UNITMASK 0xFF00
-#define K7_PMC_EVENTMASK 0x00FF
-#define K7_PMC_TO_UNITMASK(x) (((x) << 8) & K7_PMC_UNITMASK)
-#define K7_PMC_TO_EVENTMASK(x) ((x) & 0xFF)
-#define K7_VALID_BITS (K7_PMC_COUNTERMASK | K7_PMC_INVERT | \
- K7_PMC_ENABLE | K7_PMC_INT | K7_PMC_PC | K7_PMC_EDGE | K7_PMC_OS | \
- K7_PMC_USR | K7_PMC_UNITMASK | K7_PMC_EVENTMASK)
-
-/* Intel P4 PMCs */
-
-#define P4_NPMCS 19 /* 1 TSC + 18 PMCS */
-#define P4_NESCR 45
-#define P4_INVALID_PMC_INDEX -1
-#define P4_MAX_ESCR_PER_EVENT 2
-#define P4_MAX_PMC_PER_ESCR 3
-
-#define P4_CCCR_OVF (1 << 31)
-#define P4_CCCR_CASCADE (1 << 30)
-#define P4_CCCR_OVF_PMI_T1 (1 << 27)
-#define P4_CCCR_OVF_PMI_T0 (1 << 26)
-#define P4_CCCR_FORCE_OVF (1 << 25)
-#define P4_CCCR_EDGE (1 << 24)
-#define P4_CCCR_THRESHOLD_SHIFT 20
-#define P4_CCCR_THRESHOLD_MASK 0x00F00000
-#define P4_CCCR_TO_THRESHOLD(C) (((C) << P4_CCCR_THRESHOLD_SHIFT) & \
- P4_CCCR_THRESHOLD_MASK)
-#define P4_CCCR_COMPLEMENT (1 << 19)
-#define P4_CCCR_COMPARE (1 << 18)
-#define P4_CCCR_ACTIVE_THREAD_SHIFT 16
-#define P4_CCCR_ACTIVE_THREAD_MASK 0x00030000
-#define P4_CCCR_TO_ACTIVE_THREAD(T) (((T) << P4_CCCR_ACTIVE_THREAD_SHIFT) & \
- P4_CCCR_ACTIVE_THREAD_MASK)
-#define P4_CCCR_ESCR_SELECT_SHIFT 13
-#define P4_CCCR_ESCR_SELECT_MASK 0x0000E000
-#define P4_CCCR_TO_ESCR_SELECT(E) (((E) << P4_CCCR_ESCR_SELECT_SHIFT) & \
- P4_CCCR_ESCR_SELECT_MASK)
-#define P4_CCCR_ENABLE (1 << 12)
-#define P4_CCCR_VALID_BITS (P4_CCCR_OVF | P4_CCCR_CASCADE | \
- P4_CCCR_OVF_PMI_T1 | P4_CCCR_OVF_PMI_T0 | P4_CCCR_FORCE_OVF | \
- P4_CCCR_EDGE | P4_CCCR_THRESHOLD_MASK | P4_CCCR_COMPLEMENT | \
- P4_CCCR_COMPARE | P4_CCCR_ESCR_SELECT_MASK | P4_CCCR_ENABLE)
-
-#define P4_ESCR_EVENT_SELECT_SHIFT 25
-#define P4_ESCR_EVENT_SELECT_MASK 0x7E000000
-#define P4_ESCR_TO_EVENT_SELECT(E) (((E) << P4_ESCR_EVENT_SELECT_SHIFT) & \
- P4_ESCR_EVENT_SELECT_MASK)
-#define P4_ESCR_EVENT_MASK_SHIFT 9
-#define P4_ESCR_EVENT_MASK_MASK 0x01FFFE00
-#define P4_ESCR_TO_EVENT_MASK(M) (((M) << P4_ESCR_EVENT_MASK_SHIFT) & \
- P4_ESCR_EVENT_MASK_MASK)
-#define P4_ESCR_TAG_VALUE_SHIFT 5
-#define P4_ESCR_TAG_VALUE_MASK 0x000001E0
-#define P4_ESCR_TO_TAG_VALUE(T) (((T) << P4_ESCR_TAG_VALUE_SHIFT) & \
- P4_ESCR_TAG_VALUE_MASK)
-#define P4_ESCR_TAG_ENABLE 0x00000010
-#define P4_ESCR_T0_OS 0x00000008
-#define P4_ESCR_T0_USR 0x00000004
-#define P4_ESCR_T1_OS 0x00000002
-#define P4_ESCR_T1_USR 0x00000001
-#define P4_ESCR_OS P4_ESCR_T0_OS
-#define P4_ESCR_USR P4_ESCR_T0_USR
-#define P4_ESCR_VALID_BITS (P4_ESCR_EVENT_SELECT_MASK | \
- P4_ESCR_EVENT_MASK_MASK | P4_ESCR_TAG_VALUE_MASK | \
- P4_ESCR_TAG_ENABLE | P4_ESCR_T0_OS | P4_ESCR_T0_USR | P4_ESCR_T1_OS \
- P4_ESCR_T1_USR)
-
-#define P4_PERFCTR_MASK 0xFFFFFFFFFFLL /* 40 bits */
-
-#define P4_CCCR_MSR_FIRST 0x360 /* MSR_BPU_CCCR0 */
-#define P4_PERFCTR_MSR_FIRST 0x300 /* MSR_BPU_COUNTER0 */
-
-#define P4_RELOAD_COUNT_TO_PERFCTR_VALUE(V) (1 - (V))
-#define P4_PERFCTR_VALUE_TO_RELOAD_COUNT(P) (1 - (P))
-
-/* Intel PPro, Celeron, P-II, P-III, Pentium-M PMCS */
-
-#define P6_NPMCS 3 /* 1 TSC + 2 PMCs */
+/*
+ * On the i386 platform we support the following PMCs.
+ *
+ * K7 AMD Athlon XP/MP and other 32 bit processors.
+ * K8 AMD Athlon64 and Opteron PMCs in 32 bit mode.
+ * PIV Intel P4/HTT and P4/EMT64
+ * PPRO Intel Pentium Pro, Pentium-II, Pentium-III, Celeron and
+ * Pentium-M processors
+ * PENTIUM Intel Pentium MMX.
+ */
-#define P6_EVSEL_CMASK_MASK 0xFF000000
-#define P6_EVSEL_TO_CMASK(C) (((C) & 0xFF) << 24)
-#define P6_EVSEL_INV (1 << 23)
-#define P6_EVSEL_EN (1 << 22)
-#define P6_EVSEL_INT (1 << 20)
-#define P6_EVSEL_PC (1 << 19)
-#define P6_EVSEL_E (1 << 18)
-#define P6_EVSEL_OS (1 << 17)
-#define P6_EVSEL_USR (1 << 16)
-#define P6_EVSEL_UMASK_MASK 0x0000FF00
-#define P6_EVSEL_TO_UMASK(U) (((U) & 0xFF) << 8)
-#define P6_EVSEL_EVENT_SELECT(ES) ((ES) & 0xFF)
-#define P6_EVSEL_RESERVED (1 << 21)
+#include <dev/hwpmc/hwpmc_amd.h> /* K7 and K8 */
+#include <dev/hwpmc/hwpmc_piv.h>
+#include <dev/hwpmc/hwpmc_ppro.h>
+#include <dev/hwpmc/hwpmc_pentium.h>
-#define P6_MSR_EVSEL0 0x0186
-#define P6_MSR_EVSEL1 0x0187
-#define P6_MSR_PERFCTR0 0x00C1
-#define P6_MSR_PERFCTR1 0x00C2
+/*
+ * Architecture specific extensions to <sys/pmc.h> structures.
+ */
-#define P6_PERFCTR_MASK 0xFFFFFFFFFFLL /* 40 bits */
+union pmc_md_op_pmcallocate {
+ struct pmc_md_amd_op_pmcallocate pm_amd;
+ struct pmc_md_ppro_op_pmcallocate pm_ppro;
+ struct pmc_md_pentium_op_pmcallocate pm_pentium;
+ struct pmc_md_p4_op_pmcallocate pm_p4;
+ uint64_t __pad[4];
+};
-/* Intel Pentium PMCs */
+/* Logging */
+#define PMCLOG_READADDR PMCLOG_READ32
+#define PMCLOG_EMITADDR PMCLOG_EMIT32
-#define PENTIUM_NPMCS 3 /* 1 TSC + 2 PMCs */
-#define PENTIUM_CESR_PC1 (1 << 25)
-#define PENTIUM_CESR_CC1_MASK 0x01C00000
-#define PENTIUM_CESR_TO_CC1(C) (((C) & 0x07) << 22)
-#define PENTIUM_CESR_ES1_MASK 0x003F0000
-#define PENTIUM_CESR_TO_ES1(E) (((E) & 0x3F) << 16)
-#define PENTIUM_CESR_PC0 (1 << 9)
-#define PENTIUM_CESR_CC0_MASK 0x000001C0
-#define PENTIUM_CESR_TO_CC0(C) (((C) & 0x07) << 6)
-#define PENTIUM_CESR_ES0_MASK 0x0000003F
-#define PENTIUM_CESR_TO_ES0(E) ((E) & 0x3F)
-#define PENTIUM_CESR_RESERVED 0xFC00FC00
+#ifdef _KERNEL
-#define PENTIUM_MSR_CESR 0x11
-#define PENTIUM_MSR_CTR0 0x12
-#define PENTIUM_MSR_CTR1 0x13
+/* MD extension for 'struct pmc' */
+union pmc_md_pmc {
+ struct pmc_md_amd_pmc pm_amd;
+ struct pmc_md_ppro_pmc pm_ppro;
+ struct pmc_md_pentium_pmc pm_pentium;
+ struct pmc_md_p4_pmc pm_p4;
+};
-#ifdef _KERNEL
+struct pmc;
/*
* Prototypes
*/
-#if defined(__i386__)
-struct pmc_mdep *pmc_amd_initialize(void); /* AMD K7/K8 PMCs */
-struct pmc_mdep *pmc_intel_initialize(void); /* Intel PMCs */
-int pmc_initialize_p4(struct pmc_mdep *); /* Pentium IV PMCs */
-int pmc_initialize_p5(struct pmc_mdep *); /* Pentium PMCs */
-int pmc_initialize_p6(struct pmc_mdep *); /* Pentium Pro PMCs */
-#endif /* defined(__i386__) */
+void pmc_x86_lapic_enable_pmc_interrupt(void);
#endif /* _KERNEL */
#endif /* _MACHINE_PMC_MDEP_H */
diff --git a/sys/ia64/include/pmc_mdep.h b/sys/ia64/include/pmc_mdep.h
index 54100fb..cf643c7 100644
--- a/sys/ia64/include/pmc_mdep.h
+++ b/sys/ia64/include/pmc_mdep.h
@@ -7,4 +7,18 @@
#ifndef _MACHINE_PMC_MDEP_H_
#define _MACHINE_PMC_MDEP_H_
+union pmc_md_op_pmcallocate {
+ uint64_t __pad[4];
+};
+
+/* Logging */
+#define PMCLOG_READADDR PMCLOG_READ64
+#define PMCLOG_EMITADDR PMCLOG_EMIT64
+
+#ifdef _KERNEL
+union pmc_md_pmc {
+};
+
+#endif
+
#endif /* !_MACHINE_PMC_MDEP_H_ */
diff --git a/sys/kern/kern_exec.c b/sys/kern/kern_exec.c
index 78d86ce..2a4c9ab 100644
--- a/sys/kern/kern_exec.c
+++ b/sys/kern/kern_exec.c
@@ -670,14 +670,15 @@ interpret:
#ifdef HWPMC_HOOKS
/*
- * Check if the process is using PMCs and if so do exec() time
+ * Check if system-wide sampling is in effect or if the
+ * current process is using PMCs. If so, do exec() time
* processing. This processing needs to happen AFTER the
* P_INEXEC flag is cleared.
*
* The proc lock needs to be released before taking the PMC
* SX.
*/
- if (PMC_PROC_IS_USING_PMCS(p)) {
+ if (PMC_SYSTEM_SAMPLING_ACTIVE() || PMC_PROC_IS_USING_PMCS(p)) {
PROC_UNLOCK(p);
PMC_CALL_HOOK_X(td, PMC_FN_PROCESS_EXEC,
(void *) &credential_changing);
diff --git a/sys/kern/kern_pmc.c b/sys/kern/kern_pmc.c
index ee0f8ec..43059cc 100644
--- a/sys/kern/kern_pmc.c
+++ b/sys/kern/kern_pmc.c
@@ -37,7 +37,14 @@ int (*pmc_hook)(struct thread *td, int function, void *arg) = NULL;
/* Interrupt handler */
int (*pmc_intr)(int cpu, uintptr_t pc, int usermode) = NULL;
-cpumask_t pmc_cpumask;
+volatile cpumask_t pmc_cpumask;
+
+/*
+ * A global count of SS mode PMCs. When non-zero, this means that
+ * we have processes that are sampling the system as a whole.
+ */
+
+volatile int pmc_ss_count;
/*
* Since PMC(4) may not be loaded in the current kernel, the
diff --git a/sys/modules/hwpmc/Makefile b/sys/modules/hwpmc/Makefile
index e07b83b..9d7dfde 100644
--- a/sys/modules/hwpmc/Makefile
+++ b/sys/modules/hwpmc/Makefile
@@ -1,4 +1,4 @@
-#
+#
# $FreeBSD$
#
@@ -6,16 +6,38 @@
KMOD= hwpmc
-SRCS= hwpmc_mod.c
+SRCS= hwpmc_mod.c hwpmc_logging.c vnode_if.h
WARNS?= 2
-.if ${MACHINE_ARCH} == "i386"
-SRCS+= hwpmc_amd.c hwpmc_intel.c hwpmc_piv.c hwpmc_ppro.c hwpmc_pentium.c
+.if ${MACHINE_ARCH} == "alpha"
+SRCS+= hwpmc_alpha.c
.endif
.if ${MACHINE_ARCH} == "amd64"
-SRCS+= hwpmc_amd.c
+SRCS+= hwpmc_amd.c hwpmc_piv.c hwpmc_x86.c
+SRCS+= device_if.h bus_if.h
+.endif
+
+.if ${MACHINE_ARCH} == "arm"
+SRCS+= hwpmc_arm.c
+.endif
+
+.if ${MACHINE_ARCH} == "i386"
+SRCS+= hwpmc_amd.c hwpmc_piv.c hwpmc_ppro.c hwpmc_pentium.c hwpmc_x86.c
+SRCS+= device_if.h bus_if.h
+.endif
+
+.if ${MACHINE_ARCH} == "ia64"
+SRCS+= hwpmc_ia64.c
+.endif
+
+.if ${MACHINE_ARCH} == "powerpc"
+SRCS+= hwpmc_powerpc.c
+.endif
+
+.if ${MACHINE_ARCH} == "sparc64"
+SRCS+= hwpmc_sparc64.c
.endif
.include <bsd.kmod.mk>
diff --git a/sys/powerpc/include/pmc_mdep.h b/sys/powerpc/include/pmc_mdep.h
index 54100fb..d5417c3 100644
--- a/sys/powerpc/include/pmc_mdep.h
+++ b/sys/powerpc/include/pmc_mdep.h
@@ -7,4 +7,19 @@
#ifndef _MACHINE_PMC_MDEP_H_
#define _MACHINE_PMC_MDEP_H_
+union pmc_md_op_pmcallocate {
+ uint64_t __pad[4];
+};
+
+/* Logging */
+#define PMCLOG_READADDR PMCLOG_READ32
+#define PMCLOG_EMITADDR PMCLOG_EMIT32
+
+#ifdef _KERNEL
+
+union pmc_md_pmc {
+};
+
+#endif
+
#endif /* !_MACHINE_PMC_MDEP_H_ */
diff --git a/sys/sparc64/include/pmc_mdep.h b/sys/sparc64/include/pmc_mdep.h
index 54100fb..cf643c7 100644
--- a/sys/sparc64/include/pmc_mdep.h
+++ b/sys/sparc64/include/pmc_mdep.h
@@ -7,4 +7,18 @@
#ifndef _MACHINE_PMC_MDEP_H_
#define _MACHINE_PMC_MDEP_H_
+union pmc_md_op_pmcallocate {
+ uint64_t __pad[4];
+};
+
+/* Logging */
+#define PMCLOG_READADDR PMCLOG_READ64
+#define PMCLOG_EMITADDR PMCLOG_EMIT64
+
+#ifdef _KERNEL
+union pmc_md_pmc {
+};
+
+#endif
+
#endif /* !_MACHINE_PMC_MDEP_H_ */
diff --git a/sys/sys/pmc.h b/sys/sys/pmc.h
index ae1d124..5126f32 100644
--- a/sys/sys/pmc.h
+++ b/sys/sys/pmc.h
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2003, Joseph Koshy
+ * Copyright (c) 2003-2005, Joseph Koshy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -29,17 +29,30 @@
#ifndef _SYS_PMC_H_
#define _SYS_PMC_H_
+#include <dev/hwpmc/pmc_events.h>
+
#include <machine/pmc_mdep.h>
+#include <machine/profile.h>
#define PMC_MODULE_NAME "hwpmc"
#define PMC_NAME_MAX 16 /* HW counter name size */
#define PMC_CLASS_MAX 4 /* #classes of PMCs in a system */
-/* Kernel<->userland API version number [MMmmpppp] */
-
+/*
+ * Kernel<->userland API version number [MMmmpppp]
+ *
+ * Major numbers are to be incremented when an incompatible change to
+ * the ABI occurs that older clients will not be able to handle.
+ *
+ * Minor numbers are incremented when a backwards compatible change
+ * occurs that allows older correct programs to run unchanged. For
+ * example, when support for a new PMC type is added.
+ *
+ * The patch version is incremented for every bug fix.
+ */
#define PMC_VERSION_MAJOR 0x01
-#define PMC_VERSION_MINOR 0x01
-#define PMC_VERSION_PATCH 0x0002
+#define PMC_VERSION_MINOR 0x02
+#define PMC_VERSION_PATCH 0x0003
#define PMC_VERSION (PMC_VERSION_MAJOR << 24 | \
PMC_VERSION_MINOR << 16 | PMC_VERSION_PATCH)
@@ -198,511 +211,6 @@ enum pmc_disp {
#define PMC_DISP_LAST PMC_DISP_THREAD
/*
- * PMC event codes
- *
- * __PMC_EV(CLASS, SYMBOLIC-NAME, VALUE, READABLE-NAME)
- */
-
-/*
- * AMD K7 Events, from "The AMD Athlon(tm) Processor x86 Code
- * Optimization Guide" [Doc#22007K, Feb 2002]
- */
-
-#define __PMC_EV_K7() \
-__PMC_EV(K7, DC_ACCESSES, k7-dc-accesses) \
-__PMC_EV(K7, DC_MISSES, k7-dc-misses) \
-__PMC_EV(K7, DC_REFILLS_FROM_L2, k7-dc-refills-from-l2) \
-__PMC_EV(K7, DC_REFILLS_FROM_SYSTEM, k7-dc-refills-from-system) \
-__PMC_EV(K7, DC_WRITEBACKS, k7-dc-writebacks) \
-__PMC_EV(K7, L1_DTLB_MISS_AND_L2_DTLB_HITS, \
- k7-l1-dtlb-miss-and-l2-dtlb-hits) \
-__PMC_EV(K7, L1_AND_L2_DTLB_MISSES, k7-l1-and-l2-dtlb-misses) \
-__PMC_EV(K7, MISALIGNED_REFERENCES, k7-misaligned-references) \
-__PMC_EV(K7, IC_FETCHES, k7-ic-fetches) \
-__PMC_EV(K7, IC_MISSES, k7-ic-misses) \
-__PMC_EV(K7, L1_ITLB_MISSES, k7-l1-itlb-misses) \
-__PMC_EV(K7, L1_L2_ITLB_MISSES, k7-l1-l2-itlb-misses) \
-__PMC_EV(K7, RETIRED_INSTRUCTIONS, k7-retired-instructions) \
-__PMC_EV(K7, RETIRED_OPS, k7-retired-ops) \
-__PMC_EV(K7, RETIRED_BRANCHES, k7-retired-branches) \
-__PMC_EV(K7, RETIRED_BRANCHES_MISPREDICTED, \
- k7-retired-branches-mispredicted) \
-__PMC_EV(K7, RETIRED_TAKEN_BRANCHES, k7-retired-taken-branches) \
-__PMC_EV(K7, RETIRED_TAKEN_BRANCHES_MISPREDICTED, \
- k7-retired-taken-branches-mispredicted) \
-__PMC_EV(K7, RETIRED_FAR_CONTROL_TRANSFERS, \
- k7-retired-far-control-transfers) \
-__PMC_EV(K7, RETIRED_RESYNC_BRANCHES, k7-retired-resync-branches) \
-__PMC_EV(K7, INTERRUPTS_MASKED_CYCLES, k7-interrupts-masked-cycles) \
-__PMC_EV(K7, INTERRUPTS_MASKED_WHILE_PENDING_CYCLES, \
- k7-interrupts-masked-while-pending-cycles) \
-__PMC_EV(K7, HARDWARE_INTERRUPTS, k7-hardware-interrupts)
-
-#define PMC_EV_K7_FIRST PMC_EV_K7_DC_ACCESSES
-#define PMC_EV_K7_LAST PMC_EV_K7_HARDWARE_INTERRUPTS
-
-/*
- * Intel P4 Events, from "IA-32 Intel(r) Architecture Software
- * Developer's Manual, Volume 3: System Programming Guide" [245472-012]
- */
-
-#define __PMC_EV_P4() \
-__PMC_EV(P4, TC_DELIVER_MODE, p4-tc-deliver-mode) \
-__PMC_EV(P4, BPU_FETCH_REQUEST, p4-bpu-fetch-request) \
-__PMC_EV(P4, ITLB_REFERENCE, p4-itlb-reference) \
-__PMC_EV(P4, MEMORY_CANCEL, p4-memory-cancel) \
-__PMC_EV(P4, MEMORY_COMPLETE, p4-memory-complete) \
-__PMC_EV(P4, LOAD_PORT_REPLAY, p4-load-port-replay) \
-__PMC_EV(P4, STORE_PORT_REPLAY, p4-store-port-replay) \
-__PMC_EV(P4, MOB_LOAD_REPLAY, p4-mob-load-replay) \
-__PMC_EV(P4, PAGE_WALK_TYPE, p4-page-walk-type) \
-__PMC_EV(P4, BSQ_CACHE_REFERENCE, p4-bsq-cache-reference) \
-__PMC_EV(P4, IOQ_ALLOCATION, p4-ioq-allocation) \
-__PMC_EV(P4, IOQ_ACTIVE_ENTRIES, p4-ioq-active-entries) \
-__PMC_EV(P4, FSB_DATA_ACTIVITY, p4-fsb-data-activity) \
-__PMC_EV(P4, BSQ_ALLOCATION, p4-bsq-allocation) \
-__PMC_EV(P4, BSQ_ACTIVE_ENTRIES, p4-bsq-active-entries) \
-__PMC_EV(P4, SSE_INPUT_ASSIST, p4-sse-input-assist) \
-__PMC_EV(P4, PACKED_SP_UOP, p4-packed-sp-uop) \
-__PMC_EV(P4, PACKED_DP_UOP, p4-packed-dp-uop) \
-__PMC_EV(P4, SCALAR_SP_UOP, p4-scalar-sp-uop) \
-__PMC_EV(P4, SCALAR_DP_UOP, p4-scalar-dp-uop) \
-__PMC_EV(P4, 64BIT_MMX_UOP, p4-64bit-mmx-uop) \
-__PMC_EV(P4, 128BIT_MMX_UOP, p4-128bit-mmx-uop) \
-__PMC_EV(P4, X87_FP_UOP, p4-x87-fp-uop) \
-__PMC_EV(P4, X87_SIMD_MOVES_UOP, p4-x87-simd-moves-uop) \
-__PMC_EV(P4, GLOBAL_POWER_EVENTS, p4-global-power-events) \
-__PMC_EV(P4, TC_MS_XFER, p4-tc-ms-xfer) \
-__PMC_EV(P4, UOP_QUEUE_WRITES, p4-uop-queue-writes) \
-__PMC_EV(P4, RETIRED_MISPRED_BRANCH_TYPE, \
- p4-retired-mispred-branch-type) \
-__PMC_EV(P4, RETIRED_BRANCH_TYPE, p4-retired-branch-type) \
-__PMC_EV(P4, RESOURCE_STALL, p4-resource-stall) \
-__PMC_EV(P4, WC_BUFFER, p4-wc-buffer) \
-__PMC_EV(P4, B2B_CYCLES, p4-b2b-cycles) \
-__PMC_EV(P4, BNR, p4-bnr) \
-__PMC_EV(P4, SNOOP, p4-snoop) \
-__PMC_EV(P4, RESPONSE, p4-response) \
-__PMC_EV(P4, FRONT_END_EVENT, p4-front-end-event) \
-__PMC_EV(P4, EXECUTION_EVENT, p4-execution-event) \
-__PMC_EV(P4, REPLAY_EVENT, p4-replay-event) \
-__PMC_EV(P4, INSTR_RETIRED, p4-instr-retired) \
-__PMC_EV(P4, UOPS_RETIRED, p4-uops-retired) \
-__PMC_EV(P4, UOP_TYPE, p4-uop-type) \
-__PMC_EV(P4, BRANCH_RETIRED, p4-branch-retired) \
-__PMC_EV(P4, MISPRED_BRANCH_RETIRED, p4-mispred-branch-retired) \
-__PMC_EV(P4, X87_ASSIST, p4-x87-assist) \
-__PMC_EV(P4, MACHINE_CLEAR, p4-machine-clear)
-
-#define PMC_EV_P4_FIRST PMC_EV_P4_TC_DELIVER_MODE
-#define PMC_EV_P4_LAST PMC_EV_P4_MACHINE_CLEAR
-
-/* Intel Pentium Pro, P-II, P-III and Pentium-M style events */
-
-#define __PMC_EV_P6() \
-__PMC_EV(P6, DATA_MEM_REFS, p6-data-mem-refs) \
-__PMC_EV(P6, DCU_LINES_IN, p6-dcu-lines-in) \
-__PMC_EV(P6, DCU_M_LINES_IN, p6-dcu-m-lines-in) \
-__PMC_EV(P6, DCU_M_LINES_OUT, p6-dcu-m-lines-out) \
-__PMC_EV(P6, DCU_MISS_OUTSTANDING, p6-dcu-miss-outstanding) \
-__PMC_EV(P6, IFU_FETCH, p6-ifu-fetch) \
-__PMC_EV(P6, IFU_FETCH_MISS, p6-ifu-fetch-miss) \
-__PMC_EV(P6, ITLB_MISS, p6-itlb-miss) \
-__PMC_EV(P6, IFU_MEM_STALL, p6-ifu-mem-stall) \
-__PMC_EV(P6, ILD_STALL, p6-ild-stall) \
-__PMC_EV(P6, L2_IFETCH, p6-l2-ifetch) \
-__PMC_EV(P6, L2_LD, p6-l2-ld) \
-__PMC_EV(P6, L2_ST, p6-l2-st) \
-__PMC_EV(P6, L2_LINES_IN, p6-l2-lines-in) \
-__PMC_EV(P6, L2_LINES_OUT, p6-l2-lines-out) \
-__PMC_EV(P6, L2_M_LINES_INM, p6-l2-m-lines-inm) \
-__PMC_EV(P6, L2_M_LINES_OUTM, p6-l2-m-lines-outm) \
-__PMC_EV(P6, L2_RQSTS, p6-l2-rqsts) \
-__PMC_EV(P6, L2_ADS, p6-l2-ads) \
-__PMC_EV(P6, L2_DBUS_BUSY, p6-l2-dbus-busy) \
-__PMC_EV(P6, L2_DBUS_BUSY_RD, p6-l2-dbus-busy-rd) \
-__PMC_EV(P6, BUS_DRDY_CLOCKS, p6-bus-drdy-clocks) \
-__PMC_EV(P6, BUS_LOCK_CLOCKS, p6-bus-lock-clocks) \
-__PMC_EV(P6, BUS_REQ_OUTSTANDING, p6-bus-req-outstanding) \
-__PMC_EV(P6, BUS_TRAN_BRD, p6-bus-tran-brd) \
-__PMC_EV(P6, BUS_TRAN_RFO, p6-bus-tran-rfo) \
-__PMC_EV(P6, BUS_TRANS_WB, p6-bus-trans-wb) \
-__PMC_EV(P6, BUS_TRAN_IFETCH, p6-bus-tran-ifetch) \
-__PMC_EV(P6, BUS_TRAN_INVAL, p6-bus-tran-inval) \
-__PMC_EV(P6, BUS_TRAN_PWR, p6-bus-tran-pwr) \
-__PMC_EV(P6, BUS_TRANS_P, p6-bus-trans-p) \
-__PMC_EV(P6, BUS_TRANS_IO, p6-bus-trans-io) \
-__PMC_EV(P6, BUS_TRAN_DEF, p6-bus-tran-def) \
-__PMC_EV(P6, BUS_TRAN_BURST, p6-bus-tran-burst) \
-__PMC_EV(P6, BUS_TRAN_ANY, p6-bus-tran-any) \
-__PMC_EV(P6, BUS_TRAN_MEM, p6-bus-tran-mem) \
-__PMC_EV(P6, BUS_DATA_RCV, p6-bus-data-rcv) \
-__PMC_EV(P6, BUS_BNR_DRV, p6-bus-bnr-drv) \
-__PMC_EV(P6, BUS_HIT_DRV, p6-bus-hit-drv) \
-__PMC_EV(P6, BUS_HITM_DRV, p6-bus-hitm-drv) \
-__PMC_EV(P6, BUS_SNOOP_STALL, p6-bus-snoop-stall) \
-__PMC_EV(P6, FLOPS, p6-flops) \
-__PMC_EV(P6, FP_COMPS_OPS_EXE, p6-fp-comps-ops-exe) \
-__PMC_EV(P6, FP_ASSIST, p6-fp-assist) \
-__PMC_EV(P6, MUL, p6-mul) \
-__PMC_EV(P6, DIV, p6-div) \
-__PMC_EV(P6, CYCLES_DIV_BUSY, p6-cycles-div-busy) \
-__PMC_EV(P6, LD_BLOCKS, p6-ld-blocks) \
-__PMC_EV(P6, SB_DRAINS, p6-sb-drains) \
-__PMC_EV(P6, MISALIGN_MEM_REF, p6-misalign-mem-ref) \
-__PMC_EV(P6, EMON_KNI_PREF_DISPATCHED, p6-emon-kni-pref-dispatched) \
-__PMC_EV(P6, EMON_KNI_PREF_MISS, p6-emon-kni-pref-miss) \
-__PMC_EV(P6, INST_RETIRED, p6-inst-retired) \
-__PMC_EV(P6, UOPS_RETIRED, p6-uops-retired) \
-__PMC_EV(P6, INST_DECODED, p6-inst-decoded) \
-__PMC_EV(P6, EMON_KNI_INST_RETIRED, p6-emon-kni-inst-retired) \
-__PMC_EV(P6, EMON_KNI_COMP_INST_RET, p6-emon-kni-comp-inst-ret) \
-__PMC_EV(P6, HW_INT_RX, p6-hw-int-rx) \
-__PMC_EV(P6, CYCLES_INT_MASKED, p6-cycles-int-masked) \
-__PMC_EV(P6, CYCLES_INT_PENDING_AND_MASKED, \
- p6-cycles-in-pending-and-masked) \
-__PMC_EV(P6, BR_INST_RETIRED, p6-br-inst-retired) \
-__PMC_EV(P6, BR_MISS_PRED_RETIRED, p6-br-miss-pred-retired) \
-__PMC_EV(P6, BR_TAKEN_RETIRED, p6-br-taken-retired) \
-__PMC_EV(P6, BR_MISS_PRED_TAKEN_RET, p6-br-miss-pred-taken-ret) \
-__PMC_EV(P6, BR_INST_DECODED, p6-br-inst-decoded) \
-__PMC_EV(P6, BTB_MISSES, p6-btb-misses) \
-__PMC_EV(P6, BR_BOGUS, p6-br-bogus) \
-__PMC_EV(P6, BACLEARS, p6-baclears) \
-__PMC_EV(P6, RESOURCE_STALLS, p6-resource-stalls) \
-__PMC_EV(P6, PARTIAL_RAT_STALLS, p6-partial-rat-stalls) \
-__PMC_EV(P6, SEGMENT_REG_LOADS, p6-segment-reg-loads) \
-__PMC_EV(P6, CPU_CLK_UNHALTED, p6-cpu-clk-unhalted) \
-__PMC_EV(P6, MMX_INSTR_EXEC, p6-mmx-instr-exec) \
-__PMC_EV(P6, MMX_SAT_INSTR_EXEC, p6-mmx-sat-instr-exec) \
-__PMC_EV(P6, MMX_UOPS_EXEC, p6-mmx-uops-exec) \
-__PMC_EV(P6, MMX_INSTR_TYPE_EXEC, p6-mmx-instr-type-exec) \
-__PMC_EV(P6, FP_MMX_TRANS, p6-fp-mmx-trans) \
-__PMC_EV(P6, MMX_ASSIST, p6-mmx-assist) \
-__PMC_EV(P6, MMX_INSTR_RET, p6-mmx-instr-ret) \
-__PMC_EV(P6, SEG_RENAME_STALLS, p6-seg-rename-stalls) \
-__PMC_EV(P6, SEG_REG_RENAMES, p6-seg-reg-renames) \
-__PMC_EV(P6, RET_SEG_RENAMES, p6-ret-seg-renames) \
-__PMC_EV(P6, EMON_EST_TRANS, p6-emon-est-trans) \
-__PMC_EV(P6, EMON_THERMAL_TRIP, p6-emon-thermal-trip) \
-__PMC_EV(P6, BR_INST_EXEC, p6-br-inst-exec) \
-__PMC_EV(P6, BR_MISSP_EXEC, p6-br-missp-exec) \
-__PMC_EV(P6, BR_BAC_MISSP_EXEC, p6-br-bac-missp-exec) \
-__PMC_EV(P6, BR_CND_EXEC, p6-br-cnd-exec) \
-__PMC_EV(P6, BR_CND_MISSP_EXEC, p6-br-cnd-missp-exec) \
-__PMC_EV(P6, BR_IND_EXEC, p6-br-ind-exec) \
-__PMC_EV(P6, BR_IND_MISSP_EXEC, p6-br-ind-missp-exec) \
-__PMC_EV(P6, BR_RET_EXEC, p6-br-ret-exec) \
-__PMC_EV(P6, BR_RET_MISSP_EXEC, p6-br-ret-missp-exec) \
-__PMC_EV(P6, BR_RET_BAC_MISSP_EXEC, p6-br-ret-bac-missp-exec) \
-__PMC_EV(P6, BR_CALL_EXEC, p6-br-call-exec) \
-__PMC_EV(P6, BR_CALL_MISSP_EXEC, p6-br-call-missp-exec) \
-__PMC_EV(P6, BR_IND_CALL_EXEC, p6-br-ind-call-exec) \
-__PMC_EV(P6, EMON_SIMD_INSTR_RETIRED, p6-emon-simd-instr-retired) \
-__PMC_EV(P6, EMON_SYNCH_UOPS, p6-emon-synch-uops) \
-__PMC_EV(P6, EMON_ESP_UOPS, p6-emon-esp-uops) \
-__PMC_EV(P6, EMON_FUSED_UOPS_RET, p6-emon-fused-uops-ret) \
-__PMC_EV(P6, EMON_UNFUSION, p6-emon-unfusion) \
-__PMC_EV(P6, EMON_PREF_RQSTS_UP, p6-emon-pref-rqsts-up) \
-__PMC_EV(P6, EMON_PREF_RQSTS_DN, p6-emon-pref-rqsts-dn) \
-__PMC_EV(P6, EMON_SSE_SSE2_INST_RETIRED, \
- p6-emon-sse-sse2-inst-retired) \
-__PMC_EV(P6, EMON_SSE_SSE2_COMP_INST_RETIRED, \
- p6-emon-sse-sse2-comp-inst-retired)
-
-
-#define PMC_EV_P6_FIRST PMC_EV_P6_DATA_MEM_REFS
-#define PMC_EV_P6_LAST PMC_EV_P6_EMON_SSE_SSE2_COMP_INST_RETIRED
-
-/* AMD K8 PMCs */
-
-#define __PMC_EV_K8() \
-__PMC_EV(K8, FP_DISPATCHED_FPU_OPS, k8-fp-dispatched-fpu-ops) \
-__PMC_EV(K8, FP_CYCLES_WITH_NO_FPU_OPS_RETIRED, \
- k8-fp-cycles-with-no-fpu-ops-retired) \
-__PMC_EV(K8, FP_DISPATCHED_FPU_FAST_FLAG_OPS, \
- k8-fp-dispatched-fpu-fast-flag-ops) \
-__PMC_EV(K8, LS_SEGMENT_REGISTER_LOAD, k8-ls-segment-register-load) \
-__PMC_EV(K8, LS_MICROARCHITECTURAL_RESYNC_BY_SELF_MODIFYING_CODE, \
- k8-ls-microarchitectural-resync-by-self-modifying-code) \
-__PMC_EV(K8, LS_MICROARCHITECTURAL_RESYNC_BY_SNOOP, \
- k8-ls-microarchitectural-resync-by-snoop) \
-__PMC_EV(K8, LS_BUFFER2_FULL, k8-ls-buffer2-full) \
-__PMC_EV(K8, LS_LOCKED_OPERATION, k8-ls-locked-operation) \
-__PMC_EV(K8, LS_MICROARCHITECTURAL_LATE_CANCEL, \
- k8-ls-microarchitectural-late-cancel) \
-__PMC_EV(K8, LS_RETIRED_CFLUSH_INSTRUCTIONS, \
- k8-ls-retired-cflush-instructions) \
-__PMC_EV(K8, LS_RETIRED_CPUID_INSTRUCTIONS, \
- k8-ls-retired-cpuid-instructions) \
-__PMC_EV(K8, DC_ACCESS, k8-dc-access) \
-__PMC_EV(K8, DC_MISS, k8-dc-miss) \
-__PMC_EV(K8, DC_REFILL_FROM_L2, k8-dc-refill-from-l2) \
-__PMC_EV(K8, DC_REFILL_FROM_SYSTEM, k8-dc-refill-from-system) \
-__PMC_EV(K8, DC_COPYBACK, k8-dc-copyback) \
-__PMC_EV(K8, DC_L1_DTLB_MISS_AND_L2_DTLB_HIT, \
- k8-dc-l1-dtlb-miss-and-l2-dtlb-hit) \
-__PMC_EV(K8, DC_L1_DTLB_MISS_AND_L2_DTLB_MISS, \
- k8-dc-l1-dtlb-miss-and-l2-dtlb-miss) \
-__PMC_EV(K8, DC_MISALIGNED_DATA_REFERENCE, \
- k8-dc-misaligned-data-reference) \
-__PMC_EV(K8, DC_MICROARCHITECTURAL_LATE_CANCEL, \
- k8-dc-microarchitectural-late-cancel-of-an-access) \
-__PMC_EV(K8, DC_MICROARCHITECTURAL_EARLY_CANCEL, \
- k8-dc-microarchitectural-early-cancel-of-an-access) \
-__PMC_EV(K8, DC_ONE_BIT_ECC_ERROR, k8-dc-one-bit-ecc-error) \
-__PMC_EV(K8, DC_DISPATCHED_PREFETCH_INSTRUCTIONS, \
- k8-dc-dispatched-prefetch-instructions) \
-__PMC_EV(K8, DC_DCACHE_ACCESSES_BY_LOCKS, \
- k8-dc-dcache-accesses-by-locks) \
-__PMC_EV(K8, BU_CPU_CLK_UNHALTED, k8-bu-cpu-clk-unhalted) \
-__PMC_EV(K8, BU_INTERNAL_L2_REQUEST, k8-bu-internal-l2-request) \
-__PMC_EV(K8, BU_FILL_REQUEST_L2_MISS, k8-bu-fill-request-l2-miss) \
-__PMC_EV(K8, BU_FILL_INTO_L2, k8-bu-fill-into-l2) \
-__PMC_EV(K8, IC_FETCH, k8-ic-fetch) \
-__PMC_EV(K8, IC_MISS, k8-ic-miss) \
-__PMC_EV(K8, IC_REFILL_FROM_L2, k8-ic-refill-from-l2) \
-__PMC_EV(K8, IC_REFILL_FROM_SYSTEM, k8-ic-refill-from-system) \
-__PMC_EV(K8, IC_L1_ITLB_MISS_AND_L2_ITLB_HIT, \
- k8-ic-l1-itlb-miss-and-l2-itlb-hit) \
-__PMC_EV(K8, IC_L1_ITLB_MISS_AND_L2_ITLB_MISS, \
- k8-ic-l1-itlb-miss-and-l2-itlb-miss) \
-__PMC_EV(K8, IC_MICROARCHITECTURAL_RESYNC_BY_SNOOP, \
- k8-ic-microarchitectural-resync-by-snoop) \
-__PMC_EV(K8, IC_INSTRUCTION_FETCH_STALL, \
- k8-ic-instruction-fetch-stall) \
-__PMC_EV(K8, IC_RETURN_STACK_HIT, k8-ic-return-stack-hit) \
-__PMC_EV(K8, IC_RETURN_STACK_OVERFLOW, k8-ic-return-stack-overflow) \
-__PMC_EV(K8, FR_RETIRED_X86_INSTRUCTIONS, \
- k8-fr-retired-x86-instructions) \
-__PMC_EV(K8, FR_RETIRED_UOPS, k8-fr-retired-uops) \
-__PMC_EV(K8, FR_RETIRED_BRANCHES, k8-fr-retired-branches) \
-__PMC_EV(K8, FR_RETIRED_BRANCHES_MISPREDICTED, \
- k8-fr-retired-branches-mispredicted) \
-__PMC_EV(K8, FR_RETIRED_TAKEN_BRANCHES, \
- k8-fr-retired-taken-branches) \
-__PMC_EV(K8, FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED, \
- k8-fr-retired-taken-branches-mispredicted) \
-__PMC_EV(K8, FR_RETIRED_FAR_CONTROL_TRANSFERS, \
- k8-fr-retired-far-control-transfers) \
-__PMC_EV(K8, FR_RETIRED_RESYNCS, k8-fr-retired-resyncs) \
-__PMC_EV(K8, FR_RETIRED_NEAR_RETURNS, k8-fr-retired-near-returns) \
-__PMC_EV(K8, FR_RETIRED_NEAR_RETURNS_MISPREDICTED, \
- k8-fr-retired-near-returns-mispredicted) \
-__PMC_EV(K8, \
- FR_RETIRED_TAKEN_BRANCHES_MISPREDICTED_BY_ADDR_MISCOMPARE, \
- k8-fr-retired-taken-branches-mispredicted-by-addr-miscompare) \
-__PMC_EV(K8, FR_RETIRED_FPU_INSTRUCTIONS, \
- k8-fr-retired-fpu-instructions) \
-__PMC_EV(K8, FR_RETIRED_FASTPATH_DOUBLE_OP_INSTRUCTIONS, \
- k8-fr-retired-fastpath-double-op-instructions) \
-__PMC_EV(K8, FR_INTERRUPTS_MASKED_CYCLES, \
- k8-fr-interrupts-masked-cycles) \
-__PMC_EV(K8, FR_INTERRUPTS_MASKED_WHILE_PENDING_CYCLES, \
- k8-fr-interrupts-masked-while-pending-cycles) \
-__PMC_EV(K8, FR_TAKEN_HARDWARE_INTERRUPTS, \
- k8-fr-taken-hardware-interrupts) \
-__PMC_EV(K8, FR_DECODER_EMPTY, k8-fr-decoder-empty) \
-__PMC_EV(K8, FR_DISPATCH_STALLS, k8-fr-dispatch-stalls) \
-__PMC_EV(K8, FR_DISPATCH_STALL_FROM_BRANCH_ABORT_TO_RETIRE, \
- k8-fr-dispatch-stall-from-branch-abort-to-retire) \
-__PMC_EV(K8, FR_DISPATCH_STALL_FOR_SERIALIZATION, \
- k8-fr-dispatch-stall-for-serialization) \
-__PMC_EV(K8, FR_DISPATCH_STALL_FOR_SEGMENT_LOAD, \
- k8-fr-dispatch-stall-for-segment-load) \
-__PMC_EV(K8, FR_DISPATCH_STALL_WHEN_REORDER_BUFFER_IS_FULL, \
- k8-fr-dispatch-stall-when-reorder-buffer-is-full) \
-__PMC_EV(K8, \
- FR_DISPATCH_STALL_WHEN_RESERVATION_STATIONS_ARE_FULL, \
- k8-fr-dispatch-stall-when-reservation-stations-are-full) \
-__PMC_EV(K8, FR_DISPATCH_STALL_WHEN_FPU_IS_FULL, \
- k8-fr-dispatch-stall-when-fpu-is-full) \
-__PMC_EV(K8, FR_DISPATCH_STALL_WHEN_LS_IS_FULL, \
- k8-fr-dispatch-stall-when-ls-is-full) \
-__PMC_EV(K8, FR_DISPATCH_STALL_WHEN_WAITING_FOR_ALL_TO_BE_QUIET, \
- k8-fr-dispatch-stall-when-waiting-for-all-to-be-quiet) \
-__PMC_EV(K8, \
- FR_DISPATCH_STALL_WHEN_FAR_XFER_OR_RESYNC_BRANCH_PENDING, \
- k8-fr-dispatch-stall-when-far-xfer-or-resync-branch-pending) \
-__PMC_EV(K8, FR_FPU_EXCEPTIONS, k8-fr-fpu-exceptions) \
-__PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR0, \
- k8-fr-number-of-breakpoints-for-dr0) \
-__PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR1, \
- k8-fr-number-of-breakpoints-for-dr1) \
-__PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR2, \
- k8-fr-number-of-breakpoints-for-dr2) \
-__PMC_EV(K8, FR_NUMBER_OF_BREAKPOINTS_FOR_DR3, \
- k8-fr-number-of-breakpoints-for-dr3) \
-__PMC_EV(K8, NB_MEMORY_CONTROLLER_PAGE_ACCESS_EVENT, \
- k8-nb-memory-controller-page-access-event) \
-__PMC_EV(K8, NB_MEMORY_CONTROLLER_PAGE_TABLE_OVERFLOW, \
- k8-nb-memory-controller-page-table-overflow) \
-__PMC_EV(K8, NB_MEMORY_CONTROLLER_DRAM_COMMAND_SLOTS_MISSED, \
- k8-nb-memory-controller-dram-slots-missed) \
-__PMC_EV(K8, NB_MEMORY_CONTROLLER_TURNAROUND, \
- k8-nb-memory-controller-turnaround) \
-__PMC_EV(K8, NB_MEMORY_CONTROLLER_BYPASS_SATURATION, \
- k8-nb-memory-controller-bypass-saturation) \
-__PMC_EV(K8, NB_SIZED_COMMANDS, k8-nb-sized-commands) \
-__PMC_EV(K8, NB_PROBE_RESULT, k8-nb-probe-result) \
-__PMC_EV(K8, NB_HT_BUS0_BANDWIDTH, k8-nb-ht-bus0-bandwidth) \
-__PMC_EV(K8, NB_HT_BUS1_BANDWIDTH, k8-nb-ht-bus1-bandwidth) \
-__PMC_EV(K8, NB_HT_BUS2_BANDWIDTH, k8-nb-ht-bus2-bandwidth)
-
-#define PMC_EV_K8_FIRST PMC_EV_K8_FP_DISPATCHED_FPU_OPS
-#define PMC_EV_K8_LAST PMC_EV_K8_NB_HT_BUS2_BANDWIDTH
-
-
-/* Intel Pentium Events */
-#define __PMC_EV_P5() \
-__PMC_EV(P5, DATA_READ, p5-data-read) \
-__PMC_EV(P5, DATA_WRITE, p5-data-write) \
-__PMC_EV(P5, DATA_TLB_MISS, p5-data-tlb-miss) \
-__PMC_EV(P5, DATA_READ_MISS, p5-data-read-miss) \
-__PMC_EV(P5, DATA_WRITE_MISS, p5-data-write-miss) \
-__PMC_EV(P5, WRITE_HIT_TO_M_OR_E_STATE_LINES, \
- p5-write-hit-to-m-or-e-state-lines) \
-__PMC_EV(P5, DATA_CACHE_LINES_WRITTEN_BACK, \
- p4-data-cache-lines-written-back) \
-__PMC_EV(P5, EXTERNAL_SNOOPS, p5-external-snoops) \
-__PMC_EV(P5, EXTERNAL_DATA_CACHE_SNOOP_HITS, \
- p5-external-data-cache-snoop-hits) \
-__PMC_EV(P5, MEMORY_ACCESSES_IN_BOTH_PIPES, \
- p5-memory-accesses-in-both-pipes) \
-__PMC_EV(P5, BANK_CONFLICTS, p5-bank-conflicts) \
-__PMC_EV(P5, MISALIGNED_DATA_OR_IO_REFERENCES, \
- p5-misaligned-data-or-io-references) \
-__PMC_EV(P5, CODE_READ, p5-code-read) \
-__PMC_EV(P5, CODE_TLB_MISS, p5-code-tlb-miss) \
-__PMC_EV(P5, CODE_CACHE_MISS, p5-code-cache-miss) \
-__PMC_EV(P5, ANY_SEGMENT_REGISTER_LOADED, \
- p5-any-segment-register-loaded) \
-__PMC_EV(P5, BRANCHES, p5-branches) \
-__PMC_EV(P5, BTB_HITS, p5-btb-hits) \
-__PMC_EV(P5, TAKEN_BRANCH_OR_BTB_HIT, \
- p5-taken-branch-or-btb-hit) \
-__PMC_EV(P5, PIPELINE_FLUSHES, p5-pipeline-flushes) \
-__PMC_EV(P5, INSTRUCTIONS_EXECUTED, p5-instructions-executed) \
-__PMC_EV(P5, INSTRUCTIONS_EXECUTED_V_PIPE, \
- p5-instructions-executed-v-pipe) \
-__PMC_EV(P5, BUS_CYCLE_DURATION, p5-bus-cycle-duration) \
-__PMC_EV(P5, WRITE_BUFFER_FULL_STALL_DURATION, \
- p5-write-buffer-full-stall-duration) \
-__PMC_EV(P5, WAITING_FOR_DATA_MEMORY_READ_STALL_DURATION, \
- p5-waiting-for-data-memory-read-stall-duration) \
-__PMC_EV(P5, STALL_ON_WRITE_TO_AN_E_OR_M_STATE_LINE, \
- p5-stall-on-write-to-an-e-or-m-state-line) \
-__PMC_EV(P5, LOCKED_BUS_CYCLE, p5-locked-bus-cycle) \
-__PMC_EV(P5, IO_READ_OR_WRITE_CYCLE, p5-io-read-or-write-cycle) \
-__PMC_EV(P5, NONCACHEABLE_MEMORY_READS, \
- p5-noncacheable-memory-reads) \
-__PMC_EV(P5, PIPELINE_AGI_STALLS, p5-pipeline-agi-stalls) \
-__PMC_EV(P5, FLOPS, p5-flops) \
-__PMC_EV(P5, BREAKPOINT_MATCH_ON_DR0_REGISTER, \
- p5-breakpoint-match-on-dr0-register) \
-__PMC_EV(P5, BREAKPOINT_MATCH_ON_DR1_REGISTER, \
- p5-breakpoint-match-on-dr1-register) \
-__PMC_EV(P5, BREAKPOINT_MATCH_ON_DR2_REGISTER, \
- p5-breakpoint-match-on-dr2-register) \
-__PMC_EV(P5, BREAKPOINT_MATCH_ON_DR3_REGISTER, \
- p5-breakpoint-match-on-dr3-register) \
-__PMC_EV(P5, HARDWARE_INTERRUPTS, p5-hardware-interrupts) \
-__PMC_EV(P5, DATA_READ_OR_WRITE, p5-data-read-or-write) \
-__PMC_EV(P5, DATA_READ_MISS_OR_WRITE_MISS, \
- p5-data-read-miss-or-write-miss) \
-__PMC_EV(P5, BUS_OWNERSHIP_LATENCY, p5-bus-ownership-latency) \
-__PMC_EV(P5, BUS_OWNERSHIP_TRANSFERS, p5-bus-ownership-transfers) \
-__PMC_EV(P5, MMX_INSTRUCTIONS_EXECUTED_U_PIPE, \
- p5-mmx-instructions-executed-u-pipe) \
-__PMC_EV(P5, MMX_INSTRUCTIONS_EXECUTED_V_PIPE, \
- p5-mmx-instructions-executed-v-pipe) \
-__PMC_EV(P5, CACHE_M_LINE_SHARING, p5-cache-m-line-sharing) \
-__PMC_EV(P5, CACHE_LINE_SHARING, p5-cache-line-sharing) \
-__PMC_EV(P5, EMMS_INSTRUCTIONS_EXECUTED, \
- p5-emms-instructions-executed) \
-__PMC_EV(P5, TRANSITIONS_BETWEEN_MMX_AND_FP_INSTRUCTIONS, \
- p5-transitions-between-mmx-and-fp-instructions) \
-__PMC_EV(P5, BUS_UTILIZATION_DUE_TO_PROCESSOR_ACTIVITY, \
- p5-bus-utilization-due-to-processor-activity) \
-__PMC_EV(P5, WRITES_TO_NONCACHEABLE_MEMORY, \
- p5-writes-to-noncacheable-memory) \
-__PMC_EV(P5, SATURATING_MMX_INSTRUCTIONS_EXECUTED, \
- p5-saturating-mmx-instructions-executed) \
-__PMC_EV(P5, SATURATIONS_PERFORMED, p5-saturations-performed) \
-__PMC_EV(P5, NUMBER_OF_CYCLES_NOT_IN_HALT_STATE, \
- p5-number-of-cycles-not-in-halt-state) \
-__PMC_EV(P5, DATA_CACHE_TLB_MISS_STALL_DURATION, \
- p5-data-cache-tlb-miss-stall-duration) \
-__PMC_EV(P5, MMX_INSTRUCTION_DATA_READS, \
- p5-mmx-instruction-data-reads) \
-__PMC_EV(P5, MMX_INSTRUCTION_DATA_READ_MISSES, \
- p5-mmx-instruction-data-read-misses) \
-__PMC_EV(P5, FLOATING_POINT_STALLS_DURATION, \
- p5-floating-point-stalls-duration) \
-__PMC_EV(P5, TAKEN_BRANCHES, p5-taken-branches) \
-__PMC_EV(P5, D1_STARVATION_AND_FIFO_IS_EMPTY, \
- p5-d1-starvation-and-fifo-is-empty) \
-__PMC_EV(P5, D1_STARVATION_AND_ONLY_ONE_INSTRUCTION_IN_FIFO, \
- p5-d1-starvation-and-only-instruction-in-fifo) \
-__PMC_EV(P5, MMX_INSTRUCTION_DATA_WRITES, \
- p5-mmx-instruction-data-writes) \
-__PMC_EV(P5, MMX_INSTRUCTION_DATA_WRITE_MISSES, \
- p5-mmx-instruction-data-write-misses) \
-__PMC_EV(P5, PIPELINE_FLUSHES_DUE_TO_WRONG_BRANCH_PREDICTIONS, \
- p5-pipeline-flushes-due-to-wrong-branch-predictions) \
-__PMC_EV(P5, \
- PIPELINE_FLUSHES_DUE_TO_WRONG_BRANCH_PREDICTIONS_RESOLVED_IN_WB_STAGE, \
- p5-pipeline-flushes-due-to-wrong-branch-predictions-resolved-in-wb-stage) \
-__PMC_EV(P5, MISALIGNED_DATA_MEMORY_REFERENCE_ON_MMX_INSTRUCTIONS, \
- p5-misaligned-data-memory-reference-on-mmx-instructions) \
-__PMC_EV(P5, PIPELINE_STALL_FOR_MMX_INSTRUCTION_DATA_MEMORY_READS, \
- p5-pipeline-stall-for-mmx-instruction-data-memory-reads) \
-__PMC_EV(P5, MISPREDICTED_OR_UNPREDICTED_RETURNS, \
- p5-mispredicted-or-unpredicted-returns) \
-__PMC_EV(P5, PREDICTED_RETURNS, p5-predicted-returns) \
-__PMC_EV(P5, MMX_MULTIPLY_UNIT_INTERLOCK, \
- p5-mmx-multiply-unit-interlock) \
-__PMC_EV(P5, MOVD_MOVQ_STORE_STALL_DUE_TO_PREVIOUS_MMX_OPERATION, \
- p5-movd-movq-store-stall-due-to-previous-mmx-operation) \
-__PMC_EV(P5, RETURNS, p5-returns) \
-__PMC_EV(P5, BTB_FALSE_ENTRIES, p5-btb-false-entries) \
-__PMC_EV(P5, BTB_MISS_PREDICTION_ON_NOT_TAKEN_BRANCH, \
- p5-btb-miss-prediction-on-not-taken-branch) \
-__PMC_EV(P5, \
- FULL_WRITE_BUFFER_STALL_DURATION_WHILE_EXECUTING_MMX_INSTRUCTIONS, \
- p5-full-write-buffer-stall-duration-while-executing-mmx-instructions) \
-__PMC_EV(P5, STALL_ON_MMX_INSTRUCTION_WRITE_TO_E_OR_M_STATE_LINE, \
- p5-stall-on-mmx-instruction-write-to-e-o-m-state-line)
-
-#define PMC_EV_P5_FIRST PMC_EV_P5_DATA_READ
-#define PMC_EV_P5_LAST \
- PMC_EV_P5_STALL_ON_MMX_INSTRUCTION_WRITE_TO_E_OR_M_STATE_LINE
-
-/* timestamp counters. */
-#define __PMC_EV_TSC() \
- __PMC_EV(TSC, TSC, tsc)
-
-/* All known PMC events */
-#define __PMC_EVENTS() \
- __PMC_EV_TSC() \
- __PMC_EV_K7() \
- __PMC_EV_P6() \
- __PMC_EV_P4() \
- __PMC_EV_K8() \
- __PMC_EV_P5() \
-
-
-
-enum pmc_event {
-#undef __PMC_EV
-#define __PMC_EV(C,N,D) PMC_EV_ ## C ## _ ## N ,
- __PMC_EVENTS()
-};
-
-#define PMC_EVENT_FIRST PMC_EV_TSC_TSC
-#define PMC_EVENT_LAST PMC_EV_P5_LAST
-
-/*
* Counter capabilities
*
* __PMC_CAPS(NAME, VALUE, DESCRIPTION)
@@ -733,6 +241,21 @@ enum pmc_caps
#define PMC_CAP_LAST PMC_CAP_CASCADE
/*
+ * PMC Event Numbers
+ *
+ * These are generated from the definitions in "dev/hwpmc/pmc_events.h".
+ */
+
+enum pmc_event {
+#undef __PMC_EV
+#define __PMC_EV(C,N,D) PMC_EV_ ## C ## _ ## N ,
+ __PMC_EVENTS()
+};
+
+#define PMC_EVENT_FIRST PMC_EV_TSC_TSC
+#define PMC_EVENT_LAST PMC_EV_P5_LAST
+
+/*
* PMC SYSCALL INTERFACE
*/
@@ -743,6 +266,7 @@ enum pmc_caps
#define __PMC_OPS() \
__PMC_OP(CONFIGURELOG, "Set log file") \
+ __PMC_OP(FLUSHLOG, "Flush log file") \
__PMC_OP(GETCPUINFO, "Get system CPU information") \
__PMC_OP(GETDRIVERSTATS, "Get driver statistics") \
__PMC_OP(GETMODULEVERSION, "Get module version") \
@@ -751,13 +275,13 @@ enum pmc_caps
__PMC_OP(PMCALLOCATE, "Allocate and configure a PMC") \
__PMC_OP(PMCATTACH, "Attach a PMC to a process") \
__PMC_OP(PMCDETACH, "Detach a PMC from a process") \
+ __PMC_OP(PMCGETMSR, "Get a PMC's hardware address") \
__PMC_OP(PMCRELEASE, "Release a PMC") \
__PMC_OP(PMCRW, "Read/Set a PMC") \
__PMC_OP(PMCSETCOUNT, "Set initial count/sampling rate") \
__PMC_OP(PMCSTART, "Start a PMC") \
__PMC_OP(PMCSTOP, "Start a PMC") \
- __PMC_OP(WRITELOG, "Write a log file entry") \
- __PMC_OP(PMCX86GETMSR, "(x86 architectures) retrieve MSR")
+ __PMC_OP(WRITELOG, "Write a cookie to the log file")
enum pmc_ops {
#undef __PMC_OP
@@ -772,11 +296,17 @@ enum pmc_ops {
#define PMC_F_FORCE 0x00000001 /*OP ADMIN force operation */
#define PMC_F_DESCENDANTS 0x00000002 /*OP ALLOCATE track descendants */
-#define PMC_F_LOG_TC_CSW 0x00000004 /*OP ALLOCATE track ctx switches */
-#define PMC_F_LOG_TC_PROCEXIT 0x00000008 /*OP ALLOCATE log proc exits */
+#define PMC_F_LOG_PROCCSW 0x00000004 /*OP ALLOCATE track ctx switches */
+#define PMC_F_LOG_PROCEXIT 0x00000008 /*OP ALLOCATE log proc exits */
#define PMC_F_NEWVALUE 0x00000010 /*OP RW write new value */
#define PMC_F_OLDVALUE 0x00000020 /*OP RW get old value */
+#define PMC_F_KGMON 0x00000040 /*OP ALLOCATE kgmon(8) profiling */
+
+/* internal flags */
#define PMC_F_ATTACHED_TO_OWNER 0x00010000 /*attached to owner*/
+#define PMC_F_NEEDS_LOGFILE 0x00020000 /*needs log file */
+#define PMC_F_ATTACH_DONE 0x00040000 /*attached at least once */
+#define PMC_F_IS_STALLED 0x00080000 /*sampling is stalled*/
/*
* Cookies used to denote allocated PMCs, and the values of PMCs.
@@ -834,22 +364,7 @@ struct pmc_op_pmcallocate {
enum pmc_mode pm_mode; /* desired mode */
pmc_id_t pm_pmcid; /* [return] process pmc id */
- /*
- * Machine dependent extensions
- */
-
-#if __i386__
- uint32_t pm_config1;
- uint32_t pm_config2;
-#define pm_amd_config pm_config1
-#define pm_p4_cccrconfig pm_config1
-#define pm_p4_escrconfig pm_config2
-#define pm_p6_config pm_config1
-
-#elif __amd64__
- uint32_t pm_k8_config;
-#define pm_amd_config pm_k8_config
-#endif
+ union pmc_md_op_pmcallocate pm_md; /* MD layer extensions */
};
/*
@@ -973,8 +488,12 @@ struct pmc_op_configurelog {
struct pmc_op_getdriverstats {
int pm_intr_ignored; /* #interrupts ignored */
int pm_intr_processed; /* #interrupts processed */
+ int pm_intr_bufferfull; /* #interrupts with ENOSPC */
int pm_syscalls; /* #syscalls */
int pm_syscall_errors; /* #syscalls with errors */
+ int pm_buffer_requests; /* #buffer requests */
+ int pm_buffer_requests_failed; /* #failed buffer requests */
+ int pm_log_sweeps; /* #sample buffer processing passes */
};
/*
@@ -987,32 +506,50 @@ struct pmc_op_simple {
pmc_id_t pm_pmcid;
};
-#if __i386__ || __amd64__
+/*
+ * OP WRITELOG
+ *
+ * Flush the current log buffer and write 4 bytes of user data to it.
+ */
+
+struct pmc_op_writelog {
+ uint32_t pm_userdata;
+};
/*
- * OP X86_GETMSR
+ * OP GETMSR
*
- * Retrieve the model specific register assoicated with the
- * allocated PMC. This number can be used subsequently with
- * RDPMC instructions.
+ * Retrieve the machine specific address assoicated with the allocated
+ * PMC. This number can be used subsequently with a read-performance-counter
+ * instruction.
*/
-struct pmc_op_x86_getmsr {
- uint32_t pm_msr; /* MSR for the PMC */
+struct pmc_op_getmsr {
+ uint32_t pm_msr; /* machine specific address */
pmc_id_t pm_pmcid; /* allocated pmc id */
};
-#endif
-
#ifdef _KERNEL
#include <sys/malloc.h>
#include <sys/sysctl.h>
-#define PMC_REQUEST_POOL_SIZE 128
+#define PMC_REQUEST_POOL_SIZE 32
#define PMC_HASH_SIZE 16
-#define PMC_PCPU_BUFFER_SIZE 4096
#define PMC_MTXPOOL_SIZE 32
+#define PMC_LOG_BUFFER_SIZE 4
+#define PMC_NLOGBUFFERS 16
+#define PMC_NSAMPLES 16
+
+#define PMC_SYSCTL_NAME_PREFIX "kern." PMC_MODULE_NAME "."
+
+/*
+ * Locking keys
+ *
+ * (b) - pmc_bufferlist_mtx (spin lock)
+ * (k) - pmc_kthread_mtx (sleep lock)
+ * (o) - po->po_mtx (spin lock)
+ */
/*
* PMC commands
@@ -1035,7 +572,7 @@ struct pmc_syscall_args {
*/
struct pmc_descr {
- const char pd_name[PMC_NAME_MAX]; /* name */
+ char pd_name[PMC_NAME_MAX]; /* name */
uint32_t pd_caps; /* capabilities */
enum pmc_class pd_class; /* class of the PMC */
uint32_t pd_width; /* width in bits */
@@ -1077,7 +614,8 @@ struct pmc_target {
*/
struct pmc {
- LIST_HEAD(,pmc_target) pm_targets; /* list of target processes */
+ LIST_HEAD(,pmc_target) pm_targets; /* list of target processes */
+ LIST_ENTRY(pmc) pm_next; /* owner's list */
/*
* System-wide PMCs are allocated on a CPU and are not moved
@@ -1123,36 +661,7 @@ struct pmc {
pmc_id_t pm_id; /* allocated PMC id */
/* md extensions */
-#if __i386__
- union {
- /* AMD Athlon counters */
- struct {
- uint32_t pm_amd_evsel;
- } pm_amd;
-
- /* Intel P4 counters */
- struct {
- uint32_t pm_p4_cccrvalue;
- uint32_t pm_p4_escrvalue;
- uint32_t pm_p4_escr;
- uint32_t pm_p4_escrmsr;
- } pm_p4;
-
- /* Intel P6 counters */
- struct {
- uint32_t pm_p6_evsel;
- } pm_p6;
- } pm_md;
-
-#elif __amd64__
- union {
- /* AMD Athlon counters */
- struct {
- uint32_t pm_amd_evsel;
- } pm_amd;
- } pm_md;
-
-#endif
+ union pmc_md_pmc pm_md;
};
/*
@@ -1165,17 +674,6 @@ struct pmc {
#define PMC_TO_CPU(P) PMC_ID_TO_CPU((P)->pm_id)
/*
- * struct pmc_list
- *
- * Describes a list of PMCs.
- */
-
-struct pmc_list {
- LIST_ENTRY(pmc_list) pl_next;
- struct pmc *pl_pmc; /* PMC descriptor */
-};
-
-/*
* struct pmc_process
*
* Record a 'target' process being profiled.
@@ -1222,15 +720,22 @@ struct pmc_process {
*/
struct pmc_owner {
- LIST_ENTRY(pmc_owner) po_next; /* hash chain */
- LIST_HEAD(, pmc_list) po_pmcs; /* list of owned PMCs */
- uint32_t po_flags; /* flags PMC_PO_* */
- struct proc *po_owner; /* owner proc */
- int po_logfd; /* XXX for now */
+ LIST_ENTRY(pmc_owner) po_next; /* hash chain */
+ LIST_ENTRY(pmc_owner) po_ssnext; /* list of SS PMC owners */
+ LIST_HEAD(, pmc) po_pmcs; /* owned PMC list */
+ TAILQ_HEAD(, pmclog_buffer) po_logbuffers; /* (o) logbuffer list */
+ struct mtx po_mtx; /* spin lock for (o) */
+ struct proc *po_owner; /* owner proc */
+ uint32_t po_flags; /* (k) flags PMC_PO_* */
+ struct proc *po_kthread; /* (k) helper kthread */
+ struct pmclog_buffer *po_curbuf; /* current log buffer */
+ struct file *po_file; /* file reference */
+ int po_error; /* recorded error */
+ int po_sscount; /* # SS PMCs owned */
};
-#define PMC_PO_HAS_TS_PMC 0x00000001
-#define PMC_PO_OWNS_LOGFILE 0x00000002
+#define PMC_PO_OWNS_LOGFILE 0x00000001 /* has a log file */
+#define PMC_PO_IN_FLUSH 0x00000010 /* in the middle of a flush */
/*
* struct pmc_hw -- describe the state of the PMC hardware
@@ -1275,6 +780,27 @@ struct pmc_hw {
#define PMC_PHW_FLAG_IS_SHAREABLE (PMC_PHW_FLAGS_TO_STATE(0x02))
/*
+ * struct pmc_sample
+ *
+ * Space for N (tunable) PC samples and associated control data.
+ */
+
+struct pmc_sample {
+ uintfptr_t ps_pc; /* PC value at interrupt */
+ struct pmc *ps_pmc; /* interrupting PMC */
+ int ps_usermode; /* true for user mode PCs */
+ pid_t ps_pid; /* process PID or -1 */
+};
+
+struct pmc_samplebuffer {
+ struct pmc_sample * volatile ps_read; /* read pointer */
+ struct pmc_sample * volatile ps_write; /* write pointer */
+ struct pmc_sample *ps_fence; /* one beyond ps_samples[] */
+ struct pmc_sample ps_samples[]; /* array of sample entries */
+};
+
+
+/*
* struct pmc_cpustate
*
* A CPU is modelled as a collection of HW PMCs with space for additional
@@ -1283,6 +809,7 @@ struct pmc_hw {
struct pmc_cpu {
uint32_t pc_state; /* physical cpu number + flags */
+ struct pmc_samplebuffer *pc_sb; /* space for samples */
struct pmc_hw *pc_hwpmcs[]; /* 'npmc' pointers */
/* other machine dependent fields come here */
};
@@ -1352,10 +879,7 @@ struct pmc_mdep {
int (*pmd_describe)(int _cpu, int _ri, struct pmc_info *_pi,
struct pmc **_ppmc);
- /* Machine dependent methods */
-#if __i386__ || __amd64__
int (*pmd_get_msr)(int _ri, uint32_t *_msr);
-#endif
};
@@ -1365,36 +889,46 @@ struct pmc_mdep {
*/
extern struct pmc_cpu **pmc_pcpu;
+extern struct pmc_mdep *md;
/* driver statistics */
extern struct pmc_op_getdriverstats pmc_stats;
#if DEBUG
-/* debug flags */
-extern unsigned int pmc_debugflags; /* [Maj:12bits] [Min:16bits] [level:4] */
+/* debug flags, major flag groups */
+struct pmc_debugflags {
+ int pdb_CPU;
+ int pdb_CSW;
+ int pdb_LOG;
+ int pdb_MDP;
+ int pdb_MOD;
+ int pdb_OWN;
+ int pdb_PMC;
+ int pdb_PRC;
+ int pdb_SAM;
+};
-#define PMC_DEBUG_DEFAULT_FLAGS 0
-#define PMC_DEBUG_STRSIZE 128
+extern struct pmc_debugflags pmc_debugflags;
-#define __PMCDFMAJ(M) (1 << (PMC_DEBUG_MAJ_##M+20))
-#define __PMCDFMIN(M) (1 << (PMC_DEBUG_MIN_##M+4))
+#define PMC_DEBUG_STRSIZE 128
+#define PMC_DEBUG_DEFAULT_FLAGS { 0, 0, 0, 0, 0, 0, 0, 0 }
-#define __PMCDF(M,N) (__PMCDFMAJ(M) | __PMCDFMIN(N))
#define PMCDBG(M,N,L,F,...) do { \
- if (((pmc_debugflags & __PMCDF(M,N)) == __PMCDF(M,N)) && \
- ((pmc_debugflags & 0xF) > (L))) \
- printf(#M ":" #N ": " F "\n", __VA_ARGS__); \
+ if (pmc_debugflags.pdb_ ## M & PMC_DEBUG_MIN_ ## N) \
+ printf(#M ":" #N ":" #L ": " F "\n", __VA_ARGS__); \
} while (0)
/* Major numbers */
-#define PMC_DEBUG_MAJ_MOD 0 /* misc module infrastructure */
-#define PMC_DEBUG_MAJ_PMC 1 /* pmc management */
-#define PMC_DEBUG_MAJ_CTX 2 /* context switches */
-#define PMC_DEBUG_MAJ_OWN 3 /* owner */
-#define PMC_DEBUG_MAJ_PRC 4 /* processes */
-#define PMC_DEBUG_MAJ_MDP 5 /* machine dependent */
-#define PMC_DEBUG_MAJ_CPU 6 /* cpu switches */
+#define PMC_DEBUG_MAJ_CPU 0 /* cpu switches */
+#define PMC_DEBUG_MAJ_CSW 1 /* context switches */
+#define PMC_DEBUG_MAJ_LOG 2 /* logging */
+#define PMC_DEBUG_MAJ_MDP 3 /* machine dependent */
+#define PMC_DEBUG_MAJ_MOD 4 /* misc module infrastructure */
+#define PMC_DEBUG_MAJ_OWN 5 /* owner */
+#define PMC_DEBUG_MAJ_PMC 6 /* pmc management */
+#define PMC_DEBUG_MAJ_PRC 7 /* processes */
+#define PMC_DEBUG_MAJ_SAM 8 /* sampling */
/* Minor numbers */
@@ -1420,6 +954,7 @@ extern unsigned int pmc_debugflags; /* [Maj:12bits] [Min:16bits] [level:4] */
#define PMC_DEBUG_MIN_EXC 11 /* process exec */
#define PMC_DEBUG_MIN_FRK 12 /* process fork */
#define PMC_DEBUG_MIN_ATT 13 /* attach/detach */
+#define PMC_DEBUG_MIN_SIG 14 /* signalling */
/* CONTEXT SWITCHES */
#define PMC_DEBUG_MIN_SWI 8 /* switch in */
@@ -1441,6 +976,12 @@ extern unsigned int pmc_debugflags; /* [Maj:12bits] [Min:16bits] [level:4] */
#define PMC_DEBUG_MIN_BND 8 /* bind */
#define PMC_DEBUG_MIN_SEL 9 /* select */
+/* LOG */
+#define PMC_DEBUG_MIN_GTB 8 /* get buf */
+#define PMC_DEBUG_MIN_SIO 9 /* schedule i/o */
+#define PMC_DEBUG_MIN_FLS 10 /* flush */
+#define PMC_DEBUG_MIN_SAM 11 /* sample */
+
#else
#define PMCDBG(M,N,L,F,...) /* nothing */
#endif
@@ -1452,9 +993,10 @@ MALLOC_DECLARE(M_PMC);
* Functions
*/
-void pmc_update_histogram(struct pmc_hw *phw, uintptr_t pc);
-void pmc_send_signal(struct pmc *pmc);
-int pmc_getrowdisp(int ri);
+struct pmc_mdep *pmc_md_initialize(void); /* MD init function */
+int pmc_getrowdisp(int _ri);
+int pmc_process_interrupt(int _cpu, struct pmc *_pm, intfptr_t _pc,
+ int _usermode);
#endif /* _KERNEL */
#endif /* _SYS_PMC_H_ */
diff --git a/sys/sys/pmckern.h b/sys/sys/pmckern.h
index 7a222da..5f2e158 100644
--- a/sys/sys/pmckern.h
+++ b/sys/sys/pmckern.h
@@ -44,9 +44,6 @@
#define PMC_FN_CSW_OUT 3
#define PMC_FN_DO_SAMPLES 4
-#define PMC_FN_PROCESS_EXIT 5 /* obsolete */
-#define PMC_FN_PROCESS_FORK 6 /* obsolete */
-
/* hook */
extern int (*pmc_hook)(struct thread *_td, int _function, void *_arg);
extern int (*pmc_intr)(int _cpu, uintptr_t _pc, int _usermode);
@@ -55,7 +52,10 @@ extern int (*pmc_intr)(int _cpu, uintptr_t _pc, int _usermode);
extern struct sx pmc_sx;
/* Per-cpu flags indicating availability of sampling data */
-extern cpumask_t pmc_cpumask;
+extern volatile cpumask_t pmc_cpumask;
+
+/* Count of system-wide sampling PMCs in existence */
+extern volatile int pmc_ss_count;
/* Hook invocation; for use within the kernel */
#define PMC_CALL_HOOK(t, cmd, arg) \
@@ -92,6 +92,8 @@ do { \
(__predict_false(atomic_load_acq_int(&(p)->p_flag) & \
P_HWPMC))
+#define PMC_SYSTEM_SAMPLING_ACTIVE() (pmc_ss_count > 0)
+
/* Check if a CPU has recorded samples. */
#define PMC_CPU_HAS_SAMPLES(C) (__predict_false(pmc_cpumask & (1 << (C))))
diff --git a/sys/sys/pmclog.h b/sys/sys/pmclog.h
new file mode 100644
index 0000000..97aa2f6
--- /dev/null
+++ b/sys/sys/pmclog.h
@@ -0,0 +1,229 @@
+/*-
+ * Copyright (c) 2005 Joseph Koshy
+ * All rights reserved.
+ *
+ * Redistribution and use in source and binary forms, with or without
+ * modification, are permitted provided that the following conditions
+ * are met:
+ * 1. Redistributions of source code must retain the above copyright
+ * notice, this list of conditions and the following disclaimer.
+ * 2. Redistributions in binary form must reproduce the above copyright
+ * notice, this list of conditions and the following disclaimer in the
+ * documentation and/or other materials provided with the distribution.
+ *
+ * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
+ * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+ * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
+ * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
+ * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
+ * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
+ * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
+ * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
+ * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
+ * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
+ * SUCH DAMAGE.
+ *
+ * $FreeBSD$
+ */
+
+#ifndef _SYS_PMCLOG_H_
+#define _SYS_PMCLOG_H_
+
+#include <sys/pmc.h>
+
+enum pmclog_type {
+ PMCLOG_TYPE_CLOSELOG,
+ PMCLOG_TYPE_DROPNOTIFY,
+ PMCLOG_TYPE_INITIALIZE,
+ PMCLOG_TYPE_MAPPINGCHANGE,
+ PMCLOG_TYPE_PCSAMPLE,
+ PMCLOG_TYPE_PMCALLOCATE,
+ PMCLOG_TYPE_PMCATTACH,
+ PMCLOG_TYPE_PMCDETACH,
+ PMCLOG_TYPE_PROCCSW,
+ PMCLOG_TYPE_PROCEXEC,
+ PMCLOG_TYPE_PROCEXIT,
+ PMCLOG_TYPE_PROCFORK,
+ PMCLOG_TYPE_SYSEXIT,
+ PMCLOG_TYPE_USERDATA
+};
+
+#define PMCLOG_MAPPING_INSERT 0x01
+#define PMCLOG_MAPPING_DELETE 0x02
+
+/*
+ * A log entry descriptor comprises of a 32 bit header and a 64 bit
+ * time stamp followed by as many 32 bit words are required to record
+ * the event.
+ *
+ * Header field format:
+ *
+ * 31 24 16 0
+ * +------------+------------+-----------------------------------+
+ * | MAGIC | TYPE | LENGTH |
+ * +------------+------------+-----------------------------------+
+ *
+ * MAGIC is the constant PMCLOG_HEADER_MAGIC.
+ * TYPE contains a value of type enum pmclog_type.
+ * LENGTH contains the length of the event record, in bytes.
+ */
+
+#define PMCLOG_ENTRY_HEADER \
+ uint32_t pl_header; \
+ uint32_t pl_ts_sec; \
+ uint32_t pl_ts_nsec;
+
+
+/*
+ * The following structures are used to describe the size of each kind
+ * of log entry to sizeof(). To keep the compiler from adding
+ * padding, the fields of each structure are aligned to their natural
+ * boundaries, and the structures are marked as 'packed'.
+ *
+ * The actual reading and writing of the log file is always in terms
+ * of 4 byte quantities.
+ */
+
+struct pmclog_closelog {
+ PMCLOG_ENTRY_HEADER
+};
+
+struct pmclog_dropnotify {
+ PMCLOG_ENTRY_HEADER
+};
+
+struct pmclog_initialize {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_version; /* driver version */
+ uint32_t pl_cpu; /* enum pmc_cputype */
+} __packed;
+
+struct pmclog_mappingchange {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_type;
+ uintfptr_t pl_start; /* 8 byte aligned */
+ uintfptr_t pl_end;
+ uint32_t pl_pid;
+ char pl_pathname[PATH_MAX];
+} __packed;
+
+
+struct pmclog_pcsample {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_pid;
+ uintfptr_t pl_pc; /* 8 byte aligned */
+ uint32_t pl_pmcid;
+} __packed;
+
+struct pmclog_pmcallocate {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_pmcid;
+ uint32_t pl_event;
+ uint32_t pl_flags;
+} __packed;
+
+struct pmclog_pmcattach {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_pmcid;
+ uint32_t pl_pid;
+ char pl_pathname[PATH_MAX];
+} __packed;
+
+struct pmclog_pmcdetach {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_pmcid;
+ uint32_t pl_pid;
+} __packed;
+
+struct pmclog_proccsw {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_pmcid;
+ uint64_t pl_value; /* keep 8 byte aligned */
+ uint32_t pl_pid;
+} __packed;
+
+struct pmclog_procexec {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_pid;
+ char pl_pathname[PATH_MAX];
+} __packed;
+
+struct pmclog_procexit {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_pmcid;
+ uint64_t pl_value; /* keep 8 byte aligned */
+ uint32_t pl_pid;
+} __packed;
+
+struct pmclog_procfork {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_oldpid;
+ uint32_t pl_newpid;
+} __packed;
+
+struct pmclog_sysexit {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_pid;
+} __packed;
+
+struct pmclog_userdata {
+ PMCLOG_ENTRY_HEADER
+ uint32_t pl_userdata;
+} __packed;
+
+union pmclog_entry { /* only used to size scratch areas */
+ struct pmclog_closelog pl_cl;
+ struct pmclog_dropnotify pl_dn;
+ struct pmclog_initialize pl_i;
+ struct pmclog_pcsample pl_s;
+ struct pmclog_pmcallocate pl_a;
+ struct pmclog_pmcattach pl_t;
+ struct pmclog_pmcdetach pl_d;
+ struct pmclog_proccsw pl_c;
+ struct pmclog_procexec pl_x;
+ struct pmclog_procexit pl_e;
+ struct pmclog_procfork pl_f;
+ struct pmclog_sysexit pl_se;
+ struct pmclog_userdata pl_u;
+};
+
+#define PMCLOG_HEADER_MAGIC 0xEEU
+
+#define PMCLOG_HEADER_TO_LENGTH(H) \
+ ((H) & 0x0000FFFF)
+#define PMCLOG_HEADER_TO_TYPE(H) \
+ (((H) & 0x00FF0000) >> 16)
+#define PMCLOG_HEADER_TO_MAGIC(H) \
+ (((H) & 0xFF000000) >> 24)
+#define PMCLOG_HEADER_CHECK_MAGIC(H) \
+ (PMCLOG_HEADER_TO_MAGIC(H) == PMCLOG_HEADER_MAGIC)
+
+#ifdef _KERNEL
+
+/*
+ * Prototypes
+ */
+int pmclog_configure_log(struct pmc_owner *_po, int _logfd);
+int pmclog_deconfigure_log(struct pmc_owner *_po);
+int pmclog_flush(struct pmc_owner *_po);
+void pmclog_initialize(void);
+void pmclog_process_closelog(struct pmc_owner *po);
+void pmclog_process_dropnotify(struct pmc_owner *po);
+void pmclog_process_mappingchange(struct pmc_owner *po, pid_t pid, int type,
+ uintfptr_t start, uintfptr_t end, char *path);
+void pmclog_process_pcsample(struct pmc *_pm, struct pmc_sample *_ps);
+void pmclog_process_pmcallocate(struct pmc *_pm);
+void pmclog_process_pmcattach(struct pmc *_pm, pid_t _pid, char *_path);
+void pmclog_process_pmcdetach(struct pmc *_pm, pid_t _pid);
+void pmclog_process_proccsw(struct pmc *_pm, struct pmc_process *_pp,
+ pmc_value_t _v);
+void pmclog_process_procexec(struct pmc_owner *_po, pid_t _pid, char *_path);
+void pmclog_process_procexit(struct pmc *_pm, struct pmc_process *_pp);
+void pmclog_process_procfork(struct pmc_owner *_po, pid_t _oldpid, pid_t _newpid);
+void pmclog_process_sysexit(struct pmc_owner *_po, pid_t _pid);
+int pmclog_process_userlog(struct pmc_owner *_po,
+ struct pmc_op_writelog *_wl);
+void pmclog_shutdown(void);
+#endif /* _KERNEL */
+
+#endif /* _SYS_PMCLOG_H_ */
diff --git a/usr.sbin/pmccontrol/pmccontrol.c b/usr.sbin/pmccontrol/pmccontrol.c
index 6ffa2d1..8e52614 100644
--- a/usr.sbin/pmccontrol/pmccontrol.c
+++ b/usr.sbin/pmccontrol/pmccontrol.c
@@ -222,8 +222,8 @@ pmcc_do_list_state(void)
int c, cpu, n, npmc, ncpu;
unsigned int logical_cpus_mask;
struct pmc_info *pd;
- struct pmc_op_getpmcinfo *pi;
- const struct pmc_op_getcpuinfo *pc;
+ struct pmc_pmcinfo *pi;
+ const struct pmc_cpuinfo *pc;
if (pmc_cpuinfo(&pc) != 0)
err(EX_OSERR, "Unable to determine CPU information");
@@ -280,7 +280,7 @@ pmcc_do_list_events(void)
enum pmc_class c;
unsigned int i, j, nevents;
const char **eventnamelist;
- const struct pmc_op_getcpuinfo *ci;
+ const struct pmc_cpuinfo *ci;
if (pmc_cpuinfo(&ci) != 0)
err(EX_OSERR, "Unable to determine CPU information");
@@ -307,7 +307,7 @@ static int
pmcc_show_statistics(void)
{
- struct pmc_op_getdriverstats gms;
+ struct pmc_driverstats gms;
if (pmc_get_driver_stats(&gms) < 0)
err(EX_OSERR, "ERROR: cannot retrieve driver statistics");
@@ -316,12 +316,15 @@ pmcc_show_statistics(void)
* Print statistics.
*/
-#define PRINT(N,V) (void) printf("%20s %d\n", (N), gms.pm_##V)
-
- PRINT("interrupts-processed", intr_processed);
- PRINT("interrupts-ignored", intr_ignored);
- PRINT("system-calls", syscalls);
- PRINT("system-calls-with-errors", syscall_errors);
+#define PRINT(N,V) (void) printf("%-40s %d\n", (N), gms.pm_##V)
+ PRINT("interrupts processed:", intr_processed);
+ PRINT("non-PMC interrupts:", intr_ignored);
+ PRINT("interrupts dropped due to lack of space:", intr_bufferfull);
+ PRINT("system calls:", syscalls);
+ PRINT("system calls with errors:", syscall_errors);
+ PRINT("buffer requests:", buffer_requests);
+ PRINT("buffer requests failed:", buffer_requests_failed);
+ PRINT("sampling log sweeps:", log_sweeps);
return 0;
}
diff --git a/usr.sbin/pmcstat/pmcstat.8 b/usr.sbin/pmcstat/pmcstat.8
index 75f132b..16a2e0e 100644
--- a/usr.sbin/pmcstat/pmcstat.8
+++ b/usr.sbin/pmcstat/pmcstat.8
@@ -31,12 +31,18 @@
.Nd performance measurement with performance monitoring hardware
.Sh SYNOPSIS
.Nm
+.Op Fl D Ar pathname
.Op Fl C
+.Op Fl E
.Op Fl O Ar logfilename
.Op Fl P Ar event-spec
+.Op Fl R Ar logfilename
.Op Fl S Ar event-spec
+.Op Fl W
.Op Fl c Ar cpu
.Op Fl d
+.Op Fl g
+.Op Fl m
.Op Fl n Ar count
.Op Fl o Ar outputfile
.Op Fl p Ar event-spec
@@ -76,9 +82,9 @@ counting and sampling flavors.
The values of all counting PMCs are printed in human readable form
at regular intervals by
.Nm .
-The output of sampling PMCs is configured to go to log file, for later
-analysis by tools like
-.Xr pmcreport 8 .
+The output of sampling PMCs may be configured to go to a log file for
+subsequent offline analysis, or, at the expense of greater
+overhead, may be configured to be processed on the fly.
.Pp
Hardware events to measure are specified to
.Nm
@@ -94,23 +100,48 @@ process' current and future children.
The following options are available:
.Bl -tag -width indent
.It Fl C
-Toggle between showing cumulative and incremental counts for
+Toggle between showing cumulative or incremental counts for
subsequent counting mode PMCs specified on the command line.
The default is to show incremental counts.
+.It Fl D Ar pathname
+Create files with per-program samples in the directory named
+by
+.Ar pathname .
+The default is to create these files in the current directory.
+.It Fl E
+Toggle showing per-process counts at the time a tracked process
+exits for subsequent process-mode PMCs specified on the command line.
+This option is useful for mapping the performance characteristics of a
+complex pipeline of processes when used in conjunction with the
+.Fl d
+option.
+The default is to not to enable per-process tracking.
.It Fl O Ar logfilename
Send the output of sampling mode PMCs to
.Ar logfilename .
-The default file name is
-.Pa pmcstat.out ,
-in the current directory.
+If this option is not specified and one of the logging options
+is requested, then
+.Nm
+will print a human-readable version of the log to the configured
+output file.
.It Fl P Ar event-spec
Allocate a process mode sampling PMC measuring hardware events
specified in
.Ar event-spec .
+.It Fl R Ar logfilename
+Perform offline analysis using sampling data in file
+.Ar logfilename .
.It Fl S Ar event-spec
Allocate a system mode sampling PMC measuring hardware events
specified in
.Ar event-spec .
+.It Fl W
+Toggle logging the incremental counts seen by the threads of a
+tracked process each time they are scheduled on a CPU.
+This is an experimental feature intended to help analyse the
+dynamic behaviour of processes in the system.
+It may incur substantial overhead if enabled.
+The default is for this feature to be disabled.
.It Fl c Ar cpu
Set the cpu for subsequent system mode PMCs specified on the
command line to
@@ -119,16 +150,23 @@ The default is to allocate system mode PMCs on CPU zero.
.It Fl d
Toggle between process mode PMCs measuring events for the target
process' current and future children or only measuring events for
-the attached process.
+the target process.
The default is to measure events for the target process alone.
+.It Fl g
+Produce execution profiles in a format compatible with
+.Xr gprof 1 .
+.It Fl m
+When producing
+.Xr gprof 1
+compatible execution profiles, merge profiles across multiple
+invocations of the same executable.
.It Fl n Ar rate
Set the default sampling rate for subsequent sampling mode
PMCs specified on the command line.
The default is to configure PMCs to sample the CPU's instruction
pointer every 65536 events.
.It Fl o Ar outputfile
-Send the periodic counter output of
-.Nm
+Send counter readings and printed representations of logged data
to file
.Ar outputfile .
The default is to send output to
@@ -188,9 +226,9 @@ sometime after
.Sh AUTHORS
.An Joseph Koshy Aq jkoshy@FreeBSD.org
.Sh SEE ALSO
+.Xr gprof 1 ,
.Xr execvp 3 ,
.Xr pmc 3 ,
.Xr hwpmc 4 ,
.Xr pmccontrol 8 ,
-.Xr pmcreport 8 ,
.Xr sysctl 8
diff --git a/usr.sbin/pmcstat/pmcstat.c b/usr.sbin/pmcstat/pmcstat.c
index ee3c427..e4fc143 100644
--- a/usr.sbin/pmcstat/pmcstat.c
+++ b/usr.sbin/pmcstat/pmcstat.c
@@ -1,5 +1,5 @@
/*-
- * Copyright (c) 2003,2004 Joseph Koshy
+ * Copyright (c) 2003-2005, Joseph Koshy
* All rights reserved.
*
* Redistribution and use in source and binary forms, with or without
@@ -22,7 +22,6 @@
* LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
* OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
* SUCH DAMAGE.
- *
*/
#include <sys/cdefs.h>
@@ -42,6 +41,7 @@ __FBSDID("$FreeBSD$");
#include <limits.h>
#include <math.h>
#include <pmc.h>
+#include <pmclog.h>
#include <signal.h>
#include <stdarg.h>
#include <stdio.h>
@@ -51,26 +51,55 @@ __FBSDID("$FreeBSD$");
#include <sysexits.h>
#include <unistd.h>
+/*
+ * A given invocation of pmcstat(8) can manage multiple PMCs of both
+ * the system-wide and per-process variety. Each of these could be in
+ * 'counting mode' or in 'sampling mode'.
+ *
+ * For 'counting mode' PMCs, pmcstat(8) will periodically issue a
+ * pmc_read() at the configured time interval and print out the value
+ * of the requested PMCs.
+ *
+ * For 'sampling mode' PMCs it can log to a file for offline analysis,
+ * or can analyse sampling data "on the fly", either by converting
+ * samples to printed textual form or by creating gprof(1) compatible
+ * profiles, one per program executed. When creating gprof(1)
+ * profiles it can optionally merge entries from multiple processes
+ * for a given executable into a single profile file.
+ */
+
/* Operation modes */
#define FLAG_HAS_PID 0x00000001
#define FLAG_HAS_WAIT_INTERVAL 0x00000002
#define FLAG_HAS_LOG_FILE 0x00000004
#define FLAG_HAS_PROCESS 0x00000008
-#define FLAG_USING_SAMPLING 0x00000010
-#define FLAG_USING_COUNTING 0x00000020
-#define FLAG_USING_PROCESS_PMC 0x00000040
+#define FLAG_HAS_SAMPLING_PMCS 0x00000010
+#define FLAG_HAS_COUNTING_PMCS 0x00000020
+#define FLAG_HAS_PROCESS_PMCS 0x00000040
+#define FLAG_HAS_SYSTEM_PMCS 0x00000080
+#define FLAG_HAS_PIPE 0x00000100
+#define FLAG_PROCESS_LOGFILE 0x00000200
+#define FLAG_DO_GPROF 0x00000400
+#define FLAG_DO_GPROF_MERGED 0x00000800
#define DEFAULT_SAMPLE_COUNT 65536
#define DEFAULT_WAIT_INTERVAL 5.0
#define DEFAULT_DISPLAY_HEIGHT 23
-#define DEFAULT_LOGFILE_NAME "pmcstat.out"
+#define DEFAULT_BUFFER_SIZE 4096
+#define WRITELOG_MAGIC 0xA55AA55A
#define PRINT_HEADER_PREFIX "# "
#define READPIPEFD 0
#define WRITEPIPEFD 1
#define NPIPEFD 2
+enum pmcstat_state {
+ PMCSTAT_FINISHED = 0,
+ PMCSTAT_EXITING = 1,
+ PMCSTAT_RUNNING = 2
+};
+
struct pmcstat_ev {
STAILQ_ENTRY(pmcstat_ev) ev_next;
char *ev_spec; /* event specification */
@@ -78,7 +107,7 @@ struct pmcstat_ev {
enum pmc_mode ev_mode; /* desired mode */
int ev_count; /* associated count if in sampling mode */
int ev_cpu; /* specific cpu if requested */
- int ev_descendants; /* attach to descendants */
+ int ev_flags; /* PMC_F_* */
int ev_cumulative; /* show cumulative counts */
int ev_fieldwidth; /* print width */
int ev_fieldskip; /* #leading spaces */
@@ -87,13 +116,16 @@ struct pmcstat_ev {
};
struct pmcstat_args {
+ int pa_required;
int pa_flags;
pid_t pa_pid;
- FILE *pa_outputfile;
- FILE *pa_logfile;
- double pa_interval;
+ FILE *pa_outputfile;
+ FILE *pa_logfile;
+ void *pa_logparser;
+ char *pa_outputdir;
+ double pa_interval;
int pa_argc;
- char **pa_argv;
+ char **pa_argv;
STAILQ_HEAD(, pmcstat_ev) pa_head;
} args;
@@ -103,15 +135,22 @@ int pmcstat_pipefd[NPIPEFD];
int pmcstat_kq;
/* Function prototypes */
-void pmcstat_cleanup(struct pmcstat_args *_a);
-void pmcstat_print_counters(struct pmcstat_args *_a);
-void pmcstat_print_headers(struct pmcstat_args *_a);
-void pmcstat_print_pmcs(struct pmcstat_args *_a);
-void pmcstat_setup_process(struct pmcstat_args *_a);
-void pmcstat_show_usage(void);
-void pmcstat_start_pmcs(struct pmcstat_args *_a);
-void pmcstat_start_process(struct pmcstat_args *_a);
-
+void pmcstat_cleanup(struct pmcstat_args *_a);
+int pmcstat_close_log(struct pmcstat_args *_a);
+void pmcstat_print_counters(struct pmcstat_args *_a);
+void pmcstat_print_headers(struct pmcstat_args *_a);
+void pmcstat_print_pmcs(struct pmcstat_args *_a);
+void pmcstat_setup_process(struct pmcstat_args *_a);
+void pmcstat_show_usage(void);
+void pmcstat_start_pmcs(struct pmcstat_args *_a);
+void pmcstat_start_process(struct pmcstat_args *_a);
+void pmcstat_process_log(struct pmcstat_args *_a);
+int pmcstat_print_log(struct pmcstat_args *_a);
+
+#define PMCSTAT_PRINT_LOG(A,T,...) do { \
+ fprintf((A)->pa_outputfile, T "\t" __VA_ARGS__); \
+ fprintf((A)->pa_outputfile, "\n"); \
+ } while (0)
/*
* cleanup
@@ -123,22 +162,25 @@ pmcstat_cleanup(struct pmcstat_args *a)
struct pmcstat_ev *ev, *tmp;
/* de-configure the log file if present. */
- if (a->pa_flags & FLAG_USING_SAMPLING) {
+ if (a->pa_flags & FLAG_HAS_LOG_FILE)
(void) pmc_configure_logfile(-1);
- (void) fclose(a->pa_logfile);
- }
/* release allocated PMCs. */
STAILQ_FOREACH_SAFE(ev, &a->pa_head, ev_next, tmp)
if (ev->ev_pmcid != PMC_ID_INVALID) {
if (pmc_release(ev->ev_pmcid) < 0)
err(EX_OSERR, "ERROR: cannot release pmc "
- "%d \"%s\"", ev->ev_pmcid, ev->ev_name);
+ "0x%x \"%s\"", ev->ev_pmcid, ev->ev_name);
free(ev->ev_name);
free(ev->ev_spec);
STAILQ_REMOVE(&a->pa_head, ev, pmcstat_ev, ev_next);
free(ev);
}
+
+ if (a->pa_logparser) {
+ pmclog_close(a->pa_logparser);
+ a->pa_logparser = NULL;
+ }
}
void
@@ -151,9 +193,10 @@ pmcstat_start_pmcs(struct pmcstat_args *a)
assert(ev->ev_pmcid != PMC_ID_INVALID);
if (pmc_start(ev->ev_pmcid) < 0) {
- warn("ERROR: Cannot start pmc %d \"%s\"",
+ warn("ERROR: Cannot start pmc 0x%x \"%s\"",
ev->ev_pmcid, ev->ev_name);
pmcstat_cleanup(a);
+ exit(EX_OSERR);
}
}
@@ -255,12 +298,10 @@ pmcstat_setup_process(struct pmcstat_args *a)
struct kevent kev;
if (a->pa_flags & FLAG_HAS_PID) {
-
- STAILQ_FOREACH(ev, &args.pa_head, ev_next)
+ STAILQ_FOREACH(ev, &a->pa_head, ev_next)
if (pmc_attach(ev->ev_pmcid, a->pa_pid) != 0)
err(EX_OSERR, "ERROR: cannot attach pmc \"%s\" to "
"process %d", ev->ev_name, (int) a->pa_pid);
-
} else {
/*
@@ -269,7 +310,6 @@ pmcstat_setup_process(struct pmcstat_args *a)
* process reads its pipe for a token so that the parent
* can finish doing its pmc_attach() calls.
*/
-
if (pipe(pmcstat_pipefd) < 0)
err(EX_OSERR, "ERROR: cannot create pipe");
@@ -288,8 +328,11 @@ pmcstat_setup_process(struct pmcstat_args *a)
(void) close(pmcstat_pipefd[READPIPEFD]);
/* exec() the program requested */
- execvp(*args.pa_argv, args.pa_argv);
- err(EX_OSERR, "ERROR (child): execvp failed");
+ execvp(*a->pa_argv, a->pa_argv);
+ /* and if that fails, notify the parent */
+ kill(getppid(), SIGCHLD);
+ err(EX_OSERR, "ERROR: execvp \"%s\" failed",
+ *a->pa_argv);
/*NOTREACHED*/
default: /* parent */
@@ -307,13 +350,12 @@ pmcstat_setup_process(struct pmcstat_args *a)
}
}
- /* Ask to be notified via a kevent when the child exits */
- EV_SET(&kev, a->pa_pid, EVFILT_PROC, EV_ADD, NOTE_EXIT, 0, 0);
-
+ /* Ask to be notified via a kevent when the target process exits */
+ EV_SET(&kev, a->pa_pid, EVFILT_PROC, EV_ADD|EV_ONESHOT, NOTE_EXIT, 0,
+ NULL);
if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
- err(EX_OSERR, "ERROR: cannot monitor process %d",
+ err(EX_OSERR, "ERROR: cannot monitor child process %d",
a->pa_pid);
-
return;
}
@@ -332,6 +374,147 @@ pmcstat_start_process(struct pmcstat_args *a)
(void) close(pmcstat_pipefd[WRITEPIPEFD]);
}
+
+/*
+ * Process a log file in offline analysis mode.
+ */
+
+void
+pmcstat_process_log(struct pmcstat_args *a)
+{
+ int runstate;
+
+ /*
+ * If gprof style profiles haven't been asked for, just print the
+ * log to the current output file.
+ */
+ if ((a->pa_flags & (FLAG_DO_GPROF_MERGED|FLAG_DO_GPROF)) == 0) {
+ while ((runstate = pmcstat_print_log(a)) == PMCSTAT_RUNNING)
+ ;
+ return;
+ }
+
+ /* convert the log to gprof compatible profiles */
+ assert(0); /* To be implemented */
+}
+
+/*
+ * Print log entries available in a configured parser.
+ */
+
+int
+pmcstat_print_log(struct pmcstat_args *a)
+{
+ struct pmclog_ev ev;
+
+ while (pmclog_read(a->pa_logparser, &ev) == 0) {
+ assert(ev.pl_state == PMCLOG_OK);
+ switch (ev.pl_type) {
+ case PMCLOG_TYPE_CLOSELOG:
+ PMCSTAT_PRINT_LOG(a,"close",);
+ break;
+ case PMCLOG_TYPE_DROPNOTIFY:
+ PMCSTAT_PRINT_LOG(a,"drop",);
+ break;
+ case PMCLOG_TYPE_INITIALIZE:
+ PMCSTAT_PRINT_LOG(a,"init","0x%x \"%s\"",
+ ev.pl_u.pl_i.pl_version,
+ pmc_name_of_cputype(ev.pl_u.pl_i.pl_arch));
+ break;
+ case PMCLOG_TYPE_MAPPINGCHANGE:
+ PMCSTAT_PRINT_LOG(a,"mapping","%s %d %p %p \"%s\"",
+ ev.pl_u.pl_m.pl_type == PMCLOG_MAPPING_INSERT ?
+ "insert" : "delete",
+ ev.pl_u.pl_m.pl_pid,
+ (void *) ev.pl_u.pl_m.pl_start,
+ (void *) ev.pl_u.pl_m.pl_end,
+ ev.pl_u.pl_m.pl_pathname);
+ break;
+ case PMCLOG_TYPE_PCSAMPLE:
+ PMCSTAT_PRINT_LOG(a,"sample","0x%x %d %p",
+ ev.pl_u.pl_s.pl_pmcid,
+ ev.pl_u.pl_s.pl_pid,
+ (void *) ev.pl_u.pl_s.pl_pc);
+ break;
+ case PMCLOG_TYPE_PMCALLOCATE:
+ PMCSTAT_PRINT_LOG(a,"allocate","0x%x \"%s\" 0x%x",
+ ev.pl_u.pl_a.pl_pmcid,
+ ev.pl_u.pl_a.pl_evname,
+ ev.pl_u.pl_a.pl_flags);
+ break;
+ case PMCLOG_TYPE_PMCATTACH:
+ PMCSTAT_PRINT_LOG(a,"attach","0x%x %d \"%s\"",
+ ev.pl_u.pl_t.pl_pmcid,
+ ev.pl_u.pl_t.pl_pid,
+ ev.pl_u.pl_t.pl_pathname);
+ break;
+ case PMCLOG_TYPE_PMCDETACH:
+ PMCSTAT_PRINT_LOG(a,"detach","0x%x %d",
+ ev.pl_u.pl_d.pl_pmcid,
+ ev.pl_u.pl_d.pl_pid);
+ break;
+ case PMCLOG_TYPE_PROCCSW:
+ PMCSTAT_PRINT_LOG(a,"csw","0x%x %d %jd",
+ ev.pl_u.pl_c.pl_pmcid,
+ ev.pl_u.pl_c.pl_pid,
+ ev.pl_u.pl_c.pl_value);
+ break;
+ case PMCLOG_TYPE_PROCEXEC:
+ PMCSTAT_PRINT_LOG(a,"exec","%d \"%s\"",
+ ev.pl_u.pl_x.pl_pid,
+ ev.pl_u.pl_x.pl_pathname);
+ break;
+ case PMCLOG_TYPE_PROCEXIT:
+ PMCSTAT_PRINT_LOG(a,"exitvalue","0x%x %d %jd",
+ ev.pl_u.pl_e.pl_pmcid,
+ ev.pl_u.pl_e.pl_pid,
+ ev.pl_u.pl_e.pl_value);
+ break;
+ case PMCLOG_TYPE_PROCFORK:
+ PMCSTAT_PRINT_LOG(a,"fork","%d %d",
+ ev.pl_u.pl_f.pl_oldpid,
+ ev.pl_u.pl_f.pl_newpid);
+ break;
+ case PMCLOG_TYPE_USERDATA:
+ PMCSTAT_PRINT_LOG(a,"user","0x%x",
+ ev.pl_u.pl_u.pl_userdata);
+ break;
+ case PMCLOG_TYPE_SYSEXIT:
+ PMCSTAT_PRINT_LOG(a,"exit","%d",
+ ev.pl_u.pl_se.pl_pid);
+ break;
+ default:
+ fprintf(a->pa_outputfile, "unknown %d",
+ ev.pl_type);
+ }
+ }
+
+ if (ev.pl_state == PMCLOG_EOF)
+ return PMCSTAT_FINISHED;
+ else if (ev.pl_state == PMCLOG_REQUIRE_DATA)
+ return PMCSTAT_RUNNING;
+
+ err(EX_DATAERR, "ERROR: event parsing failed "
+ "(record %jd, offset 0x%jx)",
+ (uintmax_t) ev.pl_count + 1, ev.pl_offset);
+ /*NOTREACHED*/
+}
+
+/*
+ * Close a logfile, after first flushing all in-module queued data.
+ */
+
+int
+pmcstat_close_log(struct pmcstat_args *a)
+{
+ if (pmc_flush_logfile() < 0 ||
+ pmc_configure_logfile(-1) < 0)
+ err(EX_OSERR, "ERROR: logging failed");
+ a->pa_flags &= ~FLAG_HAS_LOG_FILE;
+ return a->pa_flags & FLAG_HAS_PIPE ? PMCSTAT_EXITING :
+ PMCSTAT_FINISHED;
+}
+
void
pmcstat_show_usage(void)
{
@@ -340,16 +523,22 @@ pmcstat_show_usage(void)
"\t Measure process and/or system performance using hardware\n"
"\t performance monitoring counters.\n"
"\t Options include:\n"
- "\t -C\t\t toggle showing cumulative counts\n"
- "\t -O file\t set sampling log file to \"file\"\n"
- "\t -P spec\t allocate process-private sampling PMC\n"
- "\t -S spec\t allocate system-wide sampling PMC\n"
- "\t -c cpu\t\t set default cpu\n"
- "\t -d\t\t toggle tracking descendants\n"
+ "\t -C\t\t (toggle) show cumulative counts\n"
+ "\t -D path\t create profiles in directory \"path\"\n"
+ "\t -E\t\t (toggle) show counts at process exit\n"
+ "\t -O file\t send log output to \"file\"\n"
+ "\t -P spec\t allocate a process-private sampling PMC\n"
+ "\t -R file\t read events from \"file\"\n"
+ "\t -S spec\t allocate a system-wide sampling PMC\n"
+ "\t -W\t\t (toggle) show counts per context switch\n"
+ "\t -c cpu\t\t set cpu for subsequent system-wide PMCs\n"
+ "\t -d\t\t (toggle) track descendants\n"
+ "\t -g\t\t produce gprof(1) compatible profiles\n"
+ "\t -m\t\t merge gprof(1) profiles for executables\n"
"\t -n rate\t set sampling rate\n"
"\t -o file\t send print output to \"file\"\n"
- "\t -p spec\t allocate process-private counting PMC\n"
- "\t -s spec\t allocate system-wide counting PMC\n"
+ "\t -p spec\t allocate a process-private counting PMC\n"
+ "\t -s spec\t allocate a system-wide counting PMC\n"
"\t -t pid\t\t attach to running process with pid \"pid\"\n"
"\t -w secs\t set printing time interval"
);
@@ -365,12 +554,16 @@ main(int argc, char **argv)
double interval;
int option, npmc, ncpu;
int c, current_cpu, current_sampling_count;
- int running;
- int do_descendants, use_cumulative_counts;
+ int do_print, do_descendants;
+ int do_logproccsw, do_logprocexit;
+ int logfd;
+ int pipefd[2];
+ int use_cumulative_counts;
pid_t pid;
char *end;
+ const char *errmsg;
+ enum pmcstat_state runstate;
struct pmcstat_ev *ev;
- struct pmc_op_getpmcinfo *ppmci;
struct sigaction sa;
struct kevent kev;
struct winsize ws;
@@ -378,33 +571,60 @@ main(int argc, char **argv)
current_cpu = 0;
current_sampling_count = DEFAULT_SAMPLE_COUNT;
do_descendants = 0;
+ do_logproccsw = 0;
+ do_logprocexit = 0;
use_cumulative_counts = 0;
+ args.pa_required = 0;
args.pa_flags = 0;
args.pa_pid = (pid_t) -1;
args.pa_logfile = NULL;
+ args.pa_outputdir = NULL;
args.pa_outputfile = stderr;
args.pa_interval = DEFAULT_WAIT_INTERVAL;
STAILQ_INIT(&args.pa_head);
ev = NULL;
- while ((option = getopt(argc, argv, "CO:P:S:c:dn:o:p:s:t:w:")) != -1)
+ while ((option = getopt(argc, argv, "CD:EO:P:R:S:Wc:dgmn:o:p:s:t:w:"))
+ != -1)
switch (option) {
case 'C': /* cumulative values */
use_cumulative_counts = !use_cumulative_counts;
+ args.pa_required |= FLAG_HAS_COUNTING_PMCS;
break;
case 'c': /* CPU */
current_cpu = strtol(optarg, &end, 0);
if (*end != '\0' || current_cpu < 0)
errx(EX_USAGE,
- "ERROR: Illegal CPU number \"%s\"",
+ "ERROR: Illegal CPU number \"%s\".",
optarg);
-
+ args.pa_required |= FLAG_HAS_SYSTEM_PMCS;
break;
case 'd': /* toggle descendents */
do_descendants = !do_descendants;
+ args.pa_required |= FLAG_HAS_PROCESS_PMCS;
+ break;
+
+ case 'D':
+ args.pa_outputdir = optarg;
+ break;
+
+ case 'g': /* produce gprof compatible profiles */
+ args.pa_flags |= FLAG_DO_GPROF;
+ args.pa_required |= FLAG_HAS_SAMPLING_PMCS;
+ break;
+
+ case 'm': /* produce merged profiles */
+ args.pa_flags |= FLAG_DO_GPROF_MERGED;
+ args.pa_required |= FLAG_HAS_SAMPLING_PMCS;
+ break;
+
+ case 'E': /* log process exit */
+ do_logprocexit = !do_logprocexit;
+ args.pa_required |= (FLAG_HAS_PROCESS_PMCS |
+ FLAG_HAS_COUNTING_PMCS | FLAG_HAS_LOG_FILE);
break;
case 'p': /* process virtual counting PMC */
@@ -412,7 +632,7 @@ main(int argc, char **argv)
case 'P': /* process virtual sampling PMC */
case 'S': /* system-wide sampling PMC */
if ((ev = malloc(sizeof(*ev))) == NULL)
- errx(EX_SOFTWARE, "ERROR: Out of memory");
+ errx(EX_SOFTWARE, "ERROR: Out of memory.");
switch (option) {
case 'p': ev->ev_mode = PMC_MODE_TC; break;
@@ -421,14 +641,22 @@ main(int argc, char **argv)
case 'S': ev->ev_mode = PMC_MODE_SS; break;
}
- if (option == 'P' || option == 'p')
- args.pa_flags |= FLAG_USING_PROCESS_PMC;
+ if (option == 'P' || option == 'p') {
+ args.pa_flags |= FLAG_HAS_PROCESS_PMCS;
+ args.pa_required |= (FLAG_HAS_PROCESS |
+ FLAG_HAS_PID);
+ }
- if (option == 'P' || option == 'S')
- args.pa_flags |= FLAG_USING_SAMPLING;
+ if (option == 'P' || option == 'S') {
+ args.pa_flags |= FLAG_HAS_SAMPLING_PMCS;
+ args.pa_required |= FLAG_HAS_LOG_FILE;
+ }
if (option == 'p' || option == 's')
- args.pa_flags |= FLAG_USING_COUNTING;
+ args.pa_flags |= FLAG_HAS_COUNTING_PMCS;
+
+ if (option == 's' || option == 'S')
+ args.pa_flags |= FLAG_HAS_SYSTEM_PMCS;
ev->ev_spec = strdup(optarg);
@@ -442,7 +670,14 @@ main(int argc, char **argv)
else
ev->ev_cpu = PMC_CPU_ANY;
- ev->ev_descendants = do_descendants;
+ ev->ev_flags = 0;
+ if (do_descendants)
+ ev->ev_flags |= PMC_F_DESCENDANTS;
+ if (do_logprocexit)
+ ev->ev_flags |= PMC_F_LOG_PROCEXIT;
+ if (do_logproccsw)
+ ev->ev_flags |= PMC_F_LOG_PROCCSW;
+
ev->ev_cumulative = use_cumulative_counts;
ev->ev_saved = 0LL;
@@ -458,50 +693,68 @@ main(int argc, char **argv)
break;
+ case 'R': /* read an existing log file */
+ if ((logfd = open(optarg, O_RDONLY, 0)) < 0)
+ err(EX_OSERR, "ERROR: Cannot open \"%s\" for "
+ "reading", optarg);
+ if ((args.pa_logparser = pmclog_open(logfd))
+ == NULL)
+ err(EX_OSERR, "ERROR: Cannot create parser");
+ args.pa_flags |= FLAG_PROCESS_LOGFILE;
+ break;
+
case 'n': /* sampling count */
current_sampling_count = strtol(optarg, &end, 0);
if (*end != '\0' || current_sampling_count <= 0)
errx(EX_USAGE,
- "ERROR: Illegal count value \"%s\"",
+ "ERROR: Illegal count value \"%s\".",
optarg);
+ args.pa_required |= FLAG_HAS_SAMPLING_PMCS;
break;
case 'o': /* outputfile */
if (args.pa_outputfile != NULL)
(void) fclose(args.pa_outputfile);
-
if ((args.pa_outputfile = fopen(optarg, "w")) == NULL)
errx(EX_OSERR, "ERROR: cannot open \"%s\" for "
- "writing", optarg);
+ "writing.", optarg);
+ args.pa_required |= FLAG_HAS_COUNTING_PMCS;
+ break;
case 'O': /* sampling output */
if (args.pa_logfile != NULL)
- (void) fclose(args.pa_logfile);
-
+ errx(EX_OSERR, "ERROR: option -O may only be "
+ "specified once.");
if ((args.pa_logfile = fopen(optarg, "w")) == NULL)
errx(EX_OSERR, "ERROR: cannot open \"%s\" for "
- "writing", optarg);
+ "writing.", optarg);
+ args.pa_flags |= FLAG_HAS_LOG_FILE;
break;
case 't': /* target pid */
pid = strtol(optarg, &end, 0);
if (*end != '\0' || pid <= 0)
errx(EX_USAGE, "ERROR: Illegal pid value "
- "\"%s\"", optarg);
+ "\"%s\".", optarg);
args.pa_flags |= FLAG_HAS_PID;
+ args.pa_required |= FLAG_HAS_PROCESS_PMCS;
args.pa_pid = pid;
-
break;
case 'w': /* wait interval */
interval = strtod(optarg, &end);
if (*end != '\0' || interval <= 0)
errx(EX_USAGE, "ERROR: Illegal wait interval "
- "value \"%s\"", optarg);
+ "value \"%s\".", optarg);
args.pa_flags |= FLAG_HAS_WAIT_INTERVAL;
args.pa_interval = interval;
+ break;
+ case 'W': /* toggle LOG_CSW */
+ do_logproccsw = !do_logproccsw;
+ args.pa_required |= (FLAG_HAS_PROCESS_PMCS |
+ FLAG_HAS_COUNTING_PMCS | FLAG_HAS_LOG_FILE);
break;
case '?':
@@ -521,25 +774,84 @@ main(int argc, char **argv)
* Check invocation syntax.
*/
- if (STAILQ_EMPTY(&args.pa_head)) {
+ if (args.pa_flags & FLAG_PROCESS_LOGFILE) {
+ errmsg = NULL;
+ if (args.pa_flags & FLAG_HAS_PROCESS)
+ errmsg = "a command line specification";
+ else if (args.pa_flags & FLAG_HAS_PID)
+ errmsg = "option -t";
+ else if (!STAILQ_EMPTY(&args.pa_head))
+ errmsg = "a PMC event specification";
+ if (errmsg)
+ errx(EX_USAGE, "ERROR: option -R may not be used with "
+ "%s.", errmsg);
+ } else if (STAILQ_EMPTY(&args.pa_head)) {
warnx("ERROR: At least one PMC event must be specified");
pmcstat_show_usage();
}
- if (argc == 0) {
- if (args.pa_pid == -1) {
- if (args.pa_flags & FLAG_USING_PROCESS_PMC)
- errx(EX_USAGE, "ERROR: the -P or -p options "
- "require a target process");
- } else if ((args.pa_flags & FLAG_USING_PROCESS_PMC) == 0)
- errx(EX_USAGE,
- "ERROR: option -t requires a process-mode pmc "
- "specification");
- } else if (args.pa_pid != -1)
+ /* check for -t pid without a process PMC spec */
+ if ((args.pa_required & FLAG_HAS_PID) &&
+ (args.pa_flags & FLAG_HAS_PROCESS_PMCS) == 0)
+ errx(EX_USAGE, "ERROR: option -t requires a process mode PMC "
+ "to be specified.");
+
+ /* check for process-mode options without a command or -t pid */
+ if ((args.pa_required & FLAG_HAS_PROCESS_PMCS) &&
+ (args.pa_flags & (FLAG_HAS_PROCESS | FLAG_HAS_PID)) == 0)
+ errx(EX_USAGE, "ERROR: options -d,-E,-p,-P,-W require a "
+ "command line or target process.");
+
+ /* check for -p | -P without a target process of some sort */
+ if ((args.pa_required & (FLAG_HAS_PROCESS | FLAG_HAS_PID)) &&
+ (args.pa_flags & (FLAG_HAS_PROCESS | FLAG_HAS_PID)) == 0)
+ errx(EX_USAGE, "ERROR: the -P or -p options require a "
+ "target process or a command line.");
+
+ /* check for process-mode options without a process-mode PMC */
+ if ((args.pa_required & FLAG_HAS_PROCESS_PMCS) &&
+ (args.pa_flags & FLAG_HAS_PROCESS_PMCS) == 0)
+ errx(EX_USAGE, "ERROR: options -d,-E,-W require a "
+ "process mode PMC to be specified.");
+
+ /* check for -c cpu and not system mode PMCs */
+ if ((args.pa_required & FLAG_HAS_SYSTEM_PMCS) &&
+ (args.pa_flags & FLAG_HAS_SYSTEM_PMCS) == 0)
+ errx(EX_USAGE, "ERROR: option -c requires at least one "
+ "system mode PMC to be specified.");
+
+ /* check for counting mode options without a counting PMC */
+ if ((args.pa_required & FLAG_HAS_COUNTING_PMCS) &&
+ (args.pa_flags & FLAG_HAS_COUNTING_PMCS) == 0)
+ errx(EX_USAGE, "ERROR: options -C,-o,-W require at least one "
+ "counting mode PMC to be specified.");
+
+ /* check for sampling mode options without a sampling PMC spec */
+ if ((args.pa_required & FLAG_HAS_SAMPLING_PMCS) &&
+ (args.pa_flags & FLAG_HAS_SAMPLING_PMCS) == 0)
+ errx(EX_USAGE, "ERROR: options -n,-O require at least one "
+ "sampling mode PMC to be specified.");
+
+ if ((args.pa_flags & (FLAG_HAS_PID | FLAG_HAS_PROCESS)) ==
+ (FLAG_HAS_PID | FLAG_HAS_PROCESS))
errx(EX_USAGE,
"ERROR: option -t cannot be specified with a command "
- "name");
+ "line.");
+
+ /* check if -O was spuriously specified */
+ if ((args.pa_flags & FLAG_HAS_LOG_FILE) &&
+ (args.pa_required & FLAG_HAS_LOG_FILE) == 0)
+ errx(EX_USAGE,
+ "ERROR: option -O is used only with options "
+ "-E,-P,-S and -W.");
+ /* if we've been asked to process a log file, do that and exit */
+ if (args.pa_flags & FLAG_PROCESS_LOGFILE) {
+ pmcstat_process_log(&args);
+ exit(EX_OK);
+ }
+
+ /* otherwise, we've been asked to collect data */
if (pmc_init() < 0)
err(EX_UNAVAILABLE,
"ERROR: Initialization of the pmc(3) library failed");
@@ -556,15 +868,9 @@ main(int argc, char **argv)
* Allocate PMCs.
*/
- if (pmc_pmcinfo(0, &ppmci) < 0)
- err(EX_OSERR, "ERROR: cannot retrieve pmc information");
-
- assert(ppmci != NULL);
-
STAILQ_FOREACH(ev, &args.pa_head, ev_next)
if (pmc_allocate(ev->ev_spec, ev->ev_mode,
- (ev->ev_descendants ? PMC_F_DESCENDANTS : 0),
- ev->ev_cpu, &ev->ev_pmcid) < 0)
+ ev->ev_flags, ev->ev_cpu, &ev->ev_pmcid) < 0)
err(EX_OSERR, "ERROR: Cannot allocate %s-mode pmc with "
"specification \"%s\"",
PMC_IS_SYSTEM_MODE(ev->ev_mode) ? "system" : "process",
@@ -614,25 +920,48 @@ main(int argc, char **argv)
}
EV_SET(&kev, SIGINT, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
-
if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
err(EX_OSERR, "ERROR: Cannot register kevent for SIGINT");
- if (args.pa_flags & FLAG_USING_SAMPLING) {
+ EV_SET(&kev, SIGIO, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
+ if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
+ err(EX_OSERR, "ERROR: Cannot register kevent for SIGIO");
- /*
- * configure log file
- */
+ /*
+ * An exec() failure of a forked child is signalled by the
+ * child sending the parent a SIGCHLD. We don't register an
+ * actual signal handler for SIGCHLD, but instead use our
+ * kqueue to pick up the signal.
+ */
+ EV_SET(&kev, SIGCHLD, EVFILT_SIGNAL, EV_ADD, 0, 0, NULL);
+ if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
+ err(EX_OSERR, "ERROR: Cannot register kevent for SIGCHLD");
- if (args.pa_logfile == NULL)
- if ((args.pa_logfile =
- fopen(DEFAULT_LOGFILE_NAME, "w")) == NULL)
- err(EX_CANTCREAT, "ERROR: Cannot open sampling "
- "log file \"%s\"", DEFAULT_LOGFILE_NAME);
+ /*
+ * Configure the specified log file or setup a default log
+ * consumer via a pipe.
+ */
+ if (args.pa_required & FLAG_HAS_LOG_FILE) {
+
+ if (args.pa_logfile == NULL) {
+ if (pipe(pipefd) < 0)
+ err(EX_OSERR, "ERROR: pipe(2) failed");
+
+ EV_SET(&kev, pipefd[READPIPEFD], EVFILT_READ, EV_ADD,
+ 0, 0, NULL);
+
+ if (kevent(pmcstat_kq, &kev, 1, NULL, 0, NULL) < 0)
+ err(EX_OSERR, "ERROR: Cannot register kevent");
+
+ logfd = pipefd[WRITEPIPEFD];
+
+ args.pa_flags |= (FLAG_HAS_PIPE | FLAG_HAS_LOG_FILE);
+ args.pa_logparser = pmclog_open(pipefd[READPIPEFD]);
+ } else
+ logfd = fileno(args.pa_logfile);
- if (pmc_configure_logfile(fileno(args.pa_logfile)) < 0)
- err(EX_OSERR, "ERROR: Cannot configure sampling "
- "log");
+ if (pmc_configure_logfile(logfd) < 0)
+ err(EX_OSERR, "ERROR: Cannot configure log file");
STAILQ_FOREACH(ev, &args.pa_head, ev_next)
if (PMC_IS_SAMPLING_MODE(ev->ev_mode) &&
@@ -642,7 +971,7 @@ main(int argc, char **argv)
}
/* setup a timer for any counting mode PMCs */
- if (args.pa_flags & FLAG_USING_COUNTING) {
+ if (args.pa_flags & FLAG_HAS_COUNTING_PMCS) {
EV_SET(&kev, 0, EVFILT_TIMER, EV_ADD, 0,
args.pa_interval * 1000, NULL);
@@ -674,8 +1003,8 @@ main(int argc, char **argv)
* loop till either the target process (if any) exits, or we
* are killed by a SIGINT.
*/
-
- running = 1;
+ runstate = PMCSTAT_RUNNING;
+ do_print = 0;
do {
if ((c = kevent(pmcstat_kq, NULL, 0, &kev, 1, NULL)) <= 0) {
if (errno != EINTR)
@@ -688,26 +1017,43 @@ main(int argc, char **argv)
errc(EX_OSERR, kev.data, "ERROR: kevent failed");
switch (kev.filter) {
- case EVFILT_PROC: /* target process exited */
- running = 0;
- /* FALLTHROUGH */
-
- case EVFILT_TIMER: /* print out counting PMCs */
- pmcstat_print_pmcs(&args);
+ case EVFILT_PROC: /* target has exited */
+ if (args.pa_flags & FLAG_HAS_LOG_FILE)
+ runstate = pmcstat_close_log(&args);
+ break;
- if (running == 0) /* final newline */
- (void) fprintf(args.pa_outputfile, "\n");
+ case EVFILT_READ: /* log file data is present */
+ runstate = pmcstat_print_log(&args);
break;
case EVFILT_SIGNAL:
- if (kev.ident == SIGINT) {
+ if (kev.ident == SIGCHLD) {
+ /*
+ * The child process sends us a
+ * SIGCHLD if its exec() failed. We
+ * wait for it to exit and then exit
+ * ourselves.
+ */
+ (void) wait(&c);
+ runstate = PMCSTAT_FINISHED;
+ } else if (kev.ident == SIGIO) {
+ /*
+ * We get a SIGIO if a PMC loses all
+ * of its targets, or if logfile
+ * writes encounter an error.
+ */
+ if (args.pa_flags & FLAG_HAS_LOG_FILE)
+ runstate = pmcstat_close_log(&args);
+ do_print = 1; /* print PMCs at exit */
+ runstate = PMCSTAT_FINISHED;
+ } else if (kev.ident == SIGINT) {
/* pass the signal on to the child process */
if ((args.pa_flags & FLAG_HAS_PROCESS) &&
(args.pa_flags & FLAG_HAS_PID) == 0)
if (kill(args.pa_pid, SIGINT) != 0)
- err(EX_OSERR, "cannot kill "
- "child");
- running = 0;
+ err(EX_OSERR, "ERROR: cannot "
+ "signal child process");
+ runstate = PMCSTAT_FINISHED;
} else if (kev.ident == SIGWINCH) {
if (ioctl(fileno(args.pa_outputfile),
TIOCGWINSZ, &ws) < 0)
@@ -718,9 +1064,25 @@ main(int argc, char **argv)
assert(0);
break;
+
+ case EVFILT_TIMER: /* print out counting PMCs */
+ do_print = 1;
+ break;
+
}
- } while (running);
+ if (do_print) {
+ pmcstat_print_pmcs(&args);
+ if (runstate == PMCSTAT_FINISHED) /* final newline */
+ (void) fprintf(args.pa_outputfile, "\n");
+ do_print = 0;
+ }
+
+ } while (runstate != PMCSTAT_FINISHED);
+
+ /* flush any pending log entries */
+ if (args.pa_flags & FLAG_HAS_LOG_FILE)
+ pmc_flush_logfile();
pmcstat_cleanup(&args);
OpenPOWER on IntegriCloud